Index: llvm/include/llvm/Analysis/MemorySSA.h
===================================================================
--- llvm/include/llvm/Analysis/MemorySSA.h
+++ llvm/include/llvm/Analysis/MemorySSA.h
@@ -200,6 +200,10 @@
     return this->DefsOnlyType::getReverseIterator();
   }
 
+  /// Used for debugging and tracking things about MemoryAccesses.
+  /// Guaranteed unique among MemoryAccesses, no guarantees otherwise.
+  inline unsigned getID() const;
+
 protected:
   friend class MemoryDef;
   friend class MemoryPhi;
@@ -211,10 +215,6 @@
   /// moved.
   void setBlock(BasicBlock *BB) { Block = BB; }
 
-  /// Used for debugging and tracking things about MemoryAccesses.
-  /// Guaranteed unique among MemoryAccesses, no guarantees otherwise.
-  inline unsigned getID() const;
-
   MemoryAccess(LLVMContext &C, unsigned Vty, DeleteValueTy DeleteValue,
                BasicBlock *BB, unsigned NumOperands)
       : DerivedUser(Type::getVoidTy(C), Vty, nullptr, NumOperands, DeleteValue),
Index: llvm/include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/GVN.h
+++ llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -172,6 +172,7 @@
 
     AAResults *AA = nullptr;
     MemoryDependenceResults *MD = nullptr;
+    MemorySSA *MSSA = nullptr;
     DominatorTree *DT = nullptr;
     uint32_t nextValueNumber = 1;
 
@@ -181,6 +182,7 @@
                                Value *LHS, Value *RHS);
     Expression createExtractvalueExpr(ExtractValueInst *EI);
     uint32_t lookupOrAddCall(CallInst *C);
+    uint32_t lookupOrAddLoadStore(Instruction *I);
    uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock,
                              uint32_t Num, GVN &Gvn);
    bool areCallValsEqual(uint32_t Num, uint32_t NewNum, const BasicBlock *Pred,
@@ -209,6 +211,7 @@
    void setAliasAnalysis(AAResults *A) { AA = A; }
    AAResults *getAliasAnalysis() const { return AA; }
    void setMemDep(MemoryDependenceResults *M) { MD = M; }
+    void setMemorySSA(MemorySSA *M) { MSSA = M; }
    void setDomTree(DominatorTree *D) { DT = D; }
    uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
    void verifyRemoved(const Value *) const;
Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -501,6 +501,36 @@
   }
 }
 
+// If MemorySSA is available, the incoming memory state is included in the hash
+// for loads and stores. If the incoming memory state is:
+// * LiveOnEntry, do nothing.
+// * a MemoryPhi, add <0, ID>, where ID is the MemoryPhi ID.
+// * a MemoryDef, add VN, where VN is the value number of the memory-setting
+//   instruction.
+uint32_t GVN::ValueTable::lookupOrAddLoadStore(Instruction *I) {
+  if (MSSA == nullptr) {
+    valueNumbering[I] = nextValueNumber;
+    return nextValueNumber++;
+  }
+
+  Expression e;
+  e.type = I->getType();
+  e.opcode = I->getOpcode();
+  for (Use &Op : I->operands())
+    e.varargs.push_back(lookupOrAdd(Op));
+
+  MemoryAccess *MA = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(I);
+  if (isa<MemoryPhi>(MA)) {
+    e.varargs.push_back(0);
+    e.varargs.push_back(MA->getID());
+  } else if (!MSSA->isLiveOnEntryDef(MA))
+    e.varargs.push_back(lookupOrAdd(cast<MemoryDef>(MA)->getMemoryInst()));
+
+  uint32_t N = assignExpNewValueNum(e).first;
+  valueNumbering[I] = N;
+  return N;
+}
+
 /// Returns true if a value number exists for the specified value.
 bool GVN::ValueTable::exists(Value *V) const {
   return valueNumbering.count(V) != 0;
 }
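
To make the new hashing concrete, here is a small example (an illustrative sketch, not part of the patch): with MemorySSA available, the two loads in %bb below have the same clobbering access, the MemoryPhi that merges the two incoming memory states, so they receive the same value number; the load in %if.then is clobbered by the store's MemoryDef instead and hashes differently.

  define i32 @vn_example(i1 %cc, i32* %p) {
  entry:
    br i1 %cc, label %if.then, label %bb

  if.then:
    store i32 1, i32* %p      ; creates a MemoryDef
    %a = load i32, i32* %p    ; hash includes the store's value number
    br label %bb

  bb:                         ; a MemoryPhi merges the two memory states
    %b = load i32, i32* %p    ; hash includes <0, MemoryPhi ID>
    %c = load i32, i32* %p    ; same clobber as %b, so the same value number
    %s = add i32 %b, %c
    ret i32 %s
  }
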
@@ -571,6 +601,9 @@
       valueNumbering[V] = nextValueNumber;
       NumberingPhi[nextValueNumber] = cast<PHINode>(V);
       return nextValueNumber++;
+    case Instruction::Load:
+    case Instruction::Store:
+      return lookupOrAddLoadStore(I);
     default:
       valueNumbering[V] = nextValueNumber;
       return nextValueNumber++;
@@ -670,7 +703,7 @@
   auto *MemDep =
       isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr;
   auto *LI = AM.getCachedResult<LoopAnalysis>(F);
-  auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
+  auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
   bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
                          MSSA ? &MSSA->getMSSA() : nullptr);
@@ -2028,6 +2061,16 @@
   return false;
 }
 
+// For certain opcodes, some varargs are not value numbers and should not be
+// phi-translated.
+static bool shouldNotPhiTranslate(const GVN::Expression &Exp, int Idx) {
+  return (Idx > 1 && Exp.opcode == Instruction::InsertValue) ||
+         (Idx > 0 && Exp.opcode == Instruction::ExtractValue) ||
+         (Idx > 1 && Exp.opcode == Instruction::ShuffleVector) ||
+         (Idx > 0 && Exp.opcode == Instruction::Load) ||
+         (Idx > 1 && Exp.opcode == Instruction::Store);
+}
+
 /// Translate value number \p Num using phis, so that it has the values of
 /// the phis in BB.
 uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
@@ -2053,12 +2096,7 @@
   Expression Exp = Expressions[ExprIdx[Num]];
   for (unsigned i = 0; i < Exp.varargs.size(); i++) {
-    // For InsertValue and ExtractValue, some varargs are index numbers
-    // instead of value numbers. Those index numbers should not be
-    // translated.
-    if ((i > 1 && Exp.opcode == Instruction::InsertValue) ||
-        (i > 0 && Exp.opcode == Instruction::ExtractValue) ||
-        (i > 1 && Exp.opcode == Instruction::ShuffleVector))
+    if (shouldNotPhiTranslate(Exp, i))
       continue;
     Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn);
   }
@@ -2468,6 +2506,7 @@
   ICF = &ImplicitCFT;
   this->LI = LI;
   VN.setMemDep(MD);
+  VN.setMemorySSA(MSSA);
   ORE = RunORE;
   InvalidBlockRPONumbers = true;
   MemorySSAUpdater Updater(MSSA);
@@ -3062,6 +3101,24 @@
   return !isGuaranteedToTransferExecutionToSuccessor(&I);
 }
 
+// Return true iff the two instructions have the same volatility and the same
+// atomic ordering.
+static bool canMergeInstructions(const Instruction &A, const Instruction &B) {
+  if (const auto *LA = dyn_cast<LoadInst>(&A)) {
+    const auto *LB = cast<LoadInst>(&B);
+    return LA->isVolatile() == LB->isVolatile() &&
+           LA->getOrdering() == LB->getOrdering();
+  }
+
+  if (const auto *SA = dyn_cast<StoreInst>(&A)) {
+    const auto *SB = cast<StoreInst>(&B);
+    return SA->isVolatile() == SB->isVolatile() &&
+           SA->getOrdering() == SB->getOrdering();
+  }
+
+  return true;
+}
+
 void GVN::collectHoistCandidates(BasicBlock *BB) {
   bool HasHoistBarrier = false;
   for (Instruction &I : *BB) {
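
A minimal sketch of what canMergeInstructions rules out (assumed behavior, exercised by the no_merge_* tests below): the two loads receive the same value number, but pairing them would drop the volatile qualifier from one path.

  if.then:
    %v = load volatile i32, i32* %p   ; volatile
    ...
  if.else:
    %w = load i32, i32* %p            ; same value number, but non-volatile,
                                      ; so canMergeInstructions returns false
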
@@ -3090,8 +3147,11 @@
     if (!HasHoistBarrier || isSafeToSpeculativelyExecute(&I)) {
       uint32_t N = VN.lookupOrAdd(&I);
       auto It = HoistPairs.find(N);
-      if (It != HoistPairs.end())
-        It->second.second = &I;
+      if (It != HoistPairs.end()) {
+        // Only pair instructions with the same volatility and atomic ordering.
+        if (canMergeInstructions(*It->second.first, I))
+          It->second.second = &I;
+      }
     }
 
     if ((HasHoistBarrier |= isHoistBarrier(I)))
@@ -3103,7 +3163,14 @@
 void GVN::replaceInstruction(Instruction *I, Instruction *Repl) {
   LLVM_DEBUG(dbgs() << "Simple GVNHoist: replacing" << *I << " by" << *Repl
                     << '\n';);
+  assert(llvm::none_of(InstrsToErase,
+                       [=](const Instruction *X) { return I == X; }) &&
+         "Deleting an instruction twice");
   patchReplacementInstruction(I, Repl);
+  if (auto *LI = dyn_cast<LoadInst>(Repl))
+    LI->setAlignment(std::min(LI->getAlign(), cast<LoadInst>(I)->getAlign()));
+  else if (auto *SI = dyn_cast<StoreInst>(Repl))
+    SI->setAlignment(std::min(SI->getAlign(), cast<StoreInst>(I)->getAlign()));
   ICF->removeUsersOf(I);
   I->replaceAllUsesWith(Repl);
   salvageKnowledge(I, AC);
@@ -3118,6 +3185,14 @@
   I->eraseFromParent();
 }
 
+// Returns `true` if the instruction `Def` may write to the same memory that
+// `Use` reads.
+static bool defClobbersUse(AAResults *AA, Instruction *Def, Instruction *Use) {
+  Optional<MemoryLocation> DefLoc = MemoryLocation::getOrNone(Def);
+  Optional<MemoryLocation> UseLoc = MemoryLocation::getOrNone(Use);
+  return DefLoc && UseLoc ? !AA->isNoAlias(*DefLoc, *UseLoc) : true;
+}
+
 // Only hoist instructions from the "then" block.
 // Each hoisted instruction must be paired with an instruction from the "else"
 // block.
@@ -3142,8 +3217,6 @@
   It->second.second = nullptr;
   assert(ElseI->getParent() == ElseBB && "Instruction already removed");
-  assert(!ThenI->mayReadOrWriteMemory() && !ElseI->mayReadOrWriteMemory() &&
-         "Memory read/write instructions must not be hoisted.");
 
   bool Change = false;
   // Hoist operands. Begin by hoisting all of the operands of the "then"
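
For intuition about the hunk that follows (an illustrative sketch): before moving a memory access, hoistPair first recurses on its memory dependencies, so the clobbering stores below are hoisted before the loads that depend on them; if the stores cannot be paired and hoisted, the loads stay put as well.

  if.then:
    store i32 0, i32* %p     ; may clobber the load below, hoisted first
    %0 = load i32, i32* %a
    ...
  if.else:
    store i32 0, i32* %p
    %1 = load i32, i32* %a
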
@@ -3169,10 +3242,76 @@
     return {Change, true};
   }
 
+  // Do the same for memory dependencies.
+  if (ThenI->mayReadOrWriteMemory()) {
+    // Hoist preceding aliased MemoryDefs.
+    auto *MSSA = MSSAU->getMemorySSA();
+    auto *Walker = MSSA->getSkipSelfWalker();
+    MemoryAccess *ThenMA = Walker->getClobberingMemoryAccess(ThenI);
+    while (!MSSA->isLiveOnEntryDef(ThenMA) && ThenMA->getBlock() == ThenBB) {
+      bool DepChange, CantHoist;
+      std::tie(DepChange, CantHoist) =
+          hoistPair(DestBB, ThenBB, ElseBB,
+                    cast<MemoryDef>(ThenMA)->getMemoryInst(), nullptr);
+      Change |= DepChange;
+      if (CantHoist)
+        return {Change, true};
+      ThenMA = Walker->getClobberingMemoryAccess(ThenI);
+    }
+
+    MemoryAccess *ElseMA = Walker->getClobberingMemoryAccess(ElseI);
+    if (!MSSA->isLiveOnEntryDef(ElseMA) && ElseMA->getBlock() == ElseBB)
+      return {Change, true};
+
+    // For instructions that modify memory, try to hoist preceding aliased
+    // MemoryUses.
+    if (const auto *Def = dyn_cast<MemoryDef>(MSSA->getMemoryAccess(ThenI))) {
+      // Collect the uses ...
+      SmallVector<Instruction *> MemoryUses;
+      for (const MemoryAccess &MA : *MSSA->getBlockAccesses(ThenBB)) {
+        if (const auto *Use = dyn_cast<MemoryUse>(&MA)) {
+          if (!MSSA->locallyDominates(Use, Def))
+            continue;
+          Instruction *U = Use->getMemoryInst();
+          if (!defClobbersUse(getAliasAnalysis(), ThenI, U))
+            continue;
+          MemoryUses.push_back(U);
+        }
+      }
+
+      // ... then try to hoist them.
+      for (Instruction *I : MemoryUses) {
+        bool DepChange, CantHoist;
+        std::tie(DepChange, CantHoist) =
+            hoistPair(DestBB, ThenBB, ElseBB, I, nullptr);
+        Change |= DepChange;
+        if (CantHoist)
+          return {Change, true};
+      }
+
+      // None should have remained in the "else" block.
+      Def = cast<MemoryDef>(MSSA->getMemoryAccess(ElseI));
+      for (const MemoryAccess &MA : *MSSA->getBlockAccesses(ElseBB)) {
+        if (const auto *Use = dyn_cast<MemoryUse>(&MA)) {
+          if (!MSSA->locallyDominates(Use, Def))
+            continue;
+          Instruction *U = Use->getMemoryInst();
+          if (defClobbersUse(getAliasAnalysis(), ElseI, U))
+            return {Change, true};
+        }
+      }
+    }
+  }
+
+  // Remove the instruction from the memory dependence cache, as the cache might
+  // keep a reference to the old block.
+  MD->removeInstruction(ThenI);
   // Hoist one of the instructions and replace all uses of the other with it.
   ICF->removeInstruction(ThenI);
   ICF->insertInstructionTo(ThenI, DestBB);
   ThenI->moveBefore(DestBB->getTerminator());
+  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(ThenI))
+    MSSAU->moveToPlace(MA, DestBB, MemorySSA::InsertionPlace::BeforeTerminator);
   replaceInstruction(ElseI, ThenI);
 
   return {true, false};
Index: llvm/test/Analysis/ValueTracking/gep-negative-issue.ll
===================================================================
--- llvm/test/Analysis/ValueTracking/gep-negative-issue.ll
+++ llvm/test/Analysis/ValueTracking/gep-negative-issue.ll
@@ -35,9 +35,7 @@
 ; CHECK: getelementptr inbounds double, double addrspace(100)* {{%.*}}, i64 -1
 ; CHECK-NEXT: store double 0.000000e+00, double addrspace(100)* [[DST:%.*]]
 ; CHECK-NEXT: store double 0.000000e+00, double addrspace(100)* [[DST]]
-; CHECK: load
-; CHECK: getelementptr inbounds %ArrayImpl, %ArrayImpl addrspace(100)*
-; CHECK: load
+; CHECK: load double addrspace(100)*
 ; CHECK: getelementptr inbounds double, double addrspace(100)* {{%.*}}, i64 -1
 ; CHECK: store double 0.000000e+00, double addrspace(100)*
 ; CHECK: ret
Index: llvm/test/Transforms/GVN/PRE/load-pre-align.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/load-pre-align.ll
+++ llvm/test/Transforms/GVN/PRE/load-pre-align.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -gvn -S | FileCheck %s
-; RUN: opt < %s -passes="gvn" -S | FileCheck %s
+; RUN: opt < %s -gvn --enable-simple-gvn-hoist=false -S | FileCheck %s
+; RUN: opt < %s -passes="gvn" --enable-simple-gvn-hoist=false -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
Index: llvm/test/Transforms/GVN/PRE/pre-single-pred.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-single-pred.ll
+++ llvm/test/Transforms/GVN/PRE/pre-single-pred.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -gvn -enable-load-pre -S | FileCheck %s
-; RUN: opt < %s -passes="gvn" -enable-load-pre=false -S | FileCheck %s
+; RUN: opt < %s -gvn -enable-load-pre -enable-simple-gvn-hoist=false -S | FileCheck %s
+; RUN: opt < %s -passes="gvn" -enable-simple-gvn-hoist=false -enable-load-pre=false -S | FileCheck %s
 
 ; This testcase assumed we'll PRE the load into %for.cond, but we don't
 ; actually verify that doing so is safe.  If there didn't _happen_ to be a load in
 ; %for.end, we would actually be lengthening the execution on some paths, and
Index: llvm/test/Transforms/GVN/PRE/volatile.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/volatile.ll
+++ llvm/test/Transforms/GVN/PRE/volatile.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; Tests that check our handling of volatile instructions encountered
 ; when scanning for dependencies
-; RUN: opt -basic-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -basic-aa -gvn -enable-simple-gvn-hoist=false -S < %s | FileCheck %s
 
 ; Check that we can bypass a volatile load when searching
 ; for dependencies of a non-volatile load
Index: llvm/test/Transforms/GVN/condprop.ll
===================================================================
--- llvm/test/Transforms/GVN/condprop.ll
+++ llvm/test/Transforms/GVN/condprop.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basic-aa -gvn -S | FileCheck %s
+; RUN: opt < %s -basic-aa -gvn -enable-simple-gvn-hoist=false -S | FileCheck %s
 
 @a = external global i32		; <i32*> [#uses=7]
Index: llvm/test/Transforms/GVN/simple-gvnhoist-loadstore.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/GVN/simple-gvnhoist-loadstore.ll
@@ -0,0 +1,649 @@
+; RUN: opt -S --aa-pipeline=tbaa,basic-aa --passes=gvn %s | FileCheck %s
+
+define dso_local i32 @basic_load_hoisting(i1 %cc, i32* %a, i32 %b, i32 %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = load i32, i32* %a
+  %1 = add nsw i32 %0, %b
+  %2 = sdiv i32 %c, %1
+  br label %if.end
+
+if.else:
+  %3 = load i32, i32* %a
+  %4 = add nsw i32 %3, %b
+  %5 = sdiv i32 %c, %4
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%2, %if.then], [%5, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @basic_load_hoisting
+; CHECK: entry:
+; CHECK: %0 = load i32, i32* %a, align 4
+; CHECK: %1 = add nsw i32 %0, %b
+; CHECK: %2 = sdiv i32 %c, %1
+; CHECK: if.then:
+; CHECK-NEXT: br label %if.end
+; CHECK: if.else:
+; CHECK-NEXT: br label %if.end
+; CHECK: if.end:
+; CHECK-NEXT: %r = phi i32 [ %2, %if.then ], [ %2, %if.else ]
+
+define dso_local i32 @hoist_volatile(i1 %cc, i32* %a) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = load volatile i32, i32* %a
+  br label %if.end
+
+if.else:
+  %1 = load volatile i32, i32* %a
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%0, %if.then], [%1, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @hoist_volatile
+; CHECK: entry:
+; CHECK: %0 = load volatile i32, i32* %a, align 4
+; CHECK: if.then:
+; CHECK-NEXT: br label %if.end
+; CHECK: if.else:
+; CHECK-NEXT: br label %if.end
+; CHECK: if.end:
+; CHECK-NEXT: %r = phi i32 [ %0, %if.then ], [ %0, %if.else ]
+
+
+define dso_local i32 @no_merge_volatile_with_non_volatile(i1 %cc, i32* %a) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = load i32, i32* %a
+  br label %if.end
+
+if.else:
+  %1 = load volatile i32, i32* %a
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%0, %if.then], [%1, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @no_merge_volatile_with_non_volatile
+; CHECK: entry:
+; CHECK-NEXT: br i1 %cc, label %if.then, label %if.else
+; CHECK: if.then:
+; CHECK-NEXT: %0 = load i32, i32* %a, align 4
+; CHECK: if.else:
+; CHECK-NEXT: %1 = load volatile i32, i32* %a, align 4
+
+define dso_local i32 @hoist_atomic(i1 %cc, i32* %a) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = load atomic i32, i32* %a acquire, align 4
+  br label %if.end
+
+if.else:
+  %1 = load atomic i32, i32* %a acquire, align 4
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%0, %if.then], [%1, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @hoist_atomic
+; CHECK: entry:
+; CHECK: %0 = load atomic i32, i32* %a acquire, align 4
+; CHECK: if.then:
+; CHECK-NEXT: br label %if.end
+; CHECK: if.else:
+; CHECK-NEXT: br label %if.end
+; CHECK: if.end:
+; CHECK-NEXT: %r = phi i32 [ %0, %if.then ], [ %0, %if.else ]
+
+define dso_local i32 @no_merge_atomic_different_ordering0(i1 %cc, i32* %a) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = load i32, i32* %a, align 4
+  br label %if.end
+
+if.else:
+  %1 = load atomic i32, i32* %a acquire, align 4
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%0, %if.then], [%1, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @no_merge_atomic_different_ordering0
+; CHECK: entry:
+; CHECK-NEXT: br i1 %cc, label %if.then, label %if.else
+; CHECK: if.then:
+; CHECK-NEXT: %0 = load i32, i32* %a, align 4
+; CHECK: if.else:
+; CHECK-NEXT: %1 = load atomic i32, i32* %a acquire, align 4
+
+define dso_local i32 @no_merge_atomic_different_ordering1(i1 %cc, i32* %a) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = load atomic i32, i32* %a acquire, align 4
+  br label %if.end
+
+if.else:
+  %1 = load atomic i32, i32* %a monotonic, align 4
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%0, %if.then], [%1, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @no_merge_atomic_different_ordering1
+; CHECK: entry:
+; CHECK-NEXT: br i1 %cc, label %if.then, label %if.else
+; CHECK: if.then:
+; CHECK-NEXT: %0 = load atomic i32, i32* %a acquire, align 4
+; CHECK: if.else:
+; CHECK-NEXT: %1 = load atomic i32, i32* %a monotonic, align 4
+
+define dso_local i32 @no_reorder_load_over_aliasing_store0(i1 %cc, i32* %a, i32* %b) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  store i32 0, i32* %b
+  %0 = load i32, i32* %a
+  br label %if.end
+
+if.else:
+  %1 = load i32, i32* %a
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%0, %if.then], [%1, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @no_reorder_load_over_aliasing_store0
+; CHECK: entry:
+; CHECK-NEXT: br i1 %cc, label %if.then, label %if.else
+; CHECK: if.then:
+; CHECK-NEXT: store i32 0, i32* %b, align 4
+; CHECK-NEXT: %0 = load i32, i32* %a, align 4
+; CHECK: if.else:
+; CHECK-NEXT: %1 = load i32, i32* %a, align 4
+
+define dso_local i32 @no_reorder_load_over_aliasing_store1(i1 %cc, i32* %a, i32* %b, i32* %c, i32 %d, i32 %e) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  store i32 0, i32* %c, !tbaa !6
+  store i32 0, i32* %b, !tbaa !5
+  %0 = load i32, i32* %a, !tbaa !4
+  br label %if.end
+
+if.else:
+  store i32 0, i32* %b, !tbaa !5
+  %1 = load i32, i32* %a, !tbaa !4
+  %2 = add i32 %1, %d
+  %3 = add i32 %2, %e
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%0, %if.then], [%3, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @no_reorder_load_over_aliasing_store1
+; CHECK: entry:
+; CHECK-NEXT: store i32 0, i32* %b, align 4, !tbaa ![[#B:]]
+; CHECK: if.then:
+; CHECK-NEXT: store i32 0, i32* %c, align 4, !tbaa ![[#C:]]
+; CHECK-NEXT: %0 = load i32, i32* %a, align 4, !tbaa ![[#A:]]
+; CHECK: if.else:
+; CHECK-NEXT: %1 = load i32, i32* %a, align 4, !tbaa ![[#A]]
+
+define dso_local i32 @reorder_non_volatile(i1 %cc, i32* %a, i32* %b, i32 %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = load i32, i32* %a
+  %1 = load i32, i32* %b
+  %2 = add nsw i32 %0, %1
+  br label %if.end
+
+if.else:
+  %3 = load volatile i32, i32* %b
+  %4 = load i32, i32* %a
+  %5 = add nsw i32 %3, %4
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%2, %if.then], [%5, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @reorder_non_volatile
+; CHECK: entry:
+; CHECK-NEXT: %0 = load i32, i32* %a, align 4
+; CHECK: if.then:
+; CHECK-NEXT: %1 = load i32, i32* %b, align 4
+; CHECK: if.else:
+; CHECK-NEXT: %3 = load volatile i32, i32* %b, align 4
+
+define dso_local i32 @no_reorder_volatile(i1 %cc, i32* %a, i32* %b, i32 %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = load volatile i32, i32* %a
+  %1 = load volatile i32, i32* %b
+  %2 = add nsw i32 %0, %1
+  br label %if.end
+
+if.else:
+  %3 = load volatile i32, i32* %b
+  %4 = load volatile i32, i32* %a
+  %5 = add nsw i32 %3, %4
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%2, %if.then], [%5, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @no_reorder_volatile
+; CHECK: entry:
+; CHECK-NEXT: br i1 %cc, label %if.then, label %if.else
+; CHECK: if.then:
+; CHECK-NEXT: %0 = load volatile i32, i32* %a, align 4
+; CHECK-NEXT: %1 = load volatile i32, i32* %b, align 4
+; CHECK: if.else:
+; CHECK-NEXT: %3 = load volatile i32, i32* %b, align 4
+; CHECK-NEXT: %4 = load volatile i32, i32* %a, align 4
+
+define dso_local i32 @no_reorder_atomic0(i1 %cc, i32* %a, i32* %b, i32 %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = load i32, i32* %a
+  %1 = load i32, i32* %b
+  %2 = add nsw i32 %0, %1
+  br label %if.end
+
+if.else:
+  %3 = load atomic i32, i32* %b acquire, align 4
+  %4 = load i32, i32* %a
+  %5 = add nsw i32 %3, %4
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%2, %if.then], [%5, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @no_reorder_atomic0(i1 %cc, i32* %a, i32* %b, i32 %c) {
+; CHECK: entry:
+; CHECK-NEXT: br i1 %cc, label %if.then, label %if.else
+; CHECK: if.then:
+; CHECK-NEXT: %0 = load i32, i32* %a, align 4
+; CHECK-NEXT: %1 = load i32, i32* %b, align 4
+; CHECK: if.else:
+; CHECK-NEXT: %3 = load atomic i32, i32* %b acquire, align 4
+; CHECK-NEXT: %4 = load i32, i32* %a, align 4
+
+define dso_local i32 @no_reorder_atomic1(i1 %cc, i32* %a, i32* %b, i32 %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = load atomic i32, i32* %a acquire, align 4
+  %1 = load atomic i32, i32* %b acquire, align 4
+  %2 = add nsw i32 %0, %1
+  br label %if.end
+
+if.else:
+  %3 = load atomic i32, i32* %a acquire, align 4
+  %4 = load atomic i32, i32* %b acquire, align 4
+  %5 = add nsw i32 %3, %4
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%2, %if.then], [%5, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @no_reorder_atomic1(i1 %cc, i32* %a, i32* %b, i32 %c) {
+; CHECK: entry:
+; CHECK-NEXT: %0 = load atomic i32, i32* %a acquire, align 4
+; CHECK-NEXT: %1 = load atomic i32, i32* %b acquire, align 4
+; CHECK: if.then:
+; CHECK-NEXT: br label %if.end
+; CHECK: if.else:
+; CHECK-NEXT: br label %if.end
+
+
+define dso_local i32 @reorder_load_over_non_aliasing_store(i1 %cc, i32* %a, i32 %b, i32 %c, i32* %p) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  store i32 0, i32* %p, !tbaa !5
+  %0 = load i32, i32* %a, !tbaa !6
+  %1 = add nsw i32 %0, %b
+  %2 = sdiv i32 %c, %0
+  br label %if.end
+
+if.else:
+  %3 = load i32, i32* %a, !tbaa !6
+  %4 = add nsw i32 %3, %b
+  %5 = sdiv i32 %c, %4
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%2, %if.then], [%5, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @reorder_load_over_non_aliasing_store
+; CHECK: entry:
+; CHECK-NEXT: %0 = load i32, i32* %a, align 4, !tbaa ![[#C]]
+; CHECK-NEXT: %1 = add nsw i32 %0, %b
+; CHECK: if.then:
+; CHECK-NEXT: store i32 0, i32* %p, align 4, !tbaa ![[#B]]
+; CHECK-NEXT: %2 = sdiv i32 %c, %0
+; CHECK: if.else:
+; CHECK-NEXT: %3 = sdiv i32 %c, %1
+; CHECK: if.end:
+; CHECK-NEXT: %r = phi i32 [ %2, %if.then ], [ %3, %if.else ]
+
+define dso_local i32 @load_phi_dep(i1 %cc0, i1 %cc1, i32* %a, i32 %b, i32* %p) {
+entry:
+  br i1 %cc0, label %if.then0, label %if.else0
+
+if.then0:
+  store i32 0, i32* %p
+  br label %bb
+
+if.else0:
+  br label %bb
+
+bb:
+  br i1 %cc1, label %if.then1, label %if.else1
+
+if.then1:
+  %0 = load i32, i32* %a
+  %1 = add nsw i32 %0, %b
+  br label %if.end
+
+if.else1:
+  %2 = load i32, i32* %a
+  %3 = add nsw i32 %2, %b
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%1, %if.then1], [%3, %if.else1]
+  ret i32 %r
+}
+; CHECK-LABEL: @load_phi_dep
+; CHECK: bb:
+; CHECK-NEXT: %0 = load i32, i32* %a, align 4
+; CHECK-NEXT: %1 = add nsw i32 %0, %b
+; CHECK: if.then1:
+; CHECK-NEXT: br label %if.end
+; CHECK: if.else1:
+; CHECK-NEXT: br label %if.end
+; CHECK: if.end:
+; CHECK-NEXT: %r = phi i32 [ %1, %if.then1 ], [ %1, %if.else1 ]
+
+define dso_local i32 @host_store0(i1 %cc, i32* %a, i32 %b, i32* %p) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  store i32 0, i32* %p
+  %0 = add nsw i32 %b, 1
+  br label %if.end
+
+if.else:
+  store i32 0, i32* %p
+  %1 = load i32, i32* %a
+  %2 = add nsw i32 %b, %1
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%0, %if.then], [%2, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @host_store0
+; CHECK: entry:
+; CHECK-NEXT: store i32 0, i32* %p, align 4
+; CHECK: if.then:
+; CHECK-NEXT: %0 = add nsw i32 %b, 1
+; CHECK: if.else:
+; CHECK-NEXT: %1 = load i32, i32* %a, align 4
+; CHECK-NEXT: %2 = add nsw i32 %b, %1
+
+define dso_local i32 @host_store1(i1 %cc, i32* %a, i32 %b, i32* %p) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  store i32 1, i32* %a
+  store i32 0, i32* %p
+  %0 = add nsw i32 %b, 1
+  br label %if.end
+
+if.else:
+  store i32 1, i32* %a
+  store i32 0, i32* %p
+  %1 = load i32, i32* %a
+  %2 = add nsw i32 %b, %1
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%0, %if.then], [%2, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @host_store1
+; CHECK: entry:
+; CHECK-NEXT: store i32 1, i32* %a, align 4
+; CHECK-NEXT: store i32 0, i32* %p, align 4
+; CHECK: if.then:
+; CHECK-NEXT: %0 = add nsw i32 %b, 1
+; CHECK: if.else:
+; CHECK-NEXT: %1 = load i32, i32* %a, align 4
+; CHECK-NEXT: %2 = add nsw i32 %b, %1
+
+define dso_local void @no_store_reorder_across_aliasing_store0(i1 %cc, i32* %a, i32* %b) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  store i32 0, i32* %a
+  store i32 0, i32* %b
+  br label %if.end
+
+if.else:
+  store i32 0, i32* %b
+  br label %if.end
+
+if.end:
+  ret void
+}
+; CHECK-LABEL: @no_store_reorder_across_aliasing_store0
+; CHECK: entry:
+; CHECK-NEXT: br i1 %cc, label %if.then, label %if.else
+; CHECK: if.then:
+; CHECK-NEXT: store i32 0, i32* %a, align 4
+; CHECK-NEXT: store i32 0, i32* %b, align 4
+; CHECK: if.else:
+; CHECK-NEXT: store i32 0, i32* %b, align 4
+
+define dso_local i32 @no_store_reorder_across_aliasing_store1(i1 %cc, i32* %a, i32* %b, i32* %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  store i32 0, i32* %b
+  store i32 1, i32* %a
+  %0 = load i32, i32* %c
+  br label %if.end
+
+if.else:
+  store i32 1, i32* %a
+  %1 = load i32, i32* %c
+  %2 = add nsw i32 %1, 1
+  %3 = mul nsw i32 %2, 2
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%0, %if.then], [%3, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @no_store_reorder_across_aliasing_store1(i1 %cc, i32* %a, i32* %b, i32* %c) {
+; CHECK: entry:
+; CHECK-NEXT: br i1 %cc, label %if.then, label %if.else
+; CHECK: if.then:
+; CHECK-NEXT: store i32 0, i32* %b, align 4
+; CHECK-NEXT: store i32 1, i32* %a, align 4
+; CHECK: if.else:
+; CHECK-NEXT: store i32 1, i32* %a, align 4
+
+define dso_local i32 @no_store_reorder_across_aliasing_load0(i1 %cc, i32* %a, i32 %b, i32* %p) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  store i32 0, i32* %p
+  %0 = add nsw i32 %b, 1
+  br label %if.end
+
+if.else:
+  %1 = load i32, i32* %a
+  store i32 0, i32* %p
+  %2 = add nsw i32 %1, %b
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%0, %if.then], [%2, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @no_store_reorder_across_aliasing_load0
+; CHECK: entry:
+; CHECK-NEXT: br i1 %cc, label %if.then, label %if.else
+; CHECK: if.then:
+; CHECK-NEXT: store i32 0, i32* %p, align 4
+; CHECK: if.else:
+; CHECK-NEXT: %1 = load i32, i32* %a, align 4
+; CHECK-NEXT: store i32 0, i32* %p, align 4
+
+define dso_local i32 @no_store_reorder_across_aliasing_load1(i1 %cc, i32* %a, i32* %b, i32* %p) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = load i32, i32* %a
+  store i32 0, i32* %p
+  %1 = add nsw i32 %0, 1
+  br label %if.end
+
+if.else:
+  %2 = load i32, i32* %a
+  %3 = load i32, i32* %b
+  store i32 0, i32* %p
+  %4 = add nsw i32 %2, %3
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%1, %if.then], [%4, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @no_store_reorder_across_aliasing_load1
+; CHECK: entry:
+; CHECK-NEXT: %0 = load i32, i32* %a, align 4
+; CHECK-NEXT: br i1 %cc, label %if.then, label %if.else
+; CHECK: if.then:
+; CHECK-NEXT: store i32 0, i32* %p, align 4
+; CHECK-NEXT: %1 = add nsw i32 %0, 1
+; CHECK: if.else:
+; CHECK-NEXT: %2 = load i32, i32* %b, align 4
+; CHECK-NEXT: store i32 0, i32* %p, align 4
+; CHECK-NEXT: %3 = add nsw i32 %0, %2
+; CHECK: if.end:
+; CHECK-NEXT: %r = phi i32 [ %1, %if.then ], [ %3, %if.else ]
+
+define dso_local void @store_reorder_across_nonaliasing_store(i1 %cc, i32* %b, i32* %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  store i32 0, i32* %b, !tbaa !5
+  store i32 1, i32* %c, !tbaa !6
+  br label %if.end
+
+if.else:
+  store i32 1, i32* %c, !tbaa !6
+  store i32 0, i32* %b, !tbaa !5
+  br label %if.end
+
+if.end:
+  ret void
+}
+; CHECK-LABEL: @store_reorder_across_nonaliasing_store
+; CHECK: entry:
+; CHECK-NEXT: store i32 1, i32* %c, align 4, !tbaa ![[#C]]
+; CHECK-NEXT: store i32 0, i32* %b, align 4, !tbaa ![[#B]]
+; CHECK: if.then:
+; CHECK-NEXT: br label %if.end
+; CHECK: if.else:
+; CHECK-NEXT: br label %if.end
+
+define dso_local i32 @store_reorder_across_nonaliasing_load(i1 %cc, i32* %b, i32* %c) {
+entry:
+  br i1 %cc, label %if.then, label %if.else
+
+if.then:
+  %0 = load i32, i32* %b, !tbaa !5
+  store i32 1, i32* %c, !tbaa !6
+  br label %if.end
+
+if.else:
+  store i32 1, i32* %c, !tbaa !6
+  %1 = load i32, i32* %b
+  br label %if.end
+
+if.end:
+  %r = phi i32 [%0, %if.then], [%1, %if.else]
+  ret i32 %r
+}
+; CHECK-LABEL: @store_reorder_across_nonaliasing_load
+; CHECK: entry:
+; CHECK-NEXT: store i32 1, i32* %c, align 4, !tbaa !4
+; CHECK: if.then:
+; CHECK-NEXT: %0 = load i32, i32* %b, align 4, !tbaa !0
+; CHECK: if.else:
+; CHECK-NEXT: %1 = load i32, i32* %b, align 4
+
+!0 = !{!"TBAA"}
+!1 = !{!"A", !0, i64 0}
+!2 = !{!"B", !1, i64 0}
+!3 = !{!"C", !1, i64 0}
+!4 = !{!1, !1, i64 0}
+!5 = !{!2, !2, i64 0}
+!6 = !{!3, !3, i64 0}
+
+; CHECK: ![[#B]] = !{!1, !1, i64 0}
+; CHECK: !1 = !{!"B", !2, i64 0}
+; CHECK: !2 = !{!"A", !3, i64 0}
+; CHECK: !3 = !{!"TBAA"}
+; CHECK: ![[#C]] = !{!5, !5, i64 0}
+; CHECK: !5 = !{!"C", !2, i64 0}
+; CHECK: ![[#A]] = !{!2, !2, i64 0}
Index: llvm/test/Transforms/GVN/simple-gvnhoist-scalars.ll
===================================================================
--- llvm/test/Transforms/GVN/simple-gvnhoist-scalars.ll
+++ llvm/test/Transforms/GVN/simple-gvnhoist-scalars.ll
@@ -74,18 +74,18 @@
   ret i32 %r
 }
 
-; speculation barrier
+; speculation barrier (it could have hoisted the `sdiv`s, but the search stops
+; at the `store` before the `store` itself is hoisted).
 define dso_local i32 @spec_barrier1(i1 %cc, i32 %a, i32 %b, i32 %c, i32* %p) {
 ; CHECK-LABEL: @spec_barrier1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    store volatile i32 0, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br i1 [[CC:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    store volatile i32 0, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = sdiv i32 [[C:%.*]], [[TMP0]]
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    store volatile i32 0, i32* [[P]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = sdiv i32 [[C]], [[TMP0]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
@@ -200,7 +200,7 @@
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul nsw i32 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    br label [[IF_END:%.*]]
 ; CHECK:       if.else:
-; CHECK-NEXT:    [[TMP4:%.*]] = load atomic volatile i32, i32* [[P]] acquire, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load atomic volatile i32, i32* [[P]] monotonic, align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
@@ -218,7 +218,7 @@
   br label %if.end
 
 if.else:
-  %4 = load atomic volatile i32, i32* %p acquire, align 4
+  %4 = load atomic volatile i32, i32* %p monotonic, align 4
   %5 = sdiv i32 %a, %b
   %6 = add nsw i32 %c, %5
   %7 = mul nsw i32 %4, %6
Index: llvm/test/Transforms/GVNHoist/hoist-inline.ll
===================================================================
--- llvm/test/Transforms/GVNHoist/hoist-inline.ll
+++ llvm/test/Transforms/GVNHoist/hoist-inline.ll
@@ -3,8 +3,8 @@
 ; Check that the inlined loads are hoisted.
 ; CHECK-LABEL: define i32 @fun(
 ; CHECK-LABEL: entry:
+; CHECK-NOT: if.then
 ; CHECK: load i32, i32* @A
-; CHECK: if.then:
 
 @A = external global i32
 @B = external global i32
Index: llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
===================================================================
--- llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
+++ llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
@@ -2,7 +2,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; RUN: opt -O2 -S < %s | FileCheck %s
+; RUN: opt -O2 -S --enable-simple-gvn-hoist=false < %s | FileCheck %s
 
 ; CHECK-LABEL: main
 ; CHECK: if.end
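
For trying the change locally, a minimal reproducer (a sketch; it assumes the -enable-simple-gvn-hoist flag used in the updated RUN lines above is on by default, so the second RUN line shows the old behavior):

  ; RUN: opt -S -passes=gvn %s | FileCheck %s
  ; RUN: opt -S -passes=gvn -enable-simple-gvn-hoist=false %s
  define i32 @diamond(i1 %cc, i32* %p) {
  entry:
    br i1 %cc, label %t, label %e
  t:
    %0 = load i32, i32* %p
    br label %end
  e:
    %1 = load i32, i32* %p
    br label %end
  end:
    %r = phi i32 [ %0, %t ], [ %1, %e ]
    ret i32 %r
  }
  ; With the hoist enabled, the load should appear once, in %entry:
  ; CHECK: entry:
  ; CHECK-NEXT: %0 = load i32, i32* %p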