Index: llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp
+++ llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -281,21 +281,31 @@
   /// that dominated values can succeed in their lookup.
   ScopedHTType AvailableValues;
 
-  /// \brief A scoped hash table of the current values of loads.
+  /// A scoped hash table of the current values of previously encountered
+  /// memory locations.
   ///
-  /// This allows us to get efficient access to dominating loads when we have
-  /// a fully redundant load.  In addition to the most recent load, we keep
-  /// track of a generation count of the read, which is compared against the
-  /// current generation count.  The current generation count is incremented
+  /// This allows us to get efficient access to dominating loads or stores when
+  /// we have a fully redundant load.  In addition to the most recent load, we
+  /// keep track of a generation count of the read, which is compared against
+  /// the current generation count.  The current generation count is incremented
   /// after every possibly writing memory operation, which ensures that we only
-  /// CSE loads with other loads that have no intervening store.
+  /// CSE loads with other loads that have no intervening store.  Ordering
+  /// events (such as fences or atomic instructions) increment the generation
+  /// count as well; essentially, we model these as writes to all possible
+  /// locations.  Note that atomic and/or volatile loads and stores can be
+  /// present in the table; it is the responsibility of the consumer to inspect
+  /// the atomicity/volatility if needed.
   struct LoadValue {
     Value *Data;
     unsigned Generation;
     int MatchingId;
-    LoadValue() : Data(nullptr), Generation(0), MatchingId(-1) {}
-    LoadValue(Value *Data, unsigned Generation, unsigned MatchingId)
-        : Data(Data), Generation(Generation), MatchingId(MatchingId) {}
+    bool IsAtomic;
+    LoadValue()
+        : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
+    LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
+              bool IsAtomic)
+        : Data(Data), Generation(Generation), MatchingId(MatchingId),
+          IsAtomic(IsAtomic) {}
   };
   typedef RecyclingAllocator<BumpPtrAllocator,
                              ScopedHashTableVal<Value *, LoadValue>>
@@ -410,6 +420,42 @@
       }
       return Inst->isAtomic();
     }
+    bool isAtomic() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return false;
+      }
+      return Inst->isAtomic();
+    }
+    bool isUnordered() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return true;
+      }
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        return LI->isUnordered();
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        return SI->isUnordered();
+      }
+      // Conservative answer
+      return !Inst->isAtomic();
+    }
+
+    bool isVolatile() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return false;
+      }
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        return LI->isVolatile();
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        return SI->isVolatile();
+      }
+      // Conservative answer
+      return true;
+    }
+
     bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
       return (getPointerOperand() == Inst.getPointerOperand() &&
               getMatchingId() == Inst.getMatchingId());
@@ -561,20 +607,22 @@
     ParseMemoryInst MemInst(Inst, TTI);
     // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
-      // Ignore volatile or ordered loads.
-      if (!MemInst.isSimple()) {
+      // (conservatively) we can't peek past the ordering implied by this
+      // operation, but we can add this load to our set of available values
+      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
         LastStore = nullptr;
-        // Don't CSE across synchronization boundaries.
-        if (Inst->mayWriteToMemory())
-          ++CurrentGeneration;
-        continue;
+        ++CurrentGeneration;
       }
 
       // If we have an available version of this load, and if it is the right
       // generation, replace this instruction.
       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
       if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
-          InVal.MatchingId == MemInst.getMatchingId()) {
+          InVal.MatchingId == MemInst.getMatchingId() &&
+          // We don't yet handle removing loads with ordering of any kind.
+          !MemInst.isVolatile() && MemInst.isUnordered() &&
+          // We can't replace an atomic load with one which isn't also atomic.
+          InVal.IsAtomic >= MemInst.isAtomic()) {
         Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
         if (Op != nullptr) {
           DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
@@ -591,7 +639,8 @@
       // Otherwise, remember that we have this instruction.
       AvailableLoads.insert(
           MemInst.getPointerOperand(),
-          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
+          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+                    MemInst.isAtomic()));
       LastStore = nullptr;
       continue;
     }
@@ -646,9 +695,12 @@
 
       if (MemInst.isValid() && MemInst.isStore()) {
         // We do a trivial form of DSE if there are two stores to the same
-        // location with no intervening loads.  Delete the earlier store.
+        // location with no intervening loads.  Delete the earlier store.  Note
+        // that we can delete an earlier simple store even if the following one
+        // is an ordered/volatile/atomic store.
         if (LastStore) {
           ParseMemoryInst LastStoreMemInst(LastStore, TTI);
+          assert(LastStoreMemInst.isSimple() && "Violated invariant");
           if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
             DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                          << "  due to: " << *Inst << '\n');
@@ -667,11 +719,17 @@
         // the store.
         AvailableLoads.insert(
             MemInst.getPointerOperand(),
-            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
+            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+                      MemInst.isAtomic()));
 
         // Remember that this was the last normal store we saw for DSE.
+        // Note that we can't delete an earlier atomic or volatile store in
+        // favor of a later one which isn't.  We could in principle remove an
+        // earlier unordered store if the later one is also unordered.
         if (MemInst.isSimple())
           LastStore = Inst;
+        else
+          LastStore = nullptr;
       }
     }
   }
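
The load-forwarding rule the hunks above add can be read off the new lookup condition: a cached LoadValue is reused only if its generation matches the current one, the load being removed is neither volatile nor ordered, and an atomic load is never satisfied by a non-atomic entry. Below is a minimal standalone C++ sketch of that rule, illustrative only and not part of the patch; CachedValue, Access and canForward are made-up names, and the real pass additionally consults TargetTransformInfo and the MatchingId machinery.

// avail_sketch.cpp - illustrative model of the load-forwarding rule above;
// not LLVM code, and not part of this patch.
#include <cassert>
#include <cstdio>
#include <map>
#include <string>

// Stripped-down analogue of EarlyCSE's LoadValue: the forwarded value, the
// generation it was recorded in, and whether the recording access was an
// (unordered) atomic.
struct CachedValue {
  int Data;
  unsigned Generation;
  bool IsAtomic;
};

// Stripped-down analogue of ParseMemoryInst for a load being visited.
struct Access {
  std::string Ptr;
  bool IsAtomic;   // unordered atomic access
  bool IsVolatile; // volatile access
};

// The rule mirrored from the patch: forward only when the generations match,
// the load being removed is neither volatile nor ordered, and an atomic load
// is never satisfied by a non-atomic cache entry.
static bool canForward(const CachedValue &Entry, const Access &A,
                       unsigned CurrentGeneration) {
  if (Entry.Generation != CurrentGeneration)
    return false; // an intervening write or ordering event occurred
  if (A.IsVolatile)
    return false; // volatile accesses are never removed
  return Entry.IsAtomic >= A.IsAtomic; // atomic->non-atomic ok, not the reverse
}

int main() {
  std::map<std::string, CachedValue> AvailableLoads;
  unsigned CurrentGeneration = 0;

  // A plain load of %P1 records a non-atomic entry (compare @test16 below).
  AvailableLoads["%P1"] = {42, CurrentGeneration, /*IsAtomic=*/false};

  // A later unordered atomic load of %P1 must not be forwarded from it.
  assert(!canForward(AvailableLoads["%P1"],
                     {"%P1", /*IsAtomic=*/true, /*IsVolatile=*/false},
                     CurrentGeneration));

  // An atomic entry may feed a plain load (compare @test13 below).
  AvailableLoads["%P2"] = {7, CurrentGeneration, /*IsAtomic=*/true};
  assert(canForward(AvailableLoads["%P2"],
                    {"%P2", /*IsAtomic=*/false, /*IsVolatile=*/false},
                    CurrentGeneration));

  // A seq_cst access or fence bumps the generation, killing all entries.
  ++CurrentGeneration;
  assert(!canForward(AvailableLoads["%P2"],
                     {"%P2", /*IsAtomic=*/false, /*IsVolatile=*/false},
                     CurrentGeneration));

  std::puts("forwarding-rule sketch OK");
  return 0;
}

Keeping entries in the table and rejecting them at lookup time, rather than erasing them, matches the patch's note that atomic and volatile accesses may sit in the table and the consumer is responsible for checking their atomicity and volatility.
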
Index: llvm/trunk/test/Transforms/EarlyCSE/atomics.ll
===================================================================
--- llvm/trunk/test/Transforms/EarlyCSE/atomics.ll
+++ llvm/trunk/test/Transforms/EarlyCSE/atomics.ll
@@ -0,0 +1,127 @@
+; RUN: opt < %s -S -early-cse | FileCheck %s
+
+; CHECK-LABEL: @test12(
+define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
+  %load0 = load i32, i32* %P1
+  %1 = load atomic i32, i32* %P2 seq_cst, align 4
+  %load1 = load i32, i32* %P1
+  %sel = select i1 %B, i32 %load0, i32 %load1
+  ret i32 %sel
+  ; CHECK: load i32, i32* %P1
+  ; CHECK: load i32, i32* %P1
+}
+
+; CHECK-LABEL: @test13(
+; atomic to non-atomic forwarding is legal
+define i32 @test13(i1 %B, i32* %P1) {
+  %a = load atomic i32, i32* %P1 seq_cst, align 4
+  %b = load i32, i32* %P1
+  %res = sub i32 %a, %b
+  ret i32 %res
+  ; CHECK: load atomic i32, i32* %P1
+  ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test14(
+; atomic to unordered atomic forwarding is legal
+define i32 @test14(i1 %B, i32* %P1) {
+  %a = load atomic i32, i32* %P1 seq_cst, align 4
+  %b = load atomic i32, i32* %P1 unordered, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+  ; CHECK: load atomic i32, i32* %P1
+  ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test15(
+; implementation restriction: can't forward to stronger
+; than unordered
+define i32 @test15(i1 %B, i32* %P1, i32* %P2) {
+  %a = load atomic i32, i32* %P1 seq_cst, align 4
+  %b = load atomic i32, i32* %P1 seq_cst, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+  ; CHECK: load atomic i32, i32* %P1
+  ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16(
+; forwarding non-atomic to atomic is wrong! (However,
+; it would be legal to use the later value in place of the
+; former in this particular example.  We just don't
+; do that right now.)
+define i32 @test16(i1 %B, i32* %P1, i32* %P2) {
+  %a = load i32, i32* %P1, align 4
+  %b = load atomic i32, i32* %P1 unordered, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+  ; CHECK: load i32, i32* %P1
+  ; CHECK: load atomic i32, i32* %P1
+}
+
+; Can't DSE across a full fence
+define void @test17(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test17
+; CHECK: store
+; CHECK: store atomic
+; CHECK: store
+  store i32 0, i32* %P1, align 4
+  store atomic i32 0, i32* %P2 seq_cst, align 4
+  store i32 0, i32* %P1, align 4
+  ret void
+}
+
+; Can't remove a volatile load
+define i32 @test18(i1 %B, i32* %P1, i32* %P2) {
+  %a = load i32, i32* %P1, align 4
+  %b = load volatile i32, i32* %P1, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+  ; CHECK-LABEL: @test18
+  ; CHECK: load i32, i32* %P1
+  ; CHECK: load volatile i32, i32* %P1
+}
+
+; Can't DSE a volatile store
+define void @test19(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test19
+; CHECK: store volatile
+; CHECK: store
+  store volatile i32 0, i32* %P1, align 4
+  store i32 3, i32* %P1, align 4
+  ret void
+}
+
+; Can value forward from volatiles
+define i32 @test20(i1 %B, i32* %P1, i32* %P2) {
+  %a = load volatile i32, i32* %P1, align 4
+  %b = load i32, i32* %P1, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+  ; CHECK-LABEL: @test20
+  ; CHECK: load volatile i32, i32* %P1
+  ; CHECK: ret i32 0
+}
+
+; Can DSE a non-volatile store in favor of a volatile one;
+; currently a missed optimization
+define void @test21(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test21
+; CHECK: store
+; CHECK: store volatile
+  store i32 0, i32* %P1, align 4
+  store volatile i32 3, i32* %P1, align 4
+  ret void
+}
+
+; Can DSE a normal store in favor of an unordered one
+define void @test22(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test22
+; CHECK-NEXT: store atomic
+  store i32 0, i32* %P1, align 4
+  store atomic i32 3, i32* %P1 unordered, align 4
+  ret void
+}
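
On the store side, the tests pin down the trivial DSE behaviour: only a simple (non-atomic, non-volatile) store is remembered as LastStore, a remembered store may be deleted when the next store to the same location arrives (@test22), and an atomic or volatile store is never deleted in favor of a later one (@test17, @test19); deleting a simple store in favor of a later volatile store is noted above as a currently missed optimization (@test21). Below is a minimal standalone C++ sketch of that rule, illustrative only and not part of the patch; it assumes no intervening loads, fences or generation bumps, and Store/runTrivialDSE are made-up names.

// dse_sketch.cpp - illustrative model of the trivial DSE rule exercised by
// @test19/@test22 above; not LLVM code, and not part of this patch.
#include <cassert>
#include <cstdio>
#include <string>
#include <vector>

// A toy store record: target location, whether the store is "simple"
// (non-atomic and non-volatile), and whether it survives the pass.
struct Store {
  std::string Ptr;
  bool IsSimple;
  bool Live;
};

// Only a simple store is remembered as LastStore; a remembered store is
// deleted when the next store to the same location is seen.  (The sketch
// applies this regardless of the later store's kind; the actual pass still
// misses the volatile-later case, see @test21.)
static void runTrivialDSE(std::vector<Store> &Stores) {
  Store *LastStore = nullptr;
  for (Store &S : Stores) {
    if (LastStore && LastStore->Ptr == S.Ptr) {
      assert(LastStore->IsSimple && "only simple stores are remembered");
      LastStore->Live = false; // the earlier simple store is dead
    }
    // Ordered/volatile/atomic stores must never be deleted in favor of a
    // later store, so they never become the remembered candidate.
    LastStore = S.IsSimple ? &S : nullptr;
  }
}

int main() {
  // Mirrors @test22: a plain store followed by an unordered atomic store to
  // the same address -- the plain store is removable.
  std::vector<Store> A = {{"%P1", /*IsSimple=*/true, /*Live=*/true},
                          {"%P1", /*IsSimple=*/false, /*Live=*/true}};
  runTrivialDSE(A);
  assert(!A[0].Live && A[1].Live);

  // Mirrors @test19: a volatile store followed by a plain store -- nothing is
  // deleted, because the volatile store was never remembered.
  std::vector<Store> B = {{"%P1", /*IsSimple=*/false, /*Live=*/true},
                          {"%P1", /*IsSimple=*/true, /*Live=*/true}};
  runTrivialDSE(B);
  assert(B[0].Live && B[1].Live);

  std::puts("trivial DSE sketch OK");
  return 0;
}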