Index: llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp
+++ llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -357,15 +357,11 @@
     int MatchingId = -1;
     bool IsAtomic = false;
 
-    // TODO: Remove this flag. It would be strictly stronger to add a record
-    // to the AvailableInvariant table when passing the invariant load instead.
-    bool IsInvariant = false;
-
     LoadValue() = default;
     LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
-              bool IsAtomic, bool IsInvariant)
+              bool IsAtomic)
         : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
-          IsAtomic(IsAtomic), IsInvariant(IsInvariant) {}
+          IsAtomic(IsAtomic) {}
   };
 
   using LoadMapAllocator =
@@ -889,6 +885,14 @@
         ++CurrentGeneration;
       }
 
+      if (MemInst.isInvariantLoad()) {
+        // If we pass an invariant load, we know that memory location is
+        // indefinitely constant from the moment of first dereferenceability.
+        // We conservatively treat the invariant_load as that moment.
+        auto MemLoc = MemoryLocation::get(Inst);
+        AvailableInvariants.insert(MemLoc, CurrentGeneration);
+      }
+
       // If we have an available version of this load, and if it is the right
       // generation or the load is known to be from an invariant location,
       // replace this instruction.
@@ -903,8 +907,7 @@
           !MemInst.isVolatile() && MemInst.isUnordered() &&
           // We can't replace an atomic load with one which isn't also atomic.
           InVal.IsAtomic >= MemInst.isAtomic() &&
-          (InVal.IsInvariant ||
-           isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
+          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
            isSameMemGeneration(InVal.Generation, CurrentGeneration,
                                InVal.DefInst, Inst))) {
         Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
@@ -925,7 +928,7 @@
       AvailableLoads.insert(
           MemInst.getPointerOperand(),
           LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
-                    MemInst.isAtomic(), MemInst.isInvariantLoad()));
+                    MemInst.isAtomic()));
       LastStore = nullptr;
       continue;
     }
@@ -1050,7 +1053,7 @@
         AvailableLoads.insert(
             MemInst.getPointerOperand(),
             LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
-                      MemInst.isAtomic(), /*IsInvariant=*/false));
+                      MemInst.isAtomic()));
 
         // Remember that this was the last unordered store we saw for DSE. We
         // don't yet handle DSE on ordered or volatile stores since we don't
Index: llvm/trunk/test/Transforms/EarlyCSE/invariant-loads.ll
===================================================================
--- llvm/trunk/test/Transforms/EarlyCSE/invariant-loads.ll
+++ llvm/trunk/test/Transforms/EarlyCSE/invariant-loads.ll
@@ -95,11 +95,43 @@
   ret void
 }
 
-define void @test_false_negative_dse(i32* %p, i1 %cnd) {
-; CHECK-LABEL: @test_false_negative_dse
-; CHECK: store
+; By assumption, the call can't change contents of p
+; LangRef is a bit unclear about whether the store is reachable, so
+; for the moment we chose to be conservative and just assume it's valid
+; to restore the same unchanging value.
+define void @test_dse1(i32* %p) {
+; CHECK-LABEL: @test_dse1
+; CHECK-NOT: store
   %v1 = load i32, i32* %p, !invariant.load !{}
   call void @clobber_and_use(i32 %v1)
   store i32 %v1, i32* %p
   ret void
 }
+
+; By assumption, v1 must equal v2 (TODO)
+define void @test_false_negative_dse2(i32* %p, i32 %v2) {
+; CHECK-LABEL: @test_false_negative_dse2
+; CHECK: store
+  %v1 = load i32, i32* %p, !invariant.load !{}
+  call void @clobber_and_use(i32 %v1)
+  store i32 %v2, i32* %p
+  ret void
+}
+
+; If we remove the load, we still start an invariant scope since
+; it lets us remove later loads not explicitly marked invariant
+define void @test_scope_start_without_load(i32* %p) {
+; CHECK-LABEL: @test_scope_start_without_load
+; CHECK: %v1 = load i32, i32* %p
+; CHECK: %add = add i32 %v1, %v1
+; CHECK: call void @clobber_and_use(i32 %add)
+; CHECK: call void @clobber_and_use(i32 %v1)
+; CHECK: ret void
+  %v1 = load i32, i32* %p
+  %v2 = load i32, i32* %p, !invariant.load !{}
+  %add = add i32 %v1, %v2
+  call void @clobber_and_use(i32 %add)
+  %v3 = load i32, i32* %p
+  call void @clobber_and_use(i32 %v3)
+  ret void
+}
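
For reference, the new @test_dse1 behavior can be exercised in isolation. The snippet below is a minimal sketch, not part of the patch: the file name dse1.ll and the opt invocation are illustrative assumptions, and the authoritative RUN lines remain the ones already at the top of invariant-loads.ll.

; dse1.ll -- reduced copy of @test_dse1 from the hunk above.
; Run e.g. with the legacy pass manager: opt -S -early-cse dse1.ll
; (or -passes=early-cse with the new pass manager).
declare void @clobber_and_use(i32)

define void @test_dse1(i32* %p) {
  ; The !invariant.load starts an invariant scope for %p, so the store
  ; below only writes back a value known to be unchanged; with this
  ; patch EarlyCSE deletes it (matching the CHECK-NOT: store above).
  %v1 = load i32, i32* %p, !invariant.load !{}
  call void @clobber_and_use(i32 %v1)
  store i32 %v1, i32* %p
  ret void
}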