Use offsets stored in AliasResult implemented in D98718.
Updated with fix of issue reported in https://reviews.llvm.org/D95543#2745161
Differential D95543
[GVN] Clobber partially aliased loads. dfukalov on Jan 27 2021, 10:17 AM. Authored by
Details Use offsets stored in AliasResult implemented in D98718. Updated with fix of issue reported in https://reviews.llvm.org/D95543#2745161
Diff Detail
Event TimelineComment Actions Performance-wise this looks good.
Comment Actions LGTM, you might want to pick up the additional test cases.
Comment Actions We're seeing some issues with this patch, potentially a miscompile. When we enable assertions we get an error about widening atomic loads -- I'm not sure this is the source of the miscompile we're seeing, but it certainly looks related (I think this is causing corruption and leading to issues elsewhere). $ cat repro.ll ; ModuleID = 'repro.ll' source_filename = "repro.ll" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" %struct.widget = type { i32 } %struct.baz = type { i32, %struct.snork } %struct.snork = type { %struct.spam } %struct.spam = type { i32, i32 } @global = external local_unnamed_addr global %struct.widget, align 4 @global.1 = external local_unnamed_addr global i8, align 1 @global.2 = external local_unnamed_addr global i32, align 4 define void @zot(%struct.baz* %arg) local_unnamed_addr align 2 { bb: %tmp = getelementptr inbounds %struct.baz, %struct.baz* %arg, i64 0, i32 1 %tmp1 = bitcast %struct.snork* %tmp to i64* %tmp2 = load i64, i64* %tmp1, align 4 %tmp3 = getelementptr inbounds %struct.baz, %struct.baz* %arg, i64 0, i32 1, i32 0, i32 1 %tmp4 = icmp ugt i64 %tmp2, 4294967295 br label %bb5 bb5: ; preds = %bb14, %bb %tmp6 = load i32, i32* %tmp3, align 4 %tmp7 = icmp ne i32 %tmp6, 0 %tmp8 = select i1 %tmp7, i1 %tmp4, i1 false %tmp9 = zext i1 %tmp8 to i8 store i8 %tmp9, i8* @global.1, align 1 %tmp10 = load i32, i32* @global.2, align 4 switch i32 %tmp10, label %bb11 [ i32 1, label %bb12 i32 2, label %bb12 ] bb11: ; preds = %bb5 br label %bb14 bb12: ; preds = %bb5, %bb5 %tmp13 = load atomic i32, i32* getelementptr inbounds (%struct.widget, %struct.widget* @global, i64 0, i32 0) acquire, align 4 br label %bb14 bb14: ; preds = %bb12, %bb11 br label %bb5 } $ opt -O2 repro.ll -disable-output opt: /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp:496: llvm::Value *llvm::VNCoercion::getLoadValueForLoad(llvm::LoadInst *, unsigned int, llvm::Type *, llvm::Instruction *, const llvm::DataLayout &): Assertion `SrcVal->isSimple() && "Cannot widen volatile/atomic load!"' failed. PLEASE submit a bug report to https://bugs.llvm.org/ and include the crash backtrace. Stack dump: 0. Program arguments: /home/rupprecht/dev/opt -O2 repro.ll -disable-output ... #10 0x000000000768c138 llvm::VNCoercion::getLoadValueForLoad(llvm::LoadInst*, unsigned int, llvm::Type*, llvm::Instruction*, llvm::DataLayout const&) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Utils/VNCoercion.cpp:497:5 #11 0x00000000070a01ad llvm::gvn::AvailableValue::MaterializeAdjustedValue(llvm::LoadInst*, llvm::Instruction*, llvm::GVN&) const /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp:902:11 #12 0x00000000070ae067 llvm::gvn::AvailableValueInBlock::MaterializeAdjustedValue(llvm::LoadInst*, llvm::GVN&) const /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp:281:5 #13 0x00000000070a1d11 ConstructSSAForLoadSet(llvm::LoadInst*, llvm::SmallVectorImpl<llvm::gvn::AvailableValueInBlock>&, llvm::GVN&) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp:874:40 #14 0x00000000070a3ce6 llvm::GVN::processNonLocalLoad(llvm::LoadInst*) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp:1614:12 #15 0x00000000070a610a llvm::GVN::processLoad(llvm::LoadInst*) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp:1866:5 #16 0x00000000070a7398 llvm::GVN::processInstruction(llvm::Instruction*) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp:2292:9 #17 0x00000000070a8175 llvm::GVN::processBlock(llvm::BasicBlock*) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp:2490:24 #18 0x00000000070a7c4f llvm::GVN::iterateOnFunction(llvm::Function&) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp:2837:16 #19 0x000000000709fdd6 llvm::GVN::runImpl(llvm::Function&, llvm::AssumptionCache&, llvm::DominatorTree&, llvm::TargetLibraryInfo const&, llvm::AAResults&, llvm::MemoryDependenceResults*, llvm::LoopInfo*, llvm::OptimizationRemarkEmitter*, llvm::MemorySSA*) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp:2436:20 #20 0x000000000709fa6e llvm::GVN::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /home/rupprecht/src/llvm-project/llvm/lib/Transforms/Scalar/GVN.cpp:674:8 #21 0x0000000007873fb7 llvm::detail::PassModel<llvm::Function, llvm::GVN, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::Function> >::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /home/rupprecht/src/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:85:17 #22 0x000000000691574c llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function> >::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /home/rupprecht/src/llvm-project/llvm/include/llvm/IR/PassManager.h:517:16 #23 0x0000000004617e77 llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function> >, llvm::PreservedAnalyses, llvm::AnalysisManager<llvm::Function> >::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) /home/rupprecht/src/llvm-project/llvm/include/llvm/IR/PassManagerInternal.h:85:17 Comment Actions Thanks for taking a look. Would it be all right to revert in the meantime? (I'm assuming the fix is not trivial) Comment Actions Temporarily reverted in fec2945998947f04d672e9c5f33b57f7177474c0 to keep trunk clean. I'm not a domain expert here, but I can test out a fix for this patch when you have one ready. Comment Actions Hi @nikic, I've prepared updated patch with fix for the issue reported recently. Should I create new review request or reopen this one and update the diff? Comment Actions Either way is fine. If you reopen this one, please also mark it as "review requested", so it goes back to the queue. Comment Actions Atomic loads are stored in MD as clobber dependent in caches so GVN got a trash Added storage map for offsets of different dependent loads, updated tests. Comment Actions Updated patch LG -- the unreduced test passes now (and has no assertion errors). Thanks! (Deferring to @nikic or others for actual re-review). Comment Actions Something that's not quite clear to me is why the Offset = None at the start of getDependency() wasn't sufficient. For the atomic load clobber case, wouldn't it keep that None value? Comment Actions getDependency() call is just a start of processing in GVN, and MemoryDependenceResults contains a number of a load' dependencies. One of them was NonLocal clobbering dependency between %tmp4.1 = load i8... and %tmp6.1 = load atomic i8... since latter is atomic (e.g. MemoryDependenceAnalysis.cpp$494-500). And GVN got this clobbering dependency not from getDependency(). Let's see what happening when GVN processes %tmp4.1 = load i8, i8* %tmp3.1, align 4 from the following IR define void @load_load_partial_alias_atomic(i8* %arg) { bb: %tmp1.1 = getelementptr inbounds i8, i8* %arg, i64 0 %tmp2.1 = getelementptr inbounds i8, i8* %arg, i64 1 %tmp2.2 = bitcast i8* %tmp2.1 to i64* %tmp2.3 = load i64, i64* %tmp2.2, align 4 %tmp2.4 = icmp ugt i64 %tmp2.3, 1 %tmp3.1 = getelementptr inbounds i8, i8* %arg, i64 2 br label %bb5 bb5: ; preds = %bb14, %bb %tmp4.1 = load i8, i8* %tmp3.1, align 4 %tmp6.1 = load atomic i8, i8* getelementptr inbounds (i8, i8* @global, i64 0) acquire, align 4 %tmp7.1 = add i8 %tmp6.1, %tmp4.1 store i8 %tmp7.1, i8* %tmp1.1 br label %bb5 } At first, getDependency() returns just NonLocal dependency and GVN falls to processNonLocalLoad() (GVN.cpp$1849-1853). Comment Actions Thanks for the explanation! Probably the best solution would be the same we did in AA, which is to embed the offset in MemDepResult, then we couldn't have this kind of "out of sync" issue by design. But this also LGTM. Comment Actions Yes, I thought about packing offset to MemDepResult, but it is already a packed pointer with other info and it seems to be overkill to double its size for a quite rare offset value. |
Do you have any information on why this is not a concern anymore? The comment was introduced in https://github.com/llvm/llvm-project/commit/a471751c24324e7ba6ac5c612dbedb16c644fc44, unfortunately without a test case :(