diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
--- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -363,6 +363,7 @@
   PredIteratorCache PredCache;
 
   unsigned DefaultBlockScanLimit;
+  Optional<int32_t> ClobberOffset;
 
 public:
   MemoryDependenceResults(AAResults &AA, AssumptionCache &AC,
@@ -468,6 +469,8 @@
   /// Release memory in caches.
   void releaseMemory();
 
+  Optional<int32_t> getClobberOffset() const { return ClobberOffset; }
+
 private:
   MemDepResult getCallDependencyFrom(CallBase *Call, bool isReadOnlyCall,
                                      BasicBlock::iterator ScanIt,
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -512,16 +512,12 @@
       if (R == AliasResult::MustAlias)
         return MemDepResult::getDef(Inst);
 
-#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
-      // in terms of clobbering loads, but since it does this by looking
-      // at the clobbering load directly, it doesn't know about any
-      // phi translation that may have happened along the way.
-
       // If we have a partial alias, then return this as a clobber for the
       // client to handle.
-      if (R == AliasResult::PartialAlias)
+      if (R == AliasResult::PartialAlias && R.hasOffset()) {
+        ClobberOffset = R.getOffset();
         return MemDepResult::getClobber(Inst);
-#endif
+      }
 
       // Random may-alias loads don't depend on each other without a
       // dependence.
@@ -640,6 +636,7 @@
 }
 
 MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
+  ClobberOffset = None;
   Instruction *ScanPos = QueryInst;
 
   // Check for a cached result
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -998,9 +998,22 @@
      // Can't forward from non-atomic to atomic without violating memory model.
      if (DepLoad != Load && Address &&
          Load->isAtomic() <= DepLoad->isAtomic()) {
-        int Offset = analyzeLoadFromClobberingLoad(Load->getType(), Address,
-                                                   DepLoad, DL);
-
+        Type *LoadType = Load->getType();
+        int Offset = -1;
+
+        // If MemoryDependenceAnalysis reported a clobber with a known offset,
+        // reuse that offset from the MD result instead of recomputing it.
+        if (DepInfo.isClobber() &&
+            canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) {
+          const auto ClobberOff = MD->getClobberOffset();
+          // TODO: GVN does not handle negative offsets yet.
+          Offset = (ClobberOff == None || ClobberOff.getValue() < 0)
+                       ? -1
+                       : ClobberOff.getValue();
+        }
+        if (Offset == -1)
+          Offset =
+              analyzeLoadFromClobberingLoad(LoadType, Address, DepLoad, DL);
        if (Offset != -1) {
          Res = AvailableValue::getLoad(DepLoad, Offset);
          return true;
diff --git a/llvm/test/Transforms/GVN/clobber-partial-alias.ll b/llvm/test/Transforms/GVN/clobber-partial-alias.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GVN/clobber-partial-alias.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -gvn -S | FileCheck %s
+
+define float @foo(float* %arg, i32 %i) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I2:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT:    [[I3:%.*]] = getelementptr inbounds float, float* [[ARG:%.*]], i64 [[I2]]
+; CHECK-NEXT:    [[I4:%.*]] = bitcast float* [[I3]] to <2 x float>*
+; CHECK-NEXT:    [[I5:%.*]] = load <2 x float>, <2 x float>* [[I4]], align 16
+; CHECK-NEXT:    [[I7:%.*]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[I8:%.*]] = zext i32 [[I7]] to i64
+; CHECK-NEXT:    [[I9:%.*]] = getelementptr inbounds float, float* [[ARG]], i64 [[I8]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[I5]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 32
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
+; CHECK-NEXT:    [[I16:%.*]] = extractelement <2 x float> [[I5]], i32 0
+; CHECK-NEXT:    [[I17:%.*]] = fmul float [[I16]], [[TMP3]]
+; CHECK-NEXT:    ret float [[I17]]
+;
+bb:
+  %i2 = zext i32 %i to i64
+  %i3 = getelementptr inbounds float, float* %arg, i64 %i2
+  %i4 = bitcast float* %i3 to <2 x float>*
+  %i5 = load <2 x float>, <2 x float>* %i4, align 16
+  %i7 = add nuw nsw i32 %i, 1
+  %i8 = zext i32 %i7 to i64
+  %i9 = getelementptr inbounds float, float* %arg, i64 %i8
+  %i10 = load float, float* %i9, align 4
+  %i16 = extractelement <2 x float> %i5, i32 0
+  %i17 = fmul float %i16, %i10
+  ret float %i17
+}