diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -390,20 +390,29 @@
   const uint64_t LaterSize = Later.Size.getValue();
   const uint64_t EarlierSize = Earlier.Size.getValue();
 
-  const Value *P1 = Earlier.Ptr->stripPointerCasts();
-  const Value *P2 = Later.Ptr->stripPointerCasts();
+  // Query the alias information.
+  AliasResult AAR = AA.alias(Later, Earlier);
 
   // If the start pointers are the same, we just have to compare sizes to see if
   // the later store was larger than the earlier store.
-  if (P1 == P2 || AA.isMustAlias(P1, P2)) {
+  if (AAR == AliasResult::MustAlias) {
     // Make sure that the Later size is >= the Earlier size.
     if (LaterSize >= EarlierSize)
       return OW_Complete;
   }
 
+  // If we hit a partial alias, we may still have a full overwrite.
+  if (AAR == AliasResult::PartialAlias) {
+    int64_t Off = AA.getClobberOffset(Later, Earlier).getValueOr(0);
+    if (Off > 0 && (uint64_t)Off + EarlierSize <= LaterSize)
+      return OW_Complete;
+  }
+
   // Check to see if the later store is to the entire object (either a global,
   // an alloca, or a byval/inalloca argument).  If so, then it clearly
   // overwrites any other store to the same object.
+  const Value *P1 = Earlier.Ptr->stripPointerCasts();
+  const Value *P2 = Later.Ptr->stripPointerCasts();
   const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
 
   // If we can't resolve the same pointers to the same object, then we can't
@@ -987,8 +996,8 @@
 
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
            PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
-      : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
-        DL(F.getParent()->getDataLayout()) {}
+      : F(F), AA(AA), BatchAA(AA, /*CacheOffsets=*/true), MSSA(MSSA), DT(DT),
+        PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()) {}
 
   static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                       DominatorTree &DT, PostDominatorTree &PDT,
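For review context, here is a minimal standalone sketch of the predicate the new PartialAlias branch implements; the helper name and the asserts are illustrative only, not part of the patch. getClobberOffset is assumed to return the offset of the earlier store's start relative to the later store's start, so the later store is a complete overwrite exactly when the byte range [Off, Off + EarlierSize) fits inside [0, LaterSize):

  #include <cassert>
  #include <cstdint>

  // Hypothetical standalone model of the check added to isOverwrite() above.
  // Off: start of the earlier store relative to the start of the later one.
  static bool laterFullyOverwritesEarlier(int64_t Off, uint64_t EarlierSize,
                                          uint64_t LaterSize) {
    // The earlier store must begin after the later one (Off == 0 is already
    // handled by the MustAlias path) and must end no later than the later
    // store ends.
    return Off > 0 && (uint64_t)Off + EarlierSize <= LaterSize;
  }

  int main() {
    // @ArrayTestFullyOverlapping below: the i32 store at '@BUFFER + %0 - 4'
    // sits entirely inside the i64 store at '@BUFFER + %0 - 8'.
    assert(laterFullyOverwritesEarlier(/*Off=*/4, /*EarlierSize=*/4,
                                       /*LaterSize=*/8));
    // @ArrayTestPartiallyOverlapping below: the i32 store at '@BUFFER + %0 + 15'
    // ends one byte past the i64 store at '@BUFFER + %0 + 10' (5 + 4 > 8).
    assert(!laterFullyOverwritesEarlier(/*Off=*/5, /*EarlierSize=*/4,
                                        /*LaterSize=*/8));
    return 0;
  }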
diff --git a/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -dse -S | FileCheck %s
+
+@BUFFER = external local_unnamed_addr global [0 x i8], align 1
+
+define void @ArrayTestFullyOverlapping(i64 %0) {
+;
+; The DSE pass will kill the store of size i32 using the store of
+; size i64 because they fully overlap; in fact:
+;
+; - they use the same base pointer (in SCEV style '@BUFFER + %0')
+; - the offset between the two stores is 4 bytes (32 bits)
+; - the size of the earlier store is 32 bits
+; - the size of the later store is 64 bits
+;
+; CHECK-LABEL: @ArrayTestFullyOverlapping(
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP0:%.*]], -8
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
+; CHECK-NEXT:    store i64 0, i64* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
+  %2 = add i64 %0, -8
+  %3 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %2
+  %4 = bitcast i8* %3 to i64*
+  %5 = add i64 %0, -4
+  %6 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %5
+  %7 = bitcast i8* %6 to i32*
+  store i32 1, i32* %7
+  store i64 0, i64* %4
+  ret void
+}
+
+define void @VectorTestFullyOverlapping(float* %arg, i32 %i) {
+; CHECK-LABEL: @VectorTestFullyOverlapping(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I2:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT:    [[I3:%.*]] = getelementptr inbounds float, float* [[ARG:%.*]], i64 [[I2]]
+; CHECK-NEXT:    [[I4:%.*]] = bitcast float* [[I3]] to <2 x float>*
+; CHECK-NEXT:    store <2 x float> zeroinitializer, <2 x float>* [[I4]], align 16
+; CHECK-NEXT:    ret void
+;
+bb:
+  %i7 = add nuw nsw i32 %i, 1
+  %i8 = zext i32 %i7 to i64
+  %i9 = getelementptr inbounds float, float* %arg, i64 %i8
+  store float 0.0, float* %i9, align 4
+  %i2 = zext i32 %i to i64
+  %i3 = getelementptr inbounds float, float* %arg, i64 %i2
+  %i4 = bitcast float* %i3 to <2 x float>*
+  store <2 x float> <float 0.0, float 0.0>, <2 x float>* %i4, align 16
+  ret void
+}
+
+define void @ArrayTestPartiallyOverlapping(i64 %0) {
+;
+; The DSE pass will not kill the i32 store because the i64 store only
+; partially overlaps it and so does not fully clobber it.
+;
+; CHECK-LABEL: @ArrayTestPartiallyOverlapping(
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP0:%.*]], 10
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP0]], 15
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
+; CHECK-NEXT:    store i32 1, i32* [[TMP7]], align 4
+; CHECK-NEXT:    store i64 0, i64* [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
+  %2 = add i64 %0, 10
+  %3 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %2
+  %4 = bitcast i8* %3 to i64*
+  %5 = add i64 %0, 15
+  %6 = getelementptr inbounds [0 x i8], [0 x i8]* @BUFFER, i64 0, i64 %5
+  %7 = bitcast i8* %6 to i32*
+  store i32 1, i32* %7
+  store i64 0, i64* %4
+  ret void
+}
+
+define void @VectorTestPartiallyOverlapping(float* %arg, i32 %i) {
+;
+; The DSE pass will not kill the first store because the second store only
+; partially overlaps it and so does not fully clobber it.
+;
+; CHECK-LABEL: @VectorTestPartiallyOverlapping(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I2:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT:    [[I3:%.*]] = getelementptr inbounds float, float* [[ARG:%.*]], i64 [[I2]]
+; CHECK-NEXT:    [[I4:%.*]] = bitcast float* [[I3]] to <2 x float>*
+; CHECK-NEXT:    store <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float>* [[I4]], align 16
+; CHECK-NEXT:    [[I5:%.*]] = add nuw nsw i32 [[I]], 1
+; CHECK-NEXT:    [[I6:%.*]] = zext i32 [[I5]] to i64
+; CHECK-NEXT:    [[I7:%.*]] = getelementptr inbounds float, float* [[ARG]], i64 [[I6]]
+; CHECK-NEXT:    [[I8:%.*]] = bitcast float* [[I7]] to <2 x float>*
+; CHECK-NEXT:    store <2 x float> zeroinitializer, <2 x float>* [[I8]], align 16
+; CHECK-NEXT:    ret void
+;
+bb:
+  %i2 = zext i32 %i to i64
+  %i3 = getelementptr inbounds float, float* %arg, i64 %i2
+  %i4 = bitcast float* %i3 to <2 x float>*
+  store <2 x float> <float 1.0, float 1.0>, <2 x float>* %i4, align 16
+  %i5 = add nuw nsw i32 %i, 1
+  %i6 = zext i32 %i5 to i64
+  %i7 = getelementptr inbounds float, float* %arg, i64 %i6
+  %i8 = bitcast float* %i7 to <2 x float>*
+  store <2 x float> <float 0.0, float 0.0>, <2 x float>* %i8, align 16
+  ret void
+}
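Reading the vector tests with the same arithmetic (same sign convention as the sketch above): in @VectorTestFullyOverlapping the scalar float store covers bytes [4, 8) of the 8-byte <2 x float> store that follows it (Off = 4, and 4 + 4 <= 8), so it is removed. In @VectorTestPartiallyOverlapping the earlier <2 x float> store starts 4 bytes before the later one, so the earlier store's offset relative to the later one is negative, the Off > 0 guard fails, and both stores survive, which is what the CHECK lines verify.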