diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -7097,60 +7097,18 @@ Optional llvm::isPointerOffset(const Value *Ptr1, const Value *Ptr2, const DataLayout &DL) { - Ptr1 = Ptr1->stripPointerCasts(); - Ptr2 = Ptr2->stripPointerCasts(); + APInt Offset1(DL.getIndexTypeSizeInBits(Ptr1->getType()), 0); + APInt Offset2(DL.getIndexTypeSizeInBits(Ptr2->getType()), 0); + Ptr1 = Ptr1->stripAndAccumulateConstantOffsets(DL, Offset1, true); + Ptr2 = Ptr2->stripAndAccumulateConstantOffsets(DL, Offset2, true); // Handle the trivial case first. - if (Ptr1 == Ptr2) { - return 0; - } + if (Ptr1 == Ptr2) + return Offset2.getSExtValue() - Offset1.getSExtValue(); const GEPOperator *GEP1 = dyn_cast(Ptr1); const GEPOperator *GEP2 = dyn_cast(Ptr2); - // If one pointer is a GEP see if the GEP is a constant offset from the base, - // as in "P" and "gep P, 1". - // Also do this iteratively to handle the the following case: - // Ptr_t1 = GEP Ptr1, c1 - // Ptr_t2 = GEP Ptr_t1, c2 - // Ptr2 = GEP Ptr_t2, c3 - // where we will return c1+c2+c3. - // TODO: Handle the case when both Ptr1 and Ptr2 are GEPs of some common base - // -- replace getOffsetFromBase with getOffsetAndBase, check that the bases - // are the same, and return the difference between offsets. - auto getOffsetFromBase = [&DL](const GEPOperator *GEP, - const Value *Ptr) -> Optional { - const GEPOperator *GEP_T = GEP; - int64_t OffsetVal = 0; - bool HasSameBase = false; - while (GEP_T) { - auto Offset = getOffsetFromIndex(GEP_T, 1, DL); - if (!Offset) - return None; - OffsetVal += *Offset; - auto Op0 = GEP_T->getOperand(0)->stripPointerCasts(); - if (Op0 == Ptr) { - HasSameBase = true; - break; - } - GEP_T = dyn_cast(Op0); - } - if (!HasSameBase) - return None; - return OffsetVal; - }; - - if (GEP1) { - auto Offset = getOffsetFromBase(GEP1, Ptr2); - if (Offset) - return -*Offset; - } - if (GEP2) { - auto Offset = getOffsetFromBase(GEP2, Ptr1); - if (Offset) - return Offset; - } - // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical // base. After that base, they may have some number of common (and // potentially variable) indices. After that they handle some constant @@ -7166,9 +7124,10 @@ if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) break; - auto Offset1 = getOffsetFromIndex(GEP1, Idx, DL); - auto Offset2 = getOffsetFromIndex(GEP2, Idx, DL); - if (!Offset1 || !Offset2) + auto IOffset1 = getOffsetFromIndex(GEP1, Idx, DL); + auto IOffset2 = getOffsetFromIndex(GEP2, Idx, DL); + if (!IOffset1 || !IOffset2) return None; - return *Offset2 - *Offset1; + return *IOffset2 - *IOffset1 + Offset2.getSExtValue() - + Offset1.getSExtValue(); } diff --git a/llvm/test/Transforms/MemCpyOpt/opaque-ptr.ll b/llvm/test/Transforms/MemCpyOpt/opaque-ptr.ll --- a/llvm/test/Transforms/MemCpyOpt/opaque-ptr.ll +++ b/llvm/test/Transforms/MemCpyOpt/opaque-ptr.ll @@ -22,10 +22,9 @@ define void @test_different_gep_source_elements(ptr %src) { ; CHECK-LABEL: @test_different_gep_source_elements( ; CHECK-NEXT: [[PB:%.*]] = getelementptr [[B:%.*]], ptr [[SRC:%.*]], i64 0, i32 1 -; CHECK-NEXT: store i64 0, ptr [[PB]], align 4 ; CHECK-NEXT: [[PA:%.*]] = getelementptr [[A:%.*]], ptr [[SRC]], i64 0, i32 1 ; CHECK-NEXT: [[PA2:%.*]] = getelementptr [[A]], ptr [[SRC]], i64 0, i32 2 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[PA]], i8 0, i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[PB]], i8 0, i64 20, i1 false) ; CHECK-NEXT: ret void ; %pb = getelementptr %b, ptr %src, i64 0, i32 1 @@ -37,5 +36,23 @@ ret void } +define void @test_gep_of_vscale_non_const_gep(ptr %p, i64 %idx) { +; CHECK-LABEL: @test_gep_of_vscale_non_const_gep( +; CHECK-NEXT: [[G1:%.*]] = getelementptr , ptr [[P:%.*]], i64 [[IDX:%.*]], i32 1 +; CHECK-NEXT: [[G2:%.*]] = getelementptr , ptr [[P]], i64 [[IDX]], i32 5 +; CHECK-NEXT: [[H1:%.*]] = getelementptr i8, ptr [[G1]], i64 2 +; CHECK-NEXT: [[H2:%.*]] = getelementptr i8, ptr [[G2]], i64 6 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[H1]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret void +; + %g1 = getelementptr , ptr %p, i64 %idx, i32 1 + %g2 = getelementptr , ptr %p, i64 %idx, i32 5 + %h1 = getelementptr i8, ptr %g1, i64 2 + %h2 = getelementptr i8, ptr %g2, i64 6 + store i64 0, ptr %h1 + store i64 0, ptr %h2 + ret void +} + declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)