diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -684,7 +684,8 @@
 /// is a simple API that does not depend on the analysis pass.
 /// \param StrictCheck Ensure that the calculated distance matches the
 /// type-based one after all the bitcasts removal in the provided pointers.
-Optional<int> getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL,
+Optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
+                              Value *PtrB, const DataLayout &DL,
                               ScalarEvolution &SE, bool StrictCheck = false,
                               bool CheckType = true);
 
@@ -698,7 +699,7 @@
 /// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and
 /// saves the mask for actual memory accesses in program order in
 /// \p SortedIndices as <1,2,0,3>
-bool sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
+bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
                      ScalarEvolution &SE,
                      SmallVectorImpl<unsigned> &SortedIndices);
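The new signatures take the accessed element type explicitly instead of deriving it from the pointee type, which does not exist for opaque pointers. As a caller-side sketch (not part of the patch; LA and LB are hypothetical adjacent LoadInsts, with DL and SE in scope), the element types would typically come from the access instructions themselves:

    // Sketch only: the accessed type now comes from the instruction
    // (getLoadStoreType), not from the pointer operand's pointee type.
    Type *ElemTyA = getLoadStoreType(LA);
    Type *ElemTyB = getLoadStoreType(LB);
    Optional<int> Dist =
        getPointersDiff(ElemTyA, LA->getPointerOperand(), ElemTyB,
                        LB->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
    bool Consecutive = Dist && *Dist == 1; // LB is one element after LA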
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1124,16 +1124,22 @@
   return Stride;
 }
 
-Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
-                                    const DataLayout &DL, ScalarEvolution &SE,
-                                    bool StrictCheck, bool CheckType) {
+Optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
+                                    Value *PtrB, const DataLayout &DL,
+                                    ScalarEvolution &SE, bool StrictCheck,
+                                    bool CheckType) {
   assert(PtrA && PtrB && "Expected non-nullptr pointers.");
+  assert(cast<PointerType>(PtrA->getType())
+             ->isOpaqueOrPointeeTypeMatches(ElemTyA) && "Wrong PtrA type");
+  assert(cast<PointerType>(PtrB->getType())
+             ->isOpaqueOrPointeeTypeMatches(ElemTyB) && "Wrong PtrB type");
+
   // Make sure that A and B are different pointers.
   if (PtrA == PtrB)
     return 0;
 
-  // Make sure that PtrA and PtrB have the same type if required
-  if (CheckType && PtrA->getType() != PtrB->getType())
+  // Make sure that the element types are the same if required.
+  if (CheckType && ElemTyA != ElemTyB)
     return None;
 
   unsigned ASA = PtrA->getType()->getPointerAddressSpace();
@@ -1174,8 +1180,7 @@
       return None;
     Val = Diff->getAPInt().getSExtValue();
   }
-  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
-  int Size = DL.getTypeStoreSize(Ty);
+  int Size = DL.getTypeStoreSize(ElemTyA);
   int Dist = Val / Size;
 
   // Ensure that the calculated distance matches the type-based one after all
@@ -1185,8 +1190,8 @@
   return None;
 }
 
-bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
-                           ScalarEvolution &SE,
+bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
+                           const DataLayout &DL, ScalarEvolution &SE,
                            SmallVectorImpl<unsigned> &SortedIndices) {
   assert(llvm::all_of(
              VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
@@ -1204,8 +1209,8 @@
   int Cnt = 1;
   bool IsConsecutive = true;
   for (auto *Ptr : VL.drop_front()) {
-    Optional<int> Diff =
-        getPointersDiff(Ptr0, Ptr, DL, SE, /*StrictCheck=*/true);
+    Optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
+                                         /*StrictCheck=*/true);
     if (!Diff)
       return false;
 
@@ -1238,8 +1243,10 @@
   Value *PtrB = getLoadStorePointerOperand(B);
   if (!PtrA || !PtrB)
     return false;
-  Optional<int> Diff =
-      getPointersDiff(PtrA, PtrB, DL, SE, /*StrictCheck=*/true, CheckType);
+  Type *ElemTyA = getLoadStoreType(A);
+  Type *ElemTyB = getLoadStoreType(B);
+  Optional<int> Diff = getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
+                                       /*StrictCheck=*/true, CheckType);
   return Diff && *Diff == 1;
 }
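Note that the returned distance is measured in elements of ElemTyA, not in bytes, which is why an accurate element type matters. An illustrative walk-through of the arithmetic above (values assumed, not taken from the patch):

    // Two i64 accesses whose pointers are 24 bytes apart:
    //   Val  = byte distance from SCEV or constant-offset stripping = 24
    //   Size = DL.getTypeStoreSize(ElemTyA) = 8 for i64
    //   Dist = Val / Size = 3 elements
    // Passing the wrong element type (e.g. i32, Size = 4) would yield 6,
    // so the caller-provided type directly changes the result.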
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -987,9 +987,9 @@
       if (LI1->getParent() != LI2->getParent())
         return VLOperands::ScoreFail;
 
-      Optional<int> Dist =
-          getPointersDiff(LI1->getPointerOperand(), LI2->getPointerOperand(),
-                          DL, SE, /*StrictCheck=*/true);
+      Optional<int> Dist = getPointersDiff(
+          LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
+          LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
       return (Dist && *Dist == 1) ? VLOperands::ScoreConsecutiveLoads
                                   : VLOperands::ScoreFail;
     }
@@ -2968,7 +2968,7 @@
       OrdersType CurrentOrder;
       // Check the order of pointer operands.
-      if (llvm::sortPtrAccesses(PointerOps, *DL, *SE, CurrentOrder)) {
+      if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
         Value *Ptr0;
         Value *PtrN;
         if (CurrentOrder.empty()) {
@@ -2978,7 +2978,8 @@
           Ptr0 = PointerOps[CurrentOrder.front()];
           PtrN = PointerOps[CurrentOrder.back()];
         }
-        Optional<int> Diff = getPointersDiff(Ptr0, PtrN, *DL, *SE);
+        Optional<int> Diff = getPointersDiff(
+            ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
         // Check that the sorted loads are consecutive.
         if (static_cast<unsigned>(*Diff) == VL.size() - 1) {
           if (CurrentOrder.empty()) {
@@ -3243,7 +3244,7 @@
       OrdersType CurrentOrder;
       // Check the order of pointer operands.
-      if (llvm::sortPtrAccesses(PointerOps, *DL, *SE, CurrentOrder)) {
+      if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
        Value *Ptr0;
         Value *PtrN;
         if (CurrentOrder.empty()) {
@@ -3253,7 +3254,8 @@
           Ptr0 = PointerOps[CurrentOrder.front()];
           PtrN = PointerOps[CurrentOrder.back()];
         }
-        Optional<int> Dist = getPointersDiff(Ptr0, PtrN, *DL, *SE);
+        Optional<int> Dist =
+            getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
         // Check that the sorted pointer operands are consecutive.
         if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
           if (CurrentOrder.empty()) {
@@ -6893,9 +6895,10 @@
     ++IterCnt;
     CheckedPairs[Idx].set(K);
     CheckedPairs[K].set(Idx);
-    Optional<int> Diff = getPointersDiff(Stores[K]->getPointerOperand(),
-                                         Stores[Idx]->getPointerOperand(), *DL,
-                                         *SE, /*StrictCheck=*/true);
+    Optional<int> Diff = getPointersDiff(
+        Stores[K]->getValueOperand()->getType(), Stores[K]->getPointerOperand(),
+        Stores[Idx]->getValueOperand()->getType(),
+        Stores[Idx]->getPointerOperand(), *DL, *SE, /*StrictCheck=*/true);
     if (!Diff || *Diff == 0)
       return false;
     int Val = *Diff;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -slp-vectorizer -mtriple=x86_64-apple-macosx -mcpu=haswell < %s | FileCheck %s
+
+define void @test(ptr %r, ptr %p, ptr %q) #0 {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[P0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 0
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3
+; CHECK-NEXT:    [[Q0:%.*]] = getelementptr inbounds i64, ptr [[Q:%.*]], i64 0
+; CHECK-NEXT:    [[Q1:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 1
+; CHECK-NEXT:    [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 2
+; CHECK-NEXT:    [[Q3:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast ptr [[P0]] to <4 x i64>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* [[TMP1]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast ptr [[Q0]] to <4 x i64>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP3]], align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <4 x i64> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
+; CHECK-NEXT:    [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
+; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
+; CHECK-NEXT:    [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
+; CHECK-NEXT:    [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP9]]
+; CHECK-NEXT:    ret void
+;
+  %p0 = getelementptr inbounds i64, ptr %p, i64 0
+  %p1 = getelementptr inbounds i64, ptr %p, i64 1
+  %p2 = getelementptr inbounds i64, ptr %p, i64 2
+  %p3 = getelementptr inbounds i64, ptr %p, i64 3
+
+  %q0 = getelementptr inbounds i64, ptr %q, i64 0
+  %q1 = getelementptr inbounds i64, ptr %q, i64 1
+  %q2 = getelementptr inbounds i64, ptr %q, i64 2
+  %q3 = getelementptr inbounds i64, ptr %q, i64 3
+
+  %x0 = load i64, ptr %p0, align 2
+  %x1 = load i64, ptr %p1, align 2
+  %x2 = load i64, ptr %p2, align 2
+  %x3 = load i64, ptr %p3, align 2
+
+  %y0 = load i64, ptr %q0, align 2
+  %y1 = load i64, ptr %q1, align 2
+  %y2 = load i64, ptr %q2, align 2
+  %y3 = load i64, ptr %q3, align 2
+
+  %sub0 = sub nsw i64 %x0, %y0
+  %sub1 = sub nsw i64 %x1, %y1
+  %sub2 = sub nsw i64 %x2, %y2
+  %sub3 = sub nsw i64 %x3, %y3
+
+  %g0 = getelementptr inbounds i32, ptr %r, i64 %sub0
+  %g1 = getelementptr inbounds i32, ptr %r, i64 %sub1
+  %g2 = getelementptr inbounds i32, ptr %r, i64 %sub2
+  %g3 = getelementptr inbounds i32, ptr %r, i64 %sub3
+  ret void
+}
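The new test uses only opaque pointers (ptr), for which the removed getElementType()-based derivation in getPointersDiff had no pointee type to inspect. A sketch of the invariant the new assertions encode (hypothetical Ptr and ElemTy, not code from the patch):

    // Sketch only: for a typed pointer, ElemTy must equal the pointee type;
    // for an opaque `ptr` any ElemTy is acceptable, since no pointee exists.
    auto *PtrTy = cast<PointerType>(Ptr->getType());
    assert(PtrTy->isOpaqueOrPointeeTypeMatches(ElemTy) &&
           "caller must pass the type actually loaded or stored");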