diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -111,6 +111,9 @@
   ValueVector *CachePtr;
   ValueVector Tmp;
   unsigned Size;
+  // True when the cache holds a single base pointer that operator[] must
+  // expand into per-lane element addresses instead of indexing the cache.
+  bool BroadcastedAddress;
 };
 
 // FCmpSpliiter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
@@ -280,7 +283,8 @@
 
 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
                      Type *PtrElemTy, ValueVector *cachePtr)
-    : BB(bb), BBI(bbi), V(v), PtrElemTy(PtrElemTy), CachePtr(cachePtr) {
+    : BB(bb), BBI(bbi), V(v), PtrElemTy(PtrElemTy), CachePtr(cachePtr),
+      BroadcastedAddress(false) {
   Type *Ty = V->getType();
   if (Ty->isPointerTy()) {
     assert(cast<PointerType>(Ty)->isOpaqueOrPointeeTypeMatches(PtrElemTy) &&
@@ -292,16 +296,39 @@
     Tmp.resize(Size, nullptr);
   else if (CachePtr->empty())
     CachePtr->resize(Size, nullptr);
-  else
+  else if (PtrElemTy && CachePtr->size() == 1) {
+    // The cache already holds one scalar pointer for V (rather than one
+    // entry per lane); use it as the base address of every lane.
+    BroadcastedAddress = true;
+  } else
     assert(Size == CachePtr->size() && "Inconsistent vector sizes");
 }
 
 // Return component I, creating a new Value for it if necessary.
 Value *Scatterer::operator[](unsigned I) {
   ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
+
+  // A broadcasted address is a single pointer-to-vector held in a 1-element
+  // cache. Lane I is addressed by casting it to an element pointer and
+  // stepping I elements forward, so that the lanes do not alias.
+  if (BroadcastedAddress) {
+    assert(CV[0] && "value to broadcast");
+    IRBuilder<> Builder(BB, BBI);
+    Type *VectorElemTy = cast<VectorType>(PtrElemTy)->getElementType();
+    Type *NewPtrTy = PointerType::get(
+        VectorElemTy, CV[0]->getType()->getPointerAddressSpace());
+    Value *Base =
+        Builder.CreateBitCast(CV[0], NewPtrTy, CV[0]->getName() + ".bcast");
+    if (I == 0)
+      return Base;
+    return Builder.CreateConstGEP1_32(VectorElemTy, Base, I,
+                                      CV[0]->getName() + ".i" + Twine(I));
+  }
+
   // Try to reuse a previous value.
   if (CV[I])
     return CV[I];
+
   IRBuilder<> Builder(BB, BBI);
   if (PtrElemTy) {
     Type *VectorElemTy = cast<VectorType>(PtrElemTy)->getElementType();
@@ -432,6 +459,7 @@
       PotentiallyDeadInstrs.emplace_back(Old);
     }
   }
+  assert((SV.empty() || SV.size() == CV.size()) && "consistent update");
   SV = CV;
   Gathered.push_back(GatherList::value_type(Op, &SV));
 }
diff --git a/llvm/test/Transforms/Scalarizer/global-bug.ll b/llvm/test/Transforms/Scalarizer/global-bug.ll
--- a/llvm/test/Transforms/Scalarizer/global-bug.ll
+++ b/llvm/test/Transforms/Scalarizer/global-bug.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes='function(scalarizer)' -S | FileCheck %s
 
 @a = dso_local global i16 0, align 1
diff --git a/llvm/test/Transforms/Scalarizer/vector-of-pointer-to-vector.ll b/llvm/test/Transforms/Scalarizer/vector-of-pointer-to-vector.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/vector-of-pointer-to-vector.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define <1 x i32> @f1(<1 x <1 x i32>*> %src, i32 %index) {
+; CHECK-LABEL: @f1(
+; CHECK-NEXT:    [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0
+; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <1 x <1 x i32>*> [[SRC:%.*]], i32 0
+; CHECK-NEXT:    [[DOTUPTO0:%.*]] = select i1 [[INDEX_IS_0]], <1 x i32>* [[SRC_I0]], <1 x i32>* undef
+; CHECK-NEXT:    [[DOTUPTO0_BCAST:%.*]] = bitcast <1 x i32>* [[DOTUPTO0]] to i32*
+; CHECK-NEXT:    [[DOTI0:%.*]] = load i32, i32* [[DOTUPTO0_BCAST]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <1 x i32> poison, i32 [[DOTI0]], i32 0
+; CHECK-NEXT:    ret <1 x i32> [[TMP1]]
+;
+  %1 = extractelement <1 x <1 x i32>*> %src, i32 %index
+  %2 = load <1 x i32>, <1 x i32>* %1, align 4
+  ret <1 x i32> %2
+}
+
+define <2 x i32> @f2(<1 x <2 x i32>*> %src, i32 %index) {
+; CHECK-LABEL: @f2(
+; CHECK-NEXT:    [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0
+; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <1 x <2 x i32>*> [[SRC:%.*]], i32 0
+; CHECK-NEXT:    [[DOTUPTO0:%.*]] = select i1 [[INDEX_IS_0]], <2 x i32>* [[SRC_I0]], <2 x i32>* undef
+; CHECK-NEXT:    [[DOTUPTO0_BCAST:%.*]] = bitcast <2 x i32>* [[DOTUPTO0]] to i32*
+; CHECK-NEXT:    [[DOTI0:%.*]] = load i32, i32* [[DOTUPTO0_BCAST]], align 4
+; CHECK-NEXT:    [[DOTUPTO0_BCAST1:%.*]] = bitcast <2 x i32>* [[DOTUPTO0]] to i32*
+; CHECK-NEXT:    [[DOTUPTO0_I1:%.*]] = getelementptr i32, i32* [[DOTUPTO0_BCAST1]], i32 1
+; CHECK-NEXT:    [[DOTI1:%.*]] = load i32, i32* [[DOTUPTO0_I1]], align 4
+; CHECK-NEXT:    [[DOTUPTO02:%.*]] = insertelement <2 x i32> poison, i32 [[DOTI0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[DOTUPTO02]], i32 [[DOTI1]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
+;
+  %1 = extractelement <1 x <2 x i32>*> %src, i32 %index
+  %2 = load <2 x i32>, <2 x i32>* %1, align 4
+  ret <2 x i32> %2
+}
+
+define void @f3(<1 x <2 x i32>*> %src, i32 %index, <2 x i32> %val) {
+; CHECK-LABEL: @f3(
+; CHECK-NEXT:    [[VAL_I0:%.*]] = extractelement <2 x i32> [[VAL:%.*]], i32 0
+; CHECK-NEXT:    [[VAL_I1:%.*]] = extractelement <2 x i32> [[VAL]], i32 1
+; CHECK-NEXT:    [[INDEX_IS_0:%.*]] = icmp eq i32 [[INDEX:%.*]], 0
+; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <1 x <2 x i32>*> [[SRC:%.*]], i32 0
+; CHECK-NEXT:    [[DOTUPTO0:%.*]] = select i1 [[INDEX_IS_0]], <2 x i32>* [[SRC_I0]], <2 x i32>* undef
+; CHECK-NEXT:    [[DOTUPTO0_BCAST:%.*]] = bitcast <2 x i32>* [[DOTUPTO0]] to i32*
+; CHECK-NEXT:    store i32 [[VAL_I0]], i32* [[DOTUPTO0_BCAST]], align 4
+; CHECK-NEXT:    [[DOTUPTO0_BCAST1:%.*]] = bitcast <2 x i32>* [[DOTUPTO0]] to i32*
+; CHECK-NEXT:    [[DOTUPTO0_I1:%.*]] = getelementptr i32, i32* [[DOTUPTO0_BCAST1]], i32 1
+; CHECK-NEXT:    store i32 [[VAL_I1]], i32* [[DOTUPTO0_I1]], align 4
+; CHECK-NEXT:    ret void
+;
+  %1 = extractelement <1 x <2 x i32>*> %src, i32 %index
+  store <2 x i32> %val, <2 x i32>* %1, align 4
+  ret void
+}