diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -396,7 +396,7 @@
   if (!SV.empty()) {
     for (unsigned I = 0, E = SV.size(); I != E; ++I) {
       Value *V = SV[I];
-      if (V == nullptr)
+      if (V == nullptr || SV[I] == CV[I])
         continue;
 
       Instruction *Old = cast<Instruction>(V);
@@ -748,9 +748,6 @@
 }
 
 bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
-  if (!ScalarizeVariableInsertExtract)
-    return false;
-
   VectorType *VT = dyn_cast<VectorType>(IEI.getType());
   if (!VT)
     return false;
@@ -759,20 +756,31 @@
   IRBuilder<> Builder(&IEI);
   Scatterer Op0 = scatter(&IEI, IEI.getOperand(0));
   Value *NewElt = IEI.getOperand(1);
-
   Value *InsIdx = IEI.getOperand(2);
-  if (isa<ConstantInt>(InsIdx))
-    return false;
 
   ValueVector Res;
   Res.resize(NumElems);
 
-  for (unsigned I = 0; I < NumElems; ++I) {
-    Res[I] = Builder.CreateSelect(
-        Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
-                             InsIdx->getName() + ".is." + Twine(I)),
-        NewElt, Op0[I], IEI.getName() + ".i" + Twine(I));
+  if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
+    // Constant index: the result is Op0 with one lane swapped for NewElt, so
+    // no new instructions are needed. equalsInt() is used instead of
+    // getZExtValue() because the latter asserts if the value exceeds 64 bits.
+    for (unsigned I = 0; I < NumElems; ++I)
+      Res[I] = CI->equalsInt(I) ? NewElt : Op0[I];
+  } else {
+    if (!ScalarizeVariableInsertExtract)
+      return false;
+
+    // Variable index: select between NewElt and the old lane for every lane.
+    for (unsigned I = 0; I < NumElems; ++I) {
+      Res[I] = Builder.CreateSelect(
+          Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
+                               InsIdx->getName() + ".is." + Twine(I)),
+          NewElt, Op0[I], IEI.getName() + ".i" + Twine(I));
+    }
+    Res.back()->takeName(&IEI);
   }
+
   gather(&IEI, Res);
   return true;
 }
diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll
--- a/llvm/test/Transforms/Scalarizer/basic.ll
+++ b/llvm/test/Transforms/Scalarizer/basic.ll
@@ -276,14 +276,14 @@
 ; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
 ; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
 ; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
+; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
+; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
+; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
 ; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
 ; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
-; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
 ; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100
 ; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1
-; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
 ; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100
-; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
 ; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3
 ; CHECK: store float* %val.i0, float** %dest.i0, align 32
 ; CHECK: store float* %val.i1, float** %dest.i1, align 8
@@ -385,11 +385,11 @@
 ; CHECK: %index.is.2 = icmp eq i32 %index, 2
 ; CHECK: %val1.i2 = select i1 %index.is.2, i32 1, i32 %val0.i2
 ; CHECK: %index.is.3 = icmp eq i32 %index, 3
-; CHECK: %val1.i3 = select i1 %index.is.3, i32 1, i32 %val0.i3
+; CHECK: %val1 = select i1 %index.is.3, i32 1, i32 %val0.i3
 ; CHECK: %val2.i0 = shl i32 1, %val1.i0
 ; CHECK: %val2.i1 = shl i32 2, %val1.i1
 ; CHECK: %val2.i2 = shl i32 3, %val1.i2
-; CHECK: %val2.i3 = shl i32 4, %val1.i3
+; CHECK: %val2.i3 = shl i32 4, %val1
 ; CHECK: store i32 %val2.i0, i32* %dest.i0, align 16
 ; CHECK: store i32 %val2.i1, i32* %dest.i1, align 4
 ; CHECK: store i32 %val2.i2, i32* %dest.i2, align 8
@@ -621,6 +621,30 @@
   ret i32 %val2
 }
 
+; Test that constant inserts are nicely scalarized
+define <4 x i32> @f25(<4 x i32> *%src, i32 %repl, i32 %index) {
+; CHECK-LABEL: @f25(
+; CHECK: %src.i0 = bitcast <4 x i32>* %src to i32*
+; CHECK: %val0.i0 = load i32, i32* %src.i0, align 16
+; CHECK: %src.i1 = getelementptr i32, i32* %src.i0, i32 1
+; CHECK: %val0.i1 = load i32, i32* %src.i1, align 4
+; CHECK: %src.i2 = getelementptr i32, i32* %src.i0, i32 2
+; CHECK: %val0.i2 = load i32, i32* %src.i2, align 8
+; CHECK: %val2.i0 = shl i32 1, %val0.i0
+; CHECK: %val2.i1 = shl i32 2, %val0.i1
+; CHECK: %val2.i2 = shl i32 3, %val0.i2
+; CHECK: %val2.i3 = shl i32 4, %repl
+; CHECK: %val2.upto0 = insertelement <4 x i32> undef, i32 %val2.i0, i32 0
+; CHECK: %val2.upto1 = insertelement <4 x i32> %val2.upto0, i32 %val2.i1, i32 1
+; CHECK: %val2.upto2 = insertelement <4 x i32> %val2.upto1, i32 %val2.i2, i32 2
+; CHECK: %val2 = insertelement <4 x i32> %val2.upto2, i32 %val2.i3, i32 3
+; CHECK: ret <4 x i32> %val2
+  %val0 = load <4 x i32> , <4 x i32> *%src
+  %val1 = insertelement <4 x i32> %val0, i32 %repl, i32 3
+  %val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1
+  ret <4 x i32> %val2
+}
+
 !0 = !{ !"root" }
 !1 = !{ !"set1", !0 }
 !2 = !{ !"set2", !0 }
diff --git a/llvm/test/Transforms/Scalarizer/variable-insertelement.ll b/llvm/test/Transforms/Scalarizer/variable-insertelement.ll
--- a/llvm/test/Transforms/Scalarizer/variable-insertelement.ll
+++ b/llvm/test/Transforms/Scalarizer/variable-insertelement.ll
@@ -19,12 +19,12 @@
 ; DEFAULT-NEXT:    [[RES_I2:%.*]] = select i1 [[INDEX_IS_2]], i32 [[VAL]], i32 [[SRC_I2]]
 ; DEFAULT-NEXT:    [[INDEX_IS_3:%.*]] = icmp eq i32 [[INDEX]], 3
 ; DEFAULT-NEXT:    [[SRC_I3:%.*]] = extractelement <4 x i32> [[SRC]], i32 3
-; DEFAULT-NEXT:    [[RES_I3:%.*]] = select i1 [[INDEX_IS_3]], i32 [[VAL]], i32 [[SRC_I3]]
-; DEFAULT-NEXT:    [[RES_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[RES_I0]], i32 0
-; DEFAULT-NEXT:    [[RES_UPTO1:%.*]] = insertelement <4 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
-; DEFAULT-NEXT:    [[RES_UPTO2:%.*]] = insertelement <4 x i32> [[RES_UPTO1]], i32 [[RES_I2]], i32 2
-; DEFAULT-NEXT:    [[RES:%.*]] = insertelement <4 x i32> [[RES_UPTO2]], i32 [[RES_I3]], i32 3
-; DEFAULT-NEXT:    ret <4 x i32> [[RES]]
+; DEFAULT-NEXT:    [[RES:%.*]] = select i1 [[INDEX_IS_3]], i32 [[VAL]], i32 [[SRC_I3]]
+; DEFAULT-NEXT:    [[DOTUPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[RES_I0]], i32 0
+; DEFAULT-NEXT:    [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[RES_I1]], i32 1
+; DEFAULT-NEXT:    [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[RES_I2]], i32 2
+; DEFAULT-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[RES]], i32 3
+; DEFAULT-NEXT:    ret <4 x i32> [[TMP1]]
 ;
 ; OFF-LABEL: @f1(
 ; OFF-NEXT:    [[RES:%.*]] = insertelement <4 x i32> [[SRC:%.*]], i32 [[VAL:%.*]], i32 [[INDEX:%.*]]