diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -192,6 +192,7 @@
   bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
   bool visitCastInst(CastInst &CI);
   bool visitBitCastInst(BitCastInst &BCI);
+  bool visitInsertElementInst(InsertElementInst &IEI);
   bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
   bool visitPHINode(PHINode &PHI);
   bool visitLoadInst(LoadInst &LI);
@@ -743,6 +744,43 @@
   return true;
 }
 
+// Scalarize an insertelement whose index is only known at run time.
+// Lane I of the result becomes `select (InsIdx == I), NewElt, Op0[I]`.
+// Returns false without touching IEI when its type is not a vector or when
+// the index is a ConstantInt; otherwise hands the per-lane values to gather()
+// and returns true.
+bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
+  VectorType *VT = dyn_cast<VectorType>(IEI.getType());
+  if (!VT)
+    return false;
+
+  // Constant-index inserts are not rewritten here, so bail out before creating
+  // a builder or scattering the source vector.
+  Value *InsIdx = IEI.getOperand(2);
+  if (isa<ConstantInt>(InsIdx))
+    return false;
+
+  unsigned NumElems = VT->getNumElements();
+  IRBuilder<> Builder(&IEI);
+  Scatterer Op0 = scatter(&IEI, IEI.getOperand(0));
+  Value *NewElt = IEI.getOperand(1);
+
+  ValueVector Res;
+  Res.resize(NumElems);
+
+  for (unsigned I = 0; I < NumElems; ++I) {
+    // Build the compare in its own statement: as a nested call argument its
+    // ordering relative to Op0[I] would be unspecified.
+    Value *IsLane =
+        Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
+                             InsIdx->getName() + ".is." + Twine(I));
+    Res[I] = Builder.CreateSelect(IsLane, NewElt, Op0[I],
+                                  IEI.getName() + ".i" + Twine(I));
+  }
+  gather(&IEI, Res);
+  return true;
+}
+
 bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   VectorType *VT = dyn_cast<VectorType>(SVI.getType());
   if (!VT)
diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll
--- a/llvm/test/Transforms/Scalarizer/basic.ll
+++ b/llvm/test/Transforms/Scalarizer/basic.ll
@@ -363,19 +363,37 @@
   ret void
 }
 
-; Test that variable inserts aren't scalarized.
+; Test that variable inserts are scalarized.
 define void @f12(<4 x i32> *%dest, <4 x i32> *%src, i32 %index) {
-; CHECK: @f12(
-; CHECK: %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
-; CHECK-DAG: %val1.i0 = extractelement <4 x i32> %val1, i32 0
-; CHECK-DAG: %val1.i1 = extractelement <4 x i32> %val1, i32 1
-; CHECK-DAG: %val1.i2 = extractelement <4 x i32> %val1, i32 2
-; CHECK-DAG: %val1.i3 = extractelement <4 x i32> %val1, i32 3
-; CHECK-DAG: %val2.i0 = shl i32 1, %val1.i0
-; CHECK-DAG: %val2.i1 = shl i32 2, %val1.i1
-; CHECK-DAG: %val2.i2 = shl i32 3, %val1.i2
-; CHECK-DAG: %val2.i3 = shl i32 4, %val1.i3
-; CHECK: ret void
+; CHECK-LABEL: @f12(
+; CHECK: %dest.i0 = bitcast <4 x i32>* %dest to i32*
+; CHECK: %dest.i1 = getelementptr i32, i32* %dest.i0, i32 1
+; CHECK: %dest.i2 = getelementptr i32, i32* %dest.i0, i32 2
+; CHECK: %dest.i3 = getelementptr i32, i32* %dest.i0, i32 3
+; CHECK: %src.i0 = bitcast <4 x i32>* %src to i32*
+; CHECK: %val0.i0 = load i32, i32* %src.i0, align 16
+; CHECK: %src.i1 = getelementptr i32, i32* %src.i0, i32 1
+; CHECK: %val0.i1 = load i32, i32* %src.i1, align 4
+; CHECK: %src.i2 = getelementptr i32, i32* %src.i0, i32 2
+; CHECK: %val0.i2 = load i32, i32* %src.i2, align 8
+; CHECK: %src.i3 = getelementptr i32, i32* %src.i0, i32 3
+; CHECK: %val0.i3 = load i32, i32* %src.i3, align 4
+; CHECK: %index.is.0 = icmp eq i32 %index, 0
+; CHECK: %val1.i0 = select i1 %index.is.0, i32 1, i32 %val0.i0
+; CHECK: %index.is.1 = icmp eq i32 %index, 1
+; CHECK: %val1.i1 = select i1 %index.is.1, i32 1, i32 %val0.i1
+; CHECK: %index.is.2 = icmp eq i32 %index, 2
+; CHECK: %val1.i2 = select i1 %index.is.2, i32 1, i32 %val0.i2
+; CHECK: %index.is.3 = icmp eq i32 %index, 3
+; CHECK: %val1.i3 = select i1 %index.is.3, i32 1, i32 %val0.i3
+; CHECK: %val2.i0 = shl i32 1, %val1.i0
+; CHECK: %val2.i1 = shl i32 2, %val1.i1
+; CHECK: %val2.i2 = shl i32 3, %val1.i2
+; CHECK: %val2.i3 = shl i32 4, %val1.i3
+; CHECK: store i32 %val2.i0, i32* %dest.i0, align 16
+; CHECK: store i32 %val2.i1, i32* %dest.i1, align 4
+; CHECK: store i32 %val2.i2, i32* %dest.i2, align 8
+; CHECK: store i32 %val2.i3, i32* %dest.i3, align 4
   %val0 = load <4 x i32> , <4 x i32> *%src
   %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
   %val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1