diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -192,6 +192,7 @@
   bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
   bool visitCastInst(CastInst &CI);
   bool visitBitCastInst(BitCastInst &BCI);
+  bool visitInsertElementInst(InsertElementInst &IEI);
   bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
   bool visitPHINode(PHINode &PHI);
   bool visitLoadInst(LoadInst &LI);
@@ -389,7 +390,7 @@
   if (!SV.empty()) {
     for (unsigned I = 0, E = SV.size(); I != E; ++I) {
       Value *V = SV[I];
-      if (V == nullptr)
+      if (V == nullptr || SV[I] == CV[I])
         continue;
 
       Instruction *Old = cast<Instruction>(V);
@@ -740,6 +741,39 @@
   return true;
 }
 
+/// Scalarize an insertelement whose lane index is a constant integer.
+///
+/// Each result lane is the matching lane of the scattered source vector,
+/// except the indexed lane, which becomes the inserted scalar. Returns
+/// false, without gathering a replacement, when the result is not a vector
+/// or the lane index is not a ConstantInt.
+bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
+  VectorType *VT = dyn_cast<VectorType>(IEI.getType());
+  if (!VT)
+    return false;
+
+  unsigned NumElems = VT->getNumElements();
+  Scatterer Op0 = scatter(&IEI, IEI.getOperand(0));
+  Value *NewElt = IEI.getOperand(1);
+
+  // Only constant lane indices are scalarized.
+  auto *CI = dyn_cast<ConstantInt>(IEI.getOperand(2));
+  if (!CI)
+    return false;
+
+  // Saturate rather than call getZExtValue(), which asserts when the constant
+  // needs more than 64 bits; a saturated index cannot equal any lane number.
+  const uint64_t InsLane = CI->getLimitedValue();
+
+  ValueVector Res;
+  Res.resize(NumElems);
+  for (unsigned I = 0; I < NumElems; ++I)
+    Res[I] = InsLane == I ? NewElt : Op0[I];
+
+  gather(&IEI, Res);
+  return true;
+}
+
 bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   VectorType *VT = dyn_cast<VectorType>(SVI.getType());
   if (!VT)
diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll
--- a/llvm/test/Transforms/Scalarizer/basic.ll
+++ b/llvm/test/Transforms/Scalarizer/basic.ll
@@ -276,14 +276,14 @@
 ; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
 ; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
 ; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
+; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
+; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
+; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
 ; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
 ; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
-; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
 ; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100
 ; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1
-; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
 ; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100
-; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
 ; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3
 ; CHECK: store float* %val.i0, float** %dest.i0, align 32
 ; CHECK: store float* %val.i1, float** %dest.i1, align 8
diff --git a/llvm/test/Transforms/Scalarizer/constant-insertelement.ll b/llvm/test/Transforms/Scalarizer/constant-insertelement.ll
--- a/llvm/test/Transforms/Scalarizer/constant-insertelement.ll
+++ b/llvm/test/Transforms/Scalarizer/constant-insertelement.ll
@@ -12,18 +12,9 @@
 ; ALL-NEXT: [[VAL0_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4
 ; ALL-NEXT: [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
 ; ALL-NEXT: [[VAL0_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8
-; ALL-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
-; ALL-NEXT: [[VAL0_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4
-; ALL-NEXT: [[VAL0_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL0_I0]], i32 0
-; ALL-NEXT: [[VAL0_UPTO1:%.*]] = insertelement <4 x i32> [[VAL0_UPTO0]], i32 [[VAL0_I1]], i32 1
-; ALL-NEXT: [[VAL0_UPTO2:%.*]] = insertelement <4 x i32> [[VAL0_UPTO1]], i32 [[VAL0_I2]], i32 2
-; ALL-NEXT: [[VAL0:%.*]] = insertelement <4 x i32> [[VAL0_UPTO2]], i32 [[VAL0_I3]], i32 3
-; ALL-NEXT: [[VAL0_I01:%.*]] = extractelement <4 x i32> [[VAL0]], i32 0
-; ALL-NEXT: [[VAL2_I0:%.*]] = shl i32 1, [[VAL0_I01]]
-; ALL-NEXT: [[VAL0_I12:%.*]] = extractelement <4 x i32> [[VAL0]], i32 1
-; ALL-NEXT: [[VAL2_I1:%.*]] = shl i32 2, [[VAL0_I12]]
-; ALL-NEXT: [[VAL0_I23:%.*]] = extractelement <4 x i32> [[VAL0]], i32 2
-; ALL-NEXT: [[VAL2_I2:%.*]] = shl i32 3, [[VAL0_I23]]
+; ALL-NEXT: [[VAL2_I0:%.*]] = shl i32 1, [[VAL0_I0]]
+; ALL-NEXT: [[VAL2_I1:%.*]] = shl i32 2, [[VAL0_I1]]
+; ALL-NEXT: [[VAL2_I2:%.*]] = shl i32 3, [[VAL0_I2]]
 ; ALL-NEXT: [[VAL2_I3:%.*]] = shl i32 4, [[REPL:%.*]]
 ; ALL-NEXT: [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL2_I0]], i32 0
 ; ALL-NEXT: [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i32 1