Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -383,9 +383,18 @@
   auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
                                         ConstantVector::get(ExtendMask));
 
+  // Insert the new shuffle after the vector operand of the extract is defined
+  // or at the start of the basic block, so any subsequent extracts can use it.
+  // PHI nodes must stay grouped at the top of their block, so a PHI operand
+  // takes the start-of-block path instead of being followed by the shuffle.
+  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
+  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
+    WideVec->insertAfter(ExtVecOpInst);
+  else
+    IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
+
   // Replace all extracts from the original narrow vector with extracts from
   // the new wide vector.
-  WideVec->insertBefore(ExtElt);
   for (User *U : ExtVecOp->users()) {
     if (ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U)) {
       auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
Index: test/Transforms/InstCombine/insert-extract-shuffle.ll
===================================================================
--- test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -72,3 +72,20 @@
   ret <8 x float> %i1
 }
 
+; PR26015: https://llvm.org/bugs/show_bug.cgi?id=26015
+define <8 x i16> @pr26015(<4 x i16> %t0) {
+; CHECK-LABEL: @pr26015(
+; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <4 x i16> %t0, <4 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: %[[EXT:.*]] = extractelement <4 x i16> %t0, i32 2
+; CHECK-NEXT: %t2 = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 undef>, i16 %[[EXT]], i32 3
+; CHECK-NEXT: %t3 = insertelement <8 x i16> %t2, i16 0, i32 6
+; CHECK-NEXT: %t5 = shufflevector <8 x i16> %t3, <8 x i16> %[[WIDEVEC]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11>
+; CHECK-NEXT: ret <8 x i16> %t5
+  %t1 = extractelement <4 x i16> %t0, i32 2
+  %t2 = insertelement <8 x i16> zeroinitializer, i16 %t1, i32 3
+  %t3 = insertelement <8 x i16> %t2, i16 0, i32 6
+  %t4 = extractelement <4 x i16> %t0, i32 3
+  %t5 = insertelement <8 x i16> %t3, i16 %t4, i32 7
+  ret <8 x i16> %t5
+}
+