Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -824,6 +824,38 @@
   if (Instruction *Broadcast = foldInsSequenceIntoBroadcast(IE))
     return Broadcast;
 
+  Value *X;
+  if (match(ScalarOp, m_OneUse(m_BitCast(m_Value(X)))) &&
+      isa<UndefValue>(VecOp)) {
+    auto *InsertIndex = dyn_cast<ConstantInt>(IdxOp);
+    auto *SrcVecTy = dyn_cast<VectorType>(X->getType());
+    if (InsertIndex && SrcVecTy) {
+      // We're bitcasting from vector to scalar and then inserting into a larger
+      // undef vector. This is really a subvector extension with undef elements,
+      // so use a size-extending shuffle to avoid the scalar conversion:
+      //   insert undef, (bitcast vType X to scalar), C -->
+      //   bitcast (shuffle X, undef, Mask)
+      unsigned NumSrcElts = SrcVecTy->getNumElements();
+      unsigned ExtRatio = IE.getType()->getBitWidth() / SrcVecTy->getBitWidth();
+      unsigned NumMaskVals = NumSrcElts * ExtRatio;
+      SmallVector<Constant *, 16> MaskValues(NumMaskVals);
+      for (unsigned i = 0; i != NumMaskVals; ++i) {
+        // The bitcast maps all of the elements of the source vector into one
+        // larger element of the resulting vector. All other elements are
+        // undefined. Example:
+        //   insert <2 x i16> undef, (bitcast <2 x i8> X to i16), i32 0 -->
+        //   bitcast (shuffle <2 x i8> X, undef, <0,1,-1,-1>) to <2 x i16>
+        if (i / NumSrcElts == InsertIndex->getZExtValue())
+          MaskValues[i] = Builder.getInt32(i % NumSrcElts);
+        else
+          MaskValues[i] = UndefValue::get(Builder.getInt32Ty());
+      }
+      Value *SV = Builder.CreateShuffleVector(X, UndefValue::get(SrcVecTy),
+                                              ConstantVector::get(MaskValues));
+      return new BitCastInst(SV, IE.getType());
+    }
+  }
+
   return nullptr;
 }
 
Index: test/Transforms/InstCombine/insert-extract-shuffle.ll
===================================================================
--- test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -283,3 +283,31 @@
   %ret = select i1 %e, <4 x i32> %b, <4 x i32> zeroinitializer
   ret <4 x i32> %ret
 }
+
+; insert undef, (bitcast vType X to scalar), C --> bitcast (shuffle X, undef, Mask)
+; PR34716 - https://bugs.llvm.org/show_bug.cgi?id=34716
+
+define <2 x i64> @bitcast_vector_and_insert(<2 x float> %x){
+; CHECK-LABEL: @bitcast_vector_and_insert(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> %x, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT:    [[VEC:%.*]] = bitcast <4 x float> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[VEC]]
+;
+  %bc = bitcast <2 x float> %x to i64
+  %vec = insertelement <2 x i64> undef, i64 %bc, i32 0
+  ret <2 x i64> %vec
+}
+
+; Use weird types to show the shuffle mask is still correct for unusual cases.
+
+define <4 x i72> @bitcast_vector_and_insert_odd(<3 x i24> %x){
+; CHECK-LABEL: @bitcast_vector_and_insert_odd(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i24> %x, <3 x i24> undef, <12 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[VEC:%.*]] = bitcast <12 x i24> [[TMP1]] to <4 x i72>
+; CHECK-NEXT:    ret <4 x i72> [[VEC]]
+;
+  %bc = bitcast <3 x i24> %x to i72
+  %vec = insertelement <4 x i72> undef, i72 %bc, i32 2
+  ret <4 x i72> %vec
+}
+