Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2484,13 +2484,6 @@ } if (VectorType *DestVTy = dyn_cast(DestTy)) { - if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { - Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType()); - return InsertElementInst::Create(UndefValue::get(DestTy), Elem, - Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); - // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) - } - if (isa(SrcTy)) { // If this is a cast from an integer to vector, check to see if the input // is a trunc or zext of a bitcast from vector. If so, we can replace all Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1021,9 +1021,26 @@ VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE))) return replaceInstUsesWith(IE, V); + // If the scalar is bitcast and inserted into undef, do the insert in the + // source type followed by bitcast. + // TODO: Generalize for insert into any constant, not just undef? 
+ Value *ScalarSrc; + if (match(VecOp, m_Undef()) && + match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) && + (ScalarSrc->getType()->isIntegerTy() || + ScalarSrc->getType()->isFloatingPointTy())) { + // inselt undef, (bitcast ScalarSrc), IdxOp --> + // bitcast (inselt undef, ScalarSrc, IdxOp) + Type *ScalarTy = ScalarSrc->getType(); + Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount()); + UndefValue *NewUndef = UndefValue::get(VecTy); + Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp); + return new BitCastInst(NewInsElt, IE.getType()); + } + // If the vector and scalar are both bitcast from the same element type, do // the insert in that source type followed by bitcast. - Value *VecSrc, *ScalarSrc; + Value *VecSrc; if (match(VecOp, m_BitCast(m_Value(VecSrc))) && match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) && (VecOp->hasOneUse() || ScalarOp->hasOneUse()) && Index: llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll =================================================================== --- llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll +++ llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll @@ -22,8 +22,7 @@ define <1 x i64> @c(double %y) { ; CHECK-LABEL: @c( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[Y:%.*]] to i64 -; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0 +; CHECK-NEXT: [[C:%.*]] = bitcast double [[Y:%.*]] to <1 x i64> ; CHECK-NEXT: ret <1 x i64> [[C]] ; %c = bitcast double %y to <1 x i64> @@ -32,8 +31,8 @@ define <1 x i64> @d(i64 %y) { ; CHECK-LABEL: @d( -; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 [[Y:%.*]], i32 0 -; CHECK-NEXT: ret <1 x i64> [[C]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <1 x i64> undef, i64 [[Y:%.*]], i32 0 +; CHECK-NEXT: ret <1 x i64> [[TMP1]] ; %c = bitcast i64 %y to <1 x i64> ret <1 x i64> %c @@ -51,8 +50,7 @@ define <1 x i64> @f(x86_mmx %y) { ; CHECK-LABEL: @f( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast x86_mmx [[Y:%.*]] 
to i64 -; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0 +; CHECK-NEXT: [[C:%.*]] = bitcast x86_mmx [[Y:%.*]] to <1 x i64> ; CHECK-NEXT: ret <1 x i64> [[C]] ; %c = bitcast x86_mmx %y to <1 x i64> @@ -62,7 +60,8 @@ define double @g(x86_mmx %x) { ; CHECK-LABEL: @g( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast x86_mmx [[X:%.*]] to double +; CHECK-NEXT: [[BC:%.*]] = bitcast x86_mmx [[X:%.*]] to <1 x double> +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x double> [[BC]], i32 0 ; CHECK-NEXT: ret double [[TMP0]] ; entry: @@ -73,8 +72,8 @@ define <3 x i64> @bitcast_inselt_undef(double %x, i32 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef( -; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64 -; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> undef, i64 [[XB]], i32 [[IDX:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[X:%.*]], i32 [[IDX:%.*]] +; CHECK-NEXT: [[I:%.*]] = bitcast <3 x double> [[TMP1]] to <3 x i64> ; CHECK-NEXT: ret <3 x i64> [[I]] ; %xb = bitcast double %x to i64 @@ -82,10 +81,12 @@ ret <3 x i64> %i } +; Integer source (FP destination) is ok; index is anything. 
+ define <3 x float> @bitcast_inselt_undef_fp(i32 %x, i567 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef_fp( -; CHECK-NEXT: [[XB:%.*]] = bitcast i32 [[X:%.*]] to float -; CHECK-NEXT: [[I:%.*]] = insertelement <3 x float> undef, float [[XB]], i567 [[IDX:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]] +; CHECK-NEXT: [[I:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float> ; CHECK-NEXT: ret <3 x float> [[I]] ; %xb = bitcast i32 %x to float @@ -95,6 +96,8 @@ declare void @use(i64) +; Negative test - extra use prevents canonicalization + define <3 x i64> @bitcast_inselt_undef_extra_use(double %x, i32 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef_extra_use( ; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64 @@ -108,6 +111,8 @@ ret <3 x i64> %i } +; Negative test - source type must be scalar + define <3 x i64> @bitcast_inselt_undef_vec_src(<2 x i32> %x, i32 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef_vec_src( ; CHECK-NEXT: [[XB:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64 @@ -119,6 +124,8 @@ ret <3 x i64> %i } +; Negative test - source type must be scalar + define <3 x i64> @bitcast_inselt_undef_from_mmx(x86_mmx %x, i32 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef_from_mmx( ; CHECK-NEXT: [[XB:%.*]] = bitcast x86_mmx [[X:%.*]] to i64 @@ -130,12 +137,13 @@ ret <3 x i64> %i } +; Reduce number of casts + define <2 x i64> @PR45748(double %x, double %y) { ; CHECK-LABEL: @PR45748( -; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64 -; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i64> undef, i64 [[XB]], i32 0 -; CHECK-NEXT: [[YB:%.*]] = bitcast double [[Y:%.*]] to i64 -; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i64> [[I0]], i64 [[YB]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[Y:%.*]], i32 1 +; CHECK-NEXT: [[I1:%.*]] = bitcast <2 x double> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> 
[[I1]] ; %xb = bitcast double %x to i64