Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2484,13 +2484,12 @@ } if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { - if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { + // Beware: messing with this target-specific oddity will cause trouble. + if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) { Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType()); return InsertElementInst::Create(UndefValue::get(DestTy), Elem, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); - // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) } - if (isa<IntegerType>(SrcTy)) { // If this is a cast from an integer to vector, check to see if the input // is a trunc or zext of a bitcast from vector. If so, we can replace all Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1021,9 +1021,26 @@ VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE))) return replaceInstUsesWith(IE, V); + // If the scalar is bitcast and inserted into undef, do the insert in the + // source type followed by bitcast. + // TODO: Generalize for insert into any constant, not just undef?
+ Value *ScalarSrc; + if (match(VecOp, m_Undef()) && + match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) && + (ScalarSrc->getType()->isIntegerTy() || + ScalarSrc->getType()->isFloatingPointTy())) { + // inselt undef, (bitcast ScalarSrc), IdxOp --> + // bitcast (inselt undef, ScalarSrc, IdxOp) + Type *ScalarTy = ScalarSrc->getType(); + Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount()); + UndefValue *NewUndef = UndefValue::get(VecTy); + Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp); + return new BitCastInst(NewInsElt, IE.getType()); + } + // If the vector and scalar are both bitcast from the same element type, do // the insert in that source type followed by bitcast. - Value *VecSrc, *ScalarSrc; + Value *VecSrc; if (match(VecOp, m_BitCast(m_Value(VecSrc))) && match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) && (VecOp->hasOneUse() || ScalarOp->hasOneUse()) && Index: llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll =================================================================== --- llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll +++ llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll @@ -22,8 +22,7 @@ define <1 x i64> @c(double %y) { ; CHECK-LABEL: @c( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[Y:%.*]] to i64 -; CHECK-NEXT: [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0 +; CHECK-NEXT: [[C:%.*]] = bitcast double [[Y:%.*]] to <1 x i64> ; CHECK-NEXT: ret <1 x i64> [[C]] ; %c = bitcast double %y to <1 x i64> @@ -71,10 +70,12 @@ ret double %1 } +; FP source is ok. 
+ define <3 x i64> @bitcast_inselt_undef(double %x, i32 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef( -; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64 -; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> undef, i64 [[XB]], i32 [[IDX:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[X:%.*]], i32 [[IDX:%.*]] +; CHECK-NEXT: [[I:%.*]] = bitcast <3 x double> [[TMP1]] to <3 x i64> ; CHECK-NEXT: ret <3 x i64> [[I]] ; %xb = bitcast double %x to i64 @@ -82,10 +83,12 @@ ret <3 x i64> %i } +; Integer source is ok; index is anything. + define <3 x float> @bitcast_inselt_undef_fp(i32 %x, i567 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef_fp( -; CHECK-NEXT: [[XB:%.*]] = bitcast i32 [[X:%.*]] to float -; CHECK-NEXT: [[I:%.*]] = insertelement <3 x float> undef, float [[XB]], i567 [[IDX:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]] +; CHECK-NEXT: [[I:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float> ; CHECK-NEXT: ret <3 x float> [[I]] ; %xb = bitcast i32 %x to float @@ -95,6 +98,8 @@ declare void @use(i64) +; Negative test - extra use prevents canonicalization + define <3 x i64> @bitcast_inselt_undef_extra_use(double %x, i32 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef_extra_use( ; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64 @@ -108,6 +113,8 @@ ret <3 x i64> %i } +; Negative test - source type must be scalar + define <3 x i64> @bitcast_inselt_undef_vec_src(<2 x i32> %x, i32 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef_vec_src( ; CHECK-NEXT: [[XB:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64 @@ -119,6 +126,8 @@ ret <3 x i64> %i } +; Negative test - source type must be integer or FP (x86_mmx is neither) + define <3 x i64> @bitcast_inselt_undef_from_mmx(x86_mmx %x, i32 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef_from_mmx( ; CHECK-NEXT: [[XB:%.*]] = bitcast x86_mmx [[X:%.*]] to i64 @@ -130,12 +139,13 @@ ret <3 x i64> %i } +; Reduce number of casts + define <2 x i64> @PR45748(double %x, double %y) { ; CHECK-LABEL: 
@PR45748( -; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64 -; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i64> undef, i64 [[XB]], i32 0 -; CHECK-NEXT: [[YB:%.*]] = bitcast double [[Y:%.*]] to i64 -; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i64> [[I0]], i64 [[YB]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[Y:%.*]], i32 1 +; CHECK-NEXT: [[I1:%.*]] = bitcast <2 x double> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[I1]] ; %xb = bitcast double %x to i64