diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1050,9 +1050,26 @@ VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE))) return replaceInstUsesWith(IE, V); + // If the scalar is bitcast and inserted into undef, do the insert in the + // source type followed by bitcast. + // TODO: Generalize for insert into any constant, not just undef? + Value *ScalarSrc; + if (match(VecOp, m_Undef()) && + match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) && + (ScalarSrc->getType()->isIntegerTy() || + ScalarSrc->getType()->isFloatingPointTy())) { + // inselt undef, (bitcast ScalarSrc), IdxOp --> + // bitcast (inselt undef, ScalarSrc, IdxOp) + Type *ScalarTy = ScalarSrc->getType(); + Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount()); + UndefValue *NewUndef = UndefValue::get(VecTy); + Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp); + return new BitCastInst(NewInsElt, IE.getType()); + } + // If the vector and scalar are both bitcast from the same element type, do // the insert in that source type followed by bitcast. - Value *VecSrc, *ScalarSrc; + Value *VecSrc; if (match(VecOp, m_BitCast(m_Value(VecSrc))) && match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) && (VecOp->hasOneUse() || ScalarOp->hasOneUse()) && diff --git a/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll b/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll --- a/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll +++ b/llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll @@ -70,10 +70,12 @@ ret double %1 } +; FP source is ok. 
+ define <3 x i64> @bitcast_inselt_undef(double %x, i32 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef( -; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64 -; CHECK-NEXT: [[I:%.*]] = insertelement <3 x i64> undef, i64 [[XB]], i32 [[IDX:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[X:%.*]], i32 [[IDX:%.*]] +; CHECK-NEXT: [[I:%.*]] = bitcast <3 x double> [[TMP1]] to <3 x i64> ; CHECK-NEXT: ret <3 x i64> [[I]] ; %xb = bitcast double %x to i64 @@ -81,10 +83,12 @@ ret <3 x i64> %i } +; Integer source is ok; index is anything. + define <3 x float> @bitcast_inselt_undef_fp(i32 %x, i567 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef_fp( -; CHECK-NEXT: [[XB:%.*]] = bitcast i32 [[X:%.*]] to float -; CHECK-NEXT: [[I:%.*]] = insertelement <3 x float> undef, float [[XB]], i567 [[IDX:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]] +; CHECK-NEXT: [[I:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float> ; CHECK-NEXT: ret <3 x float> [[I]] ; %xb = bitcast i32 %x to float @@ -92,8 +96,21 @@ ret <3 x float> %i } +define <vscale x 3 x float> @bitcast_inselt_undef_vscale(i32 %x, i567 %idx) { +; CHECK-LABEL: @bitcast_inselt_undef_vscale( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <vscale x 3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]] +; CHECK-NEXT: [[I:%.*]] = bitcast <vscale x 3 x i32> [[TMP1]] to <vscale x 3 x float> +; CHECK-NEXT: ret <vscale x 3 x float> [[I]] +; + %xb = bitcast i32 %x to float + %i = insertelement <vscale x 3 x float> undef, float %xb, i567 %idx + ret <vscale x 3 x float> %i +} + declare void @use(i64) +; Negative test - extra use prevents canonicalization + define <3 x i64> @bitcast_inselt_undef_extra_use(double %x, i32 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef_extra_use( ; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64 @@ -107,6 +124,8 @@ ret <3 x i64> %i } +; Negative test - source type must be scalar + define <3 x i64> @bitcast_inselt_undef_vec_src(<2 x i32> %x, i32 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef_vec_src( ; CHECK-NEXT: [[XB:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64 @@ -118,6 +137,8 @@ 
ret <3 x i64> %i } +; Negative test - source type must be scalar + define <3 x i64> @bitcast_inselt_undef_from_mmx(x86_mmx %x, i32 %idx) { ; CHECK-LABEL: @bitcast_inselt_undef_from_mmx( ; CHECK-NEXT: [[XB:%.*]] = bitcast x86_mmx [[X:%.*]] to i64 @@ -129,12 +150,13 @@ ret <3 x i64> %i } +; Reduce number of casts + define <2 x i64> @PR45748(double %x, double %y) { ; CHECK-LABEL: @PR45748( -; CHECK-NEXT: [[XB:%.*]] = bitcast double [[X:%.*]] to i64 -; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i64> undef, i64 [[XB]], i32 0 -; CHECK-NEXT: [[YB:%.*]] = bitcast double [[Y:%.*]] to i64 -; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i64> [[I0]], i64 [[YB]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[Y:%.*]], i32 1 +; CHECK-NEXT: [[I1:%.*]] = bitcast <2 x double> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[I1]] ; %xb = bitcast double %x to i64