Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2484,13 +2484,6 @@
   }
 
   if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
-    if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
-      Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
-      return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
-                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
-      // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
-    }
-
     if (isa<IntegerType>(SrcTy)) {
       // If this is a cast from an integer to vector, check to see if the input
       // is a trunc or zext of a bitcast from vector.  If so, we can replace all
Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1021,9 +1021,26 @@
           VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
     return replaceInstUsesWith(IE, V);
 
+  // If the scalar is bitcast and inserted into undef, do the insert in the
+  // source type followed by bitcast.
+  // TODO: Generalize for insert into any constant, not just undef?
+  Value *ScalarSrc;
+  if (match(VecOp, m_Undef()) &&
+      match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
+      (ScalarSrc->getType()->isIntegerTy() ||
+       ScalarSrc->getType()->isFloatingPointTy())) {
+    // inselt undef, (bitcast ScalarSrc), IdxOp -->
+    //   bitcast (inselt undef, ScalarSrc, IdxOp)
+    Type *ScalarTy = ScalarSrc->getType();
+    Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
+    UndefValue *NewUndef = UndefValue::get(VecTy);
+    Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
+    return new BitCastInst(NewInsElt, IE.getType());
+  }
+
   // If the vector and scalar are both bitcast from the same element type, do
   // the insert in that source type followed by bitcast.
-  Value *VecSrc, *ScalarSrc;
+  Value *VecSrc;
   if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
       match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
       (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
Index: llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll
===================================================================
--- llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll
+++ llvm/test/Transforms/InstCombine/bitcast-vec-canon.ll
@@ -22,8 +22,7 @@
 
 define <1 x i64> @c(double %y) {
 ; CHECK-LABEL: @c(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double [[Y:%.*]] to i64
-; CHECK-NEXT:    [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+; CHECK-NEXT:    [[C:%.*]] = bitcast double [[Y:%.*]] to <1 x i64>
 ; CHECK-NEXT:    ret <1 x i64> [[C]]
 ;
   %c = bitcast double %y to <1 x i64>
@@ -32,8 +31,8 @@
 
 define <1 x i64> @d(i64 %y) {
 ; CHECK-LABEL: @d(
-; CHECK-NEXT:    [[C:%.*]] = insertelement <1 x i64> undef, i64 [[Y:%.*]], i32 0
-; CHECK-NEXT:    ret <1 x i64> [[C]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <1 x i64> undef, i64 [[Y:%.*]], i32 0
+; CHECK-NEXT:    ret <1 x i64> [[TMP1]]
 ;
   %c = bitcast i64 %y to <1 x i64>
   ret <1 x i64> %c
@@ -51,8 +50,7 @@
 
 define <1 x i64> @f(x86_mmx %y) {
 ; CHECK-LABEL: @f(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast x86_mmx [[Y:%.*]] to i64
-; CHECK-NEXT:    [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+; CHECK-NEXT:    [[C:%.*]] = bitcast x86_mmx [[Y:%.*]] to <1 x i64>
 ; CHECK-NEXT:    ret <1 x i64> [[C]]
 ;
   %c = bitcast x86_mmx %y to <1 x i64>
@@ -62,7 +60,8 @@
 define double @g(x86_mmx %x) {
 ; CHECK-LABEL: @g(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast x86_mmx [[X:%.*]] to double
+; CHECK-NEXT:    [[BC:%.*]] = bitcast x86_mmx [[X:%.*]] to <1 x double>
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x double> [[BC]], i32 0
 ; CHECK-NEXT:    ret double [[TMP0]]
 ;
 entry:
@@ -73,8 +72,8 @@
 
 define <3 x i64> @bitcast_inselt_undef(double %x, i32 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef(
-; CHECK-NEXT:    [[XB:%.*]] = bitcast double [[X:%.*]] to i64
-; CHECK-NEXT:    [[I:%.*]] = insertelement <3 x i64> undef, i64 [[XB]], i32 [[IDX:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x double> undef, double [[X:%.*]], i32 [[IDX:%.*]]
+; CHECK-NEXT:    [[I:%.*]] = bitcast <3 x double> [[TMP1]] to <3 x i64>
 ; CHECK-NEXT:    ret <3 x i64> [[I]]
 ;
   %xb = bitcast double %x to i64
@@ -82,10 +81,12 @@
   ret <3 x i64> %i
 }
 
+; Integer source bitcast to FP is ok; index can be any integer type.
+
 define <3 x float> @bitcast_inselt_undef_fp(i32 %x, i567 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef_fp(
-; CHECK-NEXT:    [[XB:%.*]] = bitcast i32 [[X:%.*]] to float
-; CHECK-NEXT:    [[I:%.*]] = insertelement <3 x float> undef, float [[XB]], i567 [[IDX:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[X:%.*]], i567 [[IDX:%.*]]
+; CHECK-NEXT:    [[I:%.*]] = bitcast <3 x i32> [[TMP1]] to <3 x float>
 ; CHECK-NEXT:    ret <3 x float> [[I]]
 ;
   %xb = bitcast i32 %x to float
@@ -95,6 +96,8 @@
 
 declare void @use(i64)
 
+; Negative test - extra use prevents canonicalization
+
 define <3 x i64> @bitcast_inselt_undef_extra_use(double %x, i32 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef_extra_use(
 ; CHECK-NEXT:    [[XB:%.*]] = bitcast double [[X:%.*]] to i64
@@ -108,6 +111,8 @@
   ret <3 x i64> %i
 }
 
+; Negative test - source type must be scalar
+
 define <3 x i64> @bitcast_inselt_undef_vec_src(<2 x i32> %x, i32 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef_vec_src(
 ; CHECK-NEXT:    [[XB:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64
@@ -119,6 +124,8 @@
   ret <3 x i64> %i
 }
 
+; Negative test - source type must be integer or FP (x86_mmx is neither)
+
 define <3 x i64> @bitcast_inselt_undef_from_mmx(x86_mmx %x, i32 %idx) {
 ; CHECK-LABEL: @bitcast_inselt_undef_from_mmx(
 ; CHECK-NEXT:    [[XB:%.*]] = bitcast x86_mmx [[X:%.*]] to i64
@@ -130,12 +137,13 @@
   ret <3 x i64> %i
 }
 
+; Reduce number of casts
+
 define <2 x i64> @PR45748(double %x, double %y) {
 ; CHECK-LABEL: @PR45748(
-; CHECK-NEXT:    [[XB:%.*]] = bitcast double [[X:%.*]] to i64
-; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x i64> undef, i64 [[XB]], i32 0
-; CHECK-NEXT:    [[YB:%.*]] = bitcast double [[Y:%.*]] to i64
-; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x i64> [[I0]], i64 [[YB]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[Y:%.*]], i32 1
+; CHECK-NEXT:    [[I1:%.*]] = bitcast <2 x double> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[I1]]
 ;
   %xb = bitcast double %x to i64