diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -871,12 +871,9 @@
     case Instruction::IntToPtr:
     case Instruction::PtrToInt:
     case Instruction::Trunc:
-      if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free ||
-          TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free)
-        return TTI::TCC_Free;
-      break;
     case Instruction::BitCast:
-      if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free)
+      if (TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) ==
+          TTI::TCC_Free)
         return TTI::TCC_Free;
       break;
     case Instruction::FPExt:
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -692,6 +692,9 @@
   unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                             TTI::TargetCostKind CostKind,
                             const Instruction *I = nullptr) {
+    if (BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I) == 0)
+      return 0;
+
     const TargetLoweringBase *TLI = getTLI();
     int ISD = TLI->InstructionOpcodeToISD(Opcode);
     assert(ISD && "Invalid opcode");
@@ -700,6 +703,8 @@
 
     unsigned SrcSize = SrcLT.second.getSizeInBits();
     unsigned DstSize = DstLT.second.getSizeInBits();
+    bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
+    bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();
 
     switch (Opcode) {
     default:
@@ -710,8 +715,10 @@
         return 0;
       LLVM_FALLTHROUGH;
     case Instruction::BitCast:
-      // Bitcast between types that are legalized to the same type are free.
-      if (SrcLT.first == DstLT.first && SrcSize == DstSize)
+      // Bitcasts between types that are legalized to the same type are free,
+      // and int to/from ptr casts of the same size are assumed free as well.
+      if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
+          SrcSize == DstSize)
         return 0;
       break;
     case Instruction::ZExt:
@@ -748,10 +755,6 @@
 
     // Handle scalar conversions.
     if (!SrcVTy && !DstVTy) {
-      // Scalar bitcasts are usually free.
-      if (Opcode == Instruction::BitCast)
-        return 0;
-
       // Just check the op cost. If the operation is legal then assume it costs
       // 1.
       if (!TLI->isOperationExpand(ISD, DstLT.second))
diff --git a/llvm/test/Analysis/CostModel/AArch64/cast.ll b/llvm/test/Analysis/CostModel/AArch64/cast.ll
--- a/llvm/test/Analysis/CostModel/AArch64/cast.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/cast.ll
@@ -677,12 +677,12 @@
 ; CHECK-LABEL: 'bitcasts'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %a = bitcast i32 undef to i32
diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll
--- a/llvm/test/Analysis/CostModel/ARM/cast.ll
+++ b/llvm/test/Analysis/CostModel/ARM/cast.ll
@@ -1868,56 +1868,56 @@
 ; CHECK-NEON-LABEL: 'bitcasts'
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %g = bitcast half undef to i16
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-LABEL: 'bitcasts'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-LABEL: 'bitcasts'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %g = bitcast half undef to i16
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-BASE-LABEL: 'bitcasts'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %g = bitcast half undef to i16
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8R-LABEL: 'bitcasts'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %g = bitcast half undef to i16
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %a = bitcast i32 undef to i32
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
@@ -1571,7 +1571,7 @@
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %imask = bitcast i16 %mask to <16 x i1>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
@@ -111,13 +111,15 @@
 ; CHECK-NEXT:    [[T2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2
 ; CHECK-NEXT:    [[T3:%.*]] = bitcast float* [[T2]] to i64*
 ; CHECK-NEXT:    [[T4:%.*]] = load i64, i64* [[T3]], align 8
-; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T1]] to i32
-; CHECK-NEXT:    [[T6:%.*]] = bitcast i32 [[T5]] to float
-; CHECK-NEXT:    [[T7:%.*]] = insertelement <4 x float> undef, float [[T6]], i32 0
 ; CHECK-NEXT:    [[T8:%.*]] = lshr i64 [[T1]], 32
-; CHECK-NEXT:    [[T9:%.*]] = trunc i64 [[T8]] to i32
-; CHECK-NEXT:    [[T10:%.*]] = bitcast i32 [[T9]] to float
-; CHECK-NEXT:    [[T11:%.*]] = insertelement <4 x float> [[T7]], float [[T10]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> undef, i64 [[T1]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[T8]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <2 x float>
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; CHECK-NEXT:    [[T7:%.*]] = insertelement <4 x float> undef, float [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
+; CHECK-NEXT:    [[T11:%.*]] = insertelement <4 x float> [[T7]], float [[TMP6]], i32 1
 ; CHECK-NEXT:    [[T12:%.*]] = trunc i64 [[T4]] to i32
 ; CHECK-NEXT:    [[T13:%.*]] = bitcast i32 [[T12]] to float
 ; CHECK-NEXT:    [[T14:%.*]] = insertelement <4 x float> [[T11]], float [[T13]], i32 2