diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -871,12 +871,9 @@ case Instruction::IntToPtr: case Instruction::PtrToInt: case Instruction::Trunc: - if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free || - TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free) - return TTI::TCC_Free; - break; case Instruction::BitCast: - if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free) + if (TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == + TTI::TCC_Free) return TTI::TCC_Free; break; case Instruction::FPExt: diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -692,6 +692,9 @@ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) { + if (BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I) == 0) + return 0; + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -700,6 +703,8 @@ unsigned SrcSize = SrcLT.second.getSizeInBits(); unsigned DstSize = DstLT.second.getSizeInBits(); + bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy(); + bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy(); switch (Opcode) { default: @@ -710,8 +715,10 @@ return 0; LLVM_FALLTHROUGH; case Instruction::BitCast: - // Bitcast between types that are legalized to the same type are free. - if (SrcLT.first == DstLT.first && SrcSize == DstSize) + // Bitcast between types that are legalized to the same type are free and + // assume int to/from ptr of the same size is also free. + if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst && + SrcSize == DstSize) return 0; break; case Instruction::ZExt: @@ -748,10 +755,6 @@ // Handle scalar conversions. if (!SrcVTy && !DstVTy) { - // Scalar bitcasts are usually free. - if (Opcode == Instruction::BitCast) - return 0; - // Just check the op cost. If the operation is legal then assume it costs // 1. if (!TLI->isOperationExpand(ISD, DstLT.second)) diff --git a/llvm/test/Analysis/CostModel/AArch64/cast.ll b/llvm/test/Analysis/CostModel/AArch64/cast.ll --- a/llvm/test/Analysis/CostModel/AArch64/cast.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cast.ll @@ -677,12 +677,12 @@ ; CHECK-LABEL: 'bitcasts' ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %a = bitcast i32 undef to i32 diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll --- a/llvm/test/Analysis/CostModel/ARM/cast.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast.ll @@ -1868,56 +1868,56 @@ ; CHECK-NEON-LABEL: 'bitcasts' ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %g = bitcast half undef to i16 -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-LABEL: 'bitcasts' ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-LABEL: 'bitcasts' ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %g = bitcast half undef to i16 -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64 +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-LABEL: 'bitcasts' ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %g = bitcast half undef to i16 -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64 +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8R-LABEL: 'bitcasts' ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %g = bitcast half undef to i16 -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32 +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %a = bitcast i32 undef to i32 diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -1571,7 +1571,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0 ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> ; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll @@ -111,13 +111,15 @@ ; CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2 ; CHECK-NEXT: [[T3:%.*]] = bitcast float* [[T2]] to i64* ; CHECK-NEXT: [[T4:%.*]] = load i64, i64* [[T3]], align 8 -; CHECK-NEXT: [[T5:%.*]] = trunc i64 [[T1]] to i32 -; CHECK-NEXT: [[T6:%.*]] = bitcast i32 [[T5]] to float -; CHECK-NEXT: [[T7:%.*]] = insertelement <4 x float> undef, float [[T6]], i32 0 ; CHECK-NEXT: [[T8:%.*]] = lshr i64 [[T1]], 32 -; CHECK-NEXT: [[T9:%.*]] = trunc i64 [[T8]] to i32 -; CHECK-NEXT: [[T10:%.*]] = bitcast i32 [[T9]] to float -; CHECK-NEXT: [[T11:%.*]] = insertelement <4 x float> [[T7]], float [[T10]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> undef, i64 [[T1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[T8]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <2 x float> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0 +; CHECK-NEXT: [[T7:%.*]] = insertelement <4 x float> undef, float [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1 +; CHECK-NEXT: [[T11:%.*]] = insertelement <4 x float> [[T7]], float [[TMP6]], i32 1 ; CHECK-NEXT: [[T12:%.*]] = trunc i64 [[T4]] to i32 ; CHECK-NEXT: [[T13:%.*]] = bitcast i32 [[T12]] to float ; CHECK-NEXT: [[T14:%.*]] = insertelement <4 x float> [[T11]], float [[T13]], i32 2