Index: include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- include/llvm/CodeGen/BasicTTIImpl.h +++ include/llvm/CodeGen/BasicTTIImpl.h @@ -315,6 +315,8 @@ } // Else, assume that we need to scalarize this op. + // TODO: If one of the types gets legalized by splitting, handle this + // similarly to what getCastInstrCost() does. if (Ty->isVectorTy()) { unsigned Num = Ty->getVectorNumElements(); unsigned Cost = static_cast<T *>(this) @@ -409,12 +411,25 @@ return SrcLT.first * 1; } - // If we are converting vectors and the operation is illegal, or - // if the vectors are legalized to different types, estimate the - // scalarization costs. - // TODO: This is probably a big overestimate. For splits, we should have - // something like getTypeLegalizationCost() + 2 * getCastInstrCost(). - // The same applies to getCmpSelInstrCost() and getArithmeticInstrCost() + // If we are legalizing by splitting, query the concrete TTI for the cost + // of casting each half of the original vector. We also need to factor in + // the cost of the split itself. Count that as 1, to be consistent with + // TLI->getTypeLegalizationCost(). + if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == + TargetLowering::TypeSplitVector) || + (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == + TargetLowering::TypeSplitVector)) { + Type *SplitDst = VectorType::get(Dst->getVectorElementType(), + Dst->getVectorNumElements() / 2); + Type *SplitSrc = VectorType::get(Src->getVectorElementType(), + Src->getVectorNumElements() / 2); + T *TTI = static_cast<T *>(this); + return TTI->getVectorSplitCost() + + (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc)); + } + + // In other cases where the source or destination is illegal, assume + // the operation will get scalarized. 
unsigned Num = Dst->getVectorNumElements(); unsigned Cost = static_cast<T *>(this)->getCastInstrCost( Opcode, Dst->getScalarType(), Src->getScalarType()); @@ -472,6 +487,8 @@ } // Otherwise, assume that the cast is scalarized. + // TODO: If one of the types gets legalized by splitting, handle this + // similarly to what getCastInstrCost() does. if (ValTy->isVectorTy()) { unsigned Num = ValTy->getVectorNumElements(); if (CondTy) @@ -480,8 +497,7 @@ Opcode, ValTy->getScalarType(), CondTy); // Return the cost of multiple scalar invocation plus the cost of - // inserting - // and extracting the values. + // inserting and extracting the values. return getScalarizationOverhead(ValTy, true, false) + Num * Cost; } @@ -906,6 +922,8 @@ return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); } + unsigned getVectorSplitCost() { return 1; } + /// @} }; Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -94,6 +94,8 @@ int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); bool isSourceOfDivergence(const Value *V) const; + + unsigned getVectorSplitCost() { return 0; } }; } // end namespace llvm Index: test/Analysis/CostModel/ARM/cast.ll =================================================================== --- test/Analysis/CostModel/ARM/cast.ll +++ test/Analysis/CostModel/ARM/cast.ll @@ -264,39 +264,39 @@ %r116 = fptoui <4 x float> undef to <4 x i32> ; CHECK: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> %r117 = fptosi <4 x float> undef to <4 x i32> - ; CHECK: Found an estimated cost of 64 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> + ; CHECK: Found an estimated cost of 65 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> %r118 = fptoui <4 x float> undef to <4 x i64> - ; CHECK: Found an estimated cost of 64 
for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> + ; CHECK: Found an estimated cost of 65 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> %r119 = fptosi <4 x float> undef to <4 x i64> - ; CHECK: Found an estimated cost of 32 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> + ; CHECK: Found an estimated cost of 33 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> %r120 = fptoui <4 x double> undef to <4 x i1> - ; CHECK: Found an estimated cost of 32 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> + ; CHECK: Found an estimated cost of 33 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> %r121 = fptosi <4 x double> undef to <4 x i1> - ; CHECK: Found an estimated cost of 32 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> + ; CHECK: Found an estimated cost of 33 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> %r122 = fptoui <4 x double> undef to <4 x i8> - ; CHECK: Found an estimated cost of 32 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> + ; CHECK: Found an estimated cost of 33 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> %r123 = fptosi <4 x double> undef to <4 x i8> - ; CHECK: Found an estimated cost of 32 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> + ; CHECK: Found an estimated cost of 33 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> %r124 = fptoui <4 x double> undef to <4 x i16> - ; CHECK: Found an estimated cost of 32 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> + ; CHECK: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> %r125 = fptosi <4 x double> undef to <4 x i16> - ; CHECK: Found an estimated cost of 32 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> + ; CHECK: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> %r126 = fptoui <4 x double> undef to <4 x i32> 
- ; CHECK: Found an estimated cost of 32 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> + ; CHECK: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> %r127 = fptosi <4 x double> undef to <4 x i32> - ; CHECK: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> + ; CHECK: Found an estimated cost of 65 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> %r128 = fptoui <4 x double> undef to <4 x i64> - ; CHECK: Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> + ; CHECK: Found an estimated cost of 65 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> %r129 = fptosi <4 x double> undef to <4 x i64> - ; CHECK: Found an estimated cost of 64 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> + ; CHECK: Found an estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> %r130 = fptoui <8 x float> undef to <8 x i1> - ; CHECK: Found an estimated cost of 64 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> + ; CHECK: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> %r131 = fptosi <8 x float> undef to <8 x i1> - ; CHECK: Found an estimated cost of 64 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> + ; CHECK: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> %r132 = fptoui <8 x float> undef to <8 x i8> - ; CHECK: Found an estimated cost of 64 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> + ; CHECK: Found an estimated cost of 7 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8> %r133 = fptosi <8 x float> undef to <8 x i8> ; CHECK: Found an estimated cost of 4 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16> %r134 = fptoui <8 x float> undef to <8 x i16> @@ -306,39 +306,39 @@ %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK: 
Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> %r137 = fptosi <8 x float> undef to <8 x i32> - ; CHECK: Found an estimated cost of 128 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> + ; CHECK: Found an estimated cost of 131 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> %r138 = fptoui <8 x float> undef to <8 x i64> - ; CHECK: Found an estimated cost of 128 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> + ; CHECK: Found an estimated cost of 131 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> %r139 = fptosi <8 x float> undef to <8 x i64> - ; CHECK: Found an estimated cost of 64 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> + ; CHECK: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> %r140 = fptoui <8 x double> undef to <8 x i1> - ; CHECK: Found an estimated cost of 64 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> + ; CHECK: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> %r141 = fptosi <8 x double> undef to <8 x i1> - ; CHECK: Found an estimated cost of 64 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> + ; CHECK: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> %r142 = fptoui <8 x double> undef to <8 x i8> - ; CHECK: Found an estimated cost of 64 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> + ; CHECK: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> %r143 = fptosi <8 x double> undef to <8 x i8> - ; CHECK: Found an estimated cost of 64 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> + ; CHECK: Found an estimated cost of 67 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> %r144 = fptoui <8 x double> undef to <8 x i16> - ; CHECK: Found an estimated cost of 64 for instruction: %r145 = fptosi <8 
x double> undef to <8 x i16> + ; CHECK: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> %r145 = fptosi <8 x double> undef to <8 x i16> - ; CHECK: Found an estimated cost of 64 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> + ; CHECK: Found an estimated cost of 11 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> %r146 = fptoui <8 x double> undef to <8 x i32> - ; CHECK: Found an estimated cost of 64 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> + ; CHECK: Found an estimated cost of 11 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> %r147 = fptosi <8 x double> undef to <8 x i32> - ; CHECK: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> + ; CHECK: Found an estimated cost of 131 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> %r148 = fptoui <8 x double> undef to <8 x i64> - ; CHECK: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> + ; CHECK: Found an estimated cost of 131 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> %r149 = fptosi <8 x double> undef to <8 x i64> - ; CHECK: Found an estimated cost of 128 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> + ; CHECK: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> %r150 = fptoui <16 x float> undef to <16 x i1> - ; CHECK: Found an estimated cost of 128 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> + ; CHECK: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> %r151 = fptosi <16 x float> undef to <16 x i1> - ; CHECK: Found an estimated cost of 128 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> + ; CHECK: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> %r152 = fptoui <16 x float> undef to <16 x i8> - ; CHECK: 
Found an estimated cost of 128 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> + ; CHECK: Found an estimated cost of 15 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> %r153 = fptosi <16 x float> undef to <16 x i8> ; CHECK: Found an estimated cost of 8 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> %r154 = fptoui <16 x float> undef to <16 x i16> @@ -348,30 +348,30 @@ %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> %r157 = fptosi <16 x float> undef to <16 x i32> - ; CHECK: Found an estimated cost of 256 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> + ; CHECK: Found an estimated cost of 263 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> %r158 = fptoui <16 x float> undef to <16 x i64> - ; CHECK: Found an estimated cost of 256 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> + ; CHECK: Found an estimated cost of 263 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> %r159 = fptosi <16 x float> undef to <16 x i64> - ; CHECK: Found an estimated cost of 128 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> + ; CHECK: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> %r160 = fptoui <16 x double> undef to <16 x i1> - ; CHECK: Found an estimated cost of 128 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> + ; CHECK: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> %r161 = fptosi <16 x double> undef to <16 x i1> - ; CHECK: Found an estimated cost of 128 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> + ; CHECK: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> %r162 = fptoui <16 x double> undef to <16 x i8> - ; CHECK: Found an estimated cost of 128 for instruction: %r163 = 
fptosi <16 x double> undef to <16 x i8> + ; CHECK: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> %r163 = fptosi <16 x double> undef to <16 x i8> - ; CHECK: Found an estimated cost of 128 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> + ; CHECK: Found an estimated cost of 135 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> %r164 = fptoui <16 x double> undef to <16 x i16> - ; CHECK: Found an estimated cost of 128 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> + ; CHECK: Found an estimated cost of 135 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> %r165 = fptosi <16 x double> undef to <16 x i16> - ; CHECK: Found an estimated cost of 128 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> + ; CHECK: Found an estimated cost of 23 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> %r166 = fptoui <16 x double> undef to <16 x i32> - ; CHECK: Found an estimated cost of 128 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> + ; CHECK: Found an estimated cost of 23 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> %r167 = fptosi <16 x double> undef to <16 x i32> - ; CHECK: Found an estimated cost of 256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> + ; CHECK: Found an estimated cost of 263 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> %r168 = fptoui <16 x double> undef to <16 x i64> - ; CHECK: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> + ; CHECK: Found an estimated cost of 263 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> @@ -432,39 +432,39 @@ %r196 = uitofp <4 x i32> undef to <4 x float> ; CHECK: Found an estimated cost of 1 for 
instruction: %r197 = sitofp <4 x i32> undef to <4 x float> %r197 = sitofp <4 x i32> undef to <4 x float> - ; CHECK: Found an estimated cost of 56 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> + ; CHECK: Found an estimated cost of 57 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> %r198 = uitofp <4 x i64> undef to <4 x float> - ; CHECK: Found an estimated cost of 56 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> + ; CHECK: Found an estimated cost of 57 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> %r199 = sitofp <4 x i64> undef to <4 x float> - ; CHECK: Found an estimated cost of 16 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> + ; CHECK: Found an estimated cost of 17 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> %r200 = uitofp <4 x i1> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> + ; CHECK: Found an estimated cost of 17 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> %r201 = sitofp <4 x i1> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> + ; CHECK: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> %r202 = uitofp <4 x i8> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> + ; CHECK: Found an estimated cost of 9 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> %r203 = sitofp <4 x i8> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> + ; CHECK: Found an estimated cost of 7 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> %r204 = uitofp <4 x i16> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> + ; CHECK: 
Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> %r205 = sitofp <4 x i16> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> + ; CHECK: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> %r206 = uitofp <4 x i32> undef to <4 x double> - ; CHECK: Found an estimated cost of 16 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> + ; CHECK: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> %r207 = sitofp <4 x i32> undef to <4 x double> - ; CHECK: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> + ; CHECK: Found an estimated cost of 49 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> %r208 = uitofp <4 x i64> undef to <4 x double> - ; CHECK: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> + ; CHECK: Found an estimated cost of 49 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> %r209 = sitofp <4 x i64> undef to <4 x double> - ; CHECK: Found an estimated cost of 48 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> + ; CHECK: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> %r210 = uitofp <8 x i1> undef to <8 x float> - ; CHECK: Found an estimated cost of 48 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> + ; CHECK: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> %r211 = sitofp <8 x i1> undef to <8 x float> - ; CHECK: Found an estimated cost of 48 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> + ; CHECK: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> %r212 = uitofp <8 x i8> undef to <8 x float> - ; CHECK: Found an estimated cost of 48 for instruction: %r213 = sitofp <8 x i8> 
undef to <8 x float> + ; CHECK: Found an estimated cost of 7 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float> %r213 = sitofp <8 x i8> undef to <8 x float> ; CHECK: Found an estimated cost of 4 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float> %r214 = uitofp <8 x i16> undef to <8 x float> @@ -474,39 +474,39 @@ %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> %r217 = sitofp <8 x i32> undef to <8 x float> - ; CHECK: Found an estimated cost of 112 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> + ; CHECK: Found an estimated cost of 115 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> %r218 = uitofp <8 x i64> undef to <8 x float> - ; CHECK: Found an estimated cost of 112 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> + ; CHECK: Found an estimated cost of 115 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> %r219 = sitofp <8 x i64> undef to <8 x float> - ; CHECK: Found an estimated cost of 32 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> + ; CHECK: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> %r220 = uitofp <8 x i1> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> + ; CHECK: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> %r221 = sitofp <8 x i1> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> + ; CHECK: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> %r222 = uitofp <8 x i8> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> + ; CHECK: Found an estimated cost of 19 for instruction: %r223 = sitofp <8 x i8> undef to 
<8 x double> %r223 = sitofp <8 x i8> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> + ; CHECK: Found an estimated cost of 15 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> %r224 = uitofp <8 x i16> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> + ; CHECK: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> %r225 = sitofp <8 x i16> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> + ; CHECK: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> %r226 = uitofp <8 x i16> undef to <8 x double> - ; CHECK: Found an estimated cost of 32 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> + ; CHECK: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> %r227 = sitofp <8 x i16> undef to <8 x double> - ; CHECK: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> + ; CHECK: Found an estimated cost of 99 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> %r228 = uitofp <8 x i64> undef to <8 x double> - ; CHECK: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> + ; CHECK: Found an estimated cost of 99 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> %r229 = sitofp <8 x i64> undef to <8 x double> - ; CHECK: Found an estimated cost of 96 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> + ; CHECK: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> %r230 = uitofp <16 x i1> undef to <16 x float> - ; CHECK: Found an estimated cost of 96 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> + ; CHECK: Found an estimated cost 
of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> %r231 = sitofp <16 x i1> undef to <16 x float> - ; CHECK: Found an estimated cost of 96 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> + ; CHECK: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> %r232 = uitofp <16 x i8> undef to <16 x float> - ; CHECK: Found an estimated cost of 96 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> + ; CHECK: Found an estimated cost of 15 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> %r233 = sitofp <16 x i8> undef to <16 x float> ; CHECK: Found an estimated cost of 8 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> %r234 = uitofp <16 x i16> undef to <16 x float> @@ -516,30 +516,30 @@ %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> %r237 = sitofp <16 x i32> undef to <16 x float> - ; CHECK: Found an estimated cost of 224 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> + ; CHECK: Found an estimated cost of 231 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> %r238 = uitofp <16 x i64> undef to <16 x float> - ; CHECK: Found an estimated cost of 224 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> + ; CHECK: Found an estimated cost of 231 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> %r239 = sitofp <16 x i64> undef to <16 x float> - ; CHECK: Found an estimated cost of 64 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> + ; CHECK: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> %r240 = uitofp <16 x i1> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> + ; CHECK: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> 
%r241 = sitofp <16 x i1> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> + ; CHECK: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> %r242 = uitofp <16 x i8> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> + ; CHECK: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> %r243 = sitofp <16 x i8> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> + ; CHECK: Found an estimated cost of 31 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> %r244 = uitofp <16 x i16> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> + ; CHECK: Found an estimated cost of 31 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> %r245 = sitofp <16 x i16> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> + ; CHECK: Found an estimated cost of 31 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> %r246 = uitofp <16 x i16> undef to <16 x double> - ; CHECK: Found an estimated cost of 64 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> + ; CHECK: Found an estimated cost of 31 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> %r247 = sitofp <16 x i16> undef to <16 x double> - ; CHECK: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> + ; CHECK: Found an estimated cost of 199 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> %r248 = uitofp <16 x i64> undef to <16 x double> - ; CHECK: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> + ; 
CHECK: Found an estimated cost of 199 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> %r249 = sitofp <16 x i64> undef to <16 x double> ; CHECK: Found an estimated cost of 0 for instruction: ret i32 undef Index: test/Analysis/CostModel/PowerPC/ext.ll =================================================================== --- test/Analysis/CostModel/PowerPC/ext.ll +++ test/Analysis/CostModel/PowerPC/ext.ll @@ -13,7 +13,7 @@ ; CHECK: cost of 1 {{.*}} sext %v3 = sext <4 x i16> undef to <4 x i32> - ; CHECK: cost of 112 {{.*}} sext + ; CHECK: cost of 3 {{.*}} sext %v4 = sext <8 x i16> undef to <8 x i32> ret void Index: test/Analysis/CostModel/X86/sitofp.ll =================================================================== --- test/Analysis/CostModel/X86/sitofp.ll +++ test/Analysis/CostModel/X86/sitofp.ll @@ -40,10 +40,10 @@ ; SSE2: cost of 80 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv8i8v8double - ; AVX1: cost of 20 {{.*}} sitofp + ; AVX1: cost of 7 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv8i8v8double - ; AVX2: cost of 20 {{.*}} sitofp + ; AVX2: cost of 7 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv8i8v8double ; AVX512F: cost of 2 {{.*}} sitofp @@ -56,13 +56,13 @@ ; SSE2: cost of 160 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i8v16double - ; AVX1: cost of 40 {{.*}} sitofp + ; AVX1: cost of 15 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i8v16double - ; AVX2: cost of 40 {{.*}} sitofp + ; AVX2: cost of 15 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i8v16double - ; AVX512F: cost of 44 {{.*}} sitofp + ; AVX512F: cost of 5 {{.*}} sitofp %1 = sitofp <16 x i8> %a to <16 x double> ret <16 x double> %1 } @@ -72,13 +72,13 @@ ; SSE2: cost of 320 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i8v32double - ; AVX1: cost of 80 {{.*}} sitofp + ; AVX1: cost of 31 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i8v32double - ; AVX2: cost of 80 {{.*}} sitofp + ; AVX2: cost of 31 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i8v32double - ; AVX512F: cost of 88 {{.*}} sitofp + ; AVX512F: cost of 11 {{.*}} sitofp %1 = 
sitofp <32 x i8> %a to <32 x double> ret <32 x double> %1 } @@ -120,10 +120,10 @@ ; SSE2: cost of 80 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv8i16v8double - ; AVX1: cost of 20 {{.*}} sitofp + ; AVX1: cost of 7 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv8i16v8double - ; AVX2: cost of 20 {{.*}} sitofp + ; AVX2: cost of 7 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv8i16v8double ; AVX512F: cost of 2 {{.*}} sitofp @@ -136,13 +136,13 @@ ; SSE2: cost of 160 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i16v16double - ; AVX1: cost of 40 {{.*}} sitofp + ; AVX1: cost of 15 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i16v16double - ; AVX2: cost of 40 {{.*}} sitofp + ; AVX2: cost of 15 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i16v16double - ; AVX512F: cost of 44 {{.*}} sitofp + ; AVX512F: cost of 5 {{.*}} sitofp %1 = sitofp <16 x i16> %a to <16 x double> ret <16 x double> %1 } @@ -152,13 +152,13 @@ ; SSE2: cost of 320 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i16v32double - ; AVX1: cost of 80 {{.*}} sitofp + ; AVX1: cost of 31 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i16v32double - ; AVX2: cost of 80 {{.*}} sitofp + ; AVX2: cost of 31 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i16v32double - ; AVX512F: cost of 88 {{.*}} sitofp + ; AVX512F: cost of 11 {{.*}} sitofp %1 = sitofp <32 x i16> %a to <32 x double> ret <32 x double> %1 } @@ -200,10 +200,10 @@ ; SSE2: cost of 80 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv8i32v8double - ; AVX1: cost of 20 {{.*}} sitofp + ; AVX1: cost of 3 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv8i32v8double - ; AVX2: cost of 20 {{.*}} sitofp + ; AVX2: cost of 3 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv8i32v8double ; AVX512F: cost of 1 {{.*}} sitofp @@ -216,13 +216,13 @@ ; SSE2: cost of 160 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i32v16double - ; AVX1: cost of 40 {{.*}} sitofp + ; AVX1: cost of 7 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i32v16double - ; AVX2: cost of 40 {{.*}} sitofp + ; AVX2: cost of 7 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i32v16double - ; AVX512F: cost of 44 {{.*}} sitofp + ; 
AVX512F: cost of 3 {{.*}} sitofp %1 = sitofp <16 x i32> %a to <16 x double> ret <16 x double> %1 } @@ -232,13 +232,13 @@ ; SSE2: cost of 320 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i32v32double - ; AVX1: cost of 80 {{.*}} sitofp + ; AVX1: cost of 15 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i32v32double - ; AVX2: cost of 80 {{.*}} sitofp + ; AVX2: cost of 15 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i32v32double - ; AVX512F: cost of 88 {{.*}} sitofp + ; AVX512F: cost of 7 {{.*}} sitofp %1 = sitofp <32 x i32> %a to <32 x double> ret <32 x double> %1 } @@ -280,10 +280,10 @@ ; SSE2: cost of 80 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv8i64v8double - ; AVX1: cost of 20 {{.*}} sitofp + ; AVX1: cost of 21 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv8i64v8double - ; AVX2: cost of 20 {{.*}} sitofp + ; AVX2: cost of 21 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv8i64v8double ; AVX512F: cost of 22 {{.*}} sitofp @@ -296,13 +296,13 @@ ; SSE2: cost of 160 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i64v16double - ; AVX1: cost of 40 {{.*}} sitofp + ; AVX1: cost of 43 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i64v16double - ; AVX2: cost of 40 {{.*}} sitofp + ; AVX2: cost of 43 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i64v16double - ; AVX512F: cost of 44 {{.*}} sitofp + ; AVX512F: cost of 45 {{.*}} sitofp %1 = sitofp <16 x i64> %a to <16 x double> ret <16 x double> %1 } @@ -312,13 +312,13 @@ ; SSE2: cost of 320 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i64v32double - ; AVX1: cost of 80 {{.*}} sitofp + ; AVX1: cost of 87 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i64v32double - ; AVX2: cost of 80 {{.*}} sitofp + ; AVX2: cost of 87 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i64v32double - ; AVX512F: cost of 88 {{.*}} sitofp + ; AVX512F: cost of 91 {{.*}} sitofp %1 = sitofp <32 x i64> %a to <32 x double> ret <32 x double> %1 } @@ -376,10 +376,10 @@ ; SSE2: cost of 8 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i8v16float - ; AVX1: cost of 44 {{.*}} sitofp + ; AVX1: cost of 17 {{.*}} sitofp ; ; AVX2-LABEL: 
sitofpv16i8v16float - ; AVX2: cost of 44 {{.*}} sitofp + ; AVX2: cost of 17 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i8v16float ; AVX512F: cost of 2 {{.*}} sitofp @@ -392,13 +392,13 @@ ; SSE2: cost of 16 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i8v32float - ; AVX1: cost of 88 {{.*}} sitofp + ; AVX1: cost of 35 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i8v32float - ; AVX2: cost of 88 {{.*}} sitofp + ; AVX2: cost of 35 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i8v32float - ; AVX512F: cost of 92 {{.*}} sitofp + ; AVX512F: cost of 5 {{.*}} sitofp %1 = sitofp <32 x i8> %a to <32 x float> ret <32 x float> %1 } @@ -456,10 +456,10 @@ ; SSE2: cost of 30 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i16v16float - ; AVX1: cost of 44 {{.*}} sitofp + ; AVX1: cost of 11 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i16v16float - ; AVX2: cost of 44 {{.*}} sitofp + ; AVX2: cost of 11 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i16v16float ; AVX512F: cost of 2 {{.*}} sitofp @@ -472,13 +472,13 @@ ; SSE2: cost of 60 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i16v32float - ; AVX1: cost of 88 {{.*}} sitofp + ; AVX1: cost of 23 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i16v32float - ; AVX2: cost of 88 {{.*}} sitofp + ; AVX2: cost of 23 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i16v32float - ; AVX512F: cost of 92 {{.*}} sitofp + ; AVX512F: cost of 5 {{.*}} sitofp %1 = sitofp <32 x i16> %a to <32 x float> ret <32 x float> %1 } @@ -536,10 +536,10 @@ ; SSE2: cost of 60 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i32v16float - ; AVX1: cost of 44 {{.*}} sitofp + ; AVX1: cost of 3 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i32v16float - ; AVX2: cost of 44 {{.*}} sitofp + ; AVX2: cost of 3 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i32v16float ; AVX512F: cost of 1 {{.*}} sitofp @@ -552,13 +552,13 @@ ; SSE2: cost of 120 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i32v32float - ; AVX1: cost of 88 {{.*}} sitofp + ; AVX1: cost of 7 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i32v32float - ; AVX2: cost of 88 {{.*}} sitofp + ; AVX2: cost of 7 
{{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i32v32float - ; AVX512F: cost of 92 {{.*}} sitofp + ; AVX512F: cost of 3 {{.*}} sitofp %1 = sitofp <32 x i32> %a to <32 x float> ret <32 x float> %1 } @@ -600,10 +600,10 @@ ; SSE2: cost of 60 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv8i64v8float - ; AVX1: cost of 22 {{.*}} sitofp + ; AVX1: cost of 21 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv8i64v8float - ; AVX2: cost of 22 {{.*}} sitofp + ; AVX2: cost of 21 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv8i64v8float ; AVX512F: cost of 22 {{.*}} sitofp @@ -616,13 +616,13 @@ ; SSE2: cost of 120 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i64v16float - ; AVX1: cost of 44 {{.*}} sitofp + ; AVX1: cost of 43 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i64v16float - ; AVX2: cost of 44 {{.*}} sitofp + ; AVX2: cost of 43 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i64v16float - ; AVX512F: cost of 46 {{.*}} sitofp + ; AVX512F: cost of 45 {{.*}} sitofp %1 = sitofp <16 x i64> %a to <16 x float> ret <16 x float> %1 } @@ -632,13 +632,13 @@ ; SSE2: cost of 240 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv32i64v32float - ; AVX1: cost of 88 {{.*}} sitofp + ; AVX1: cost of 87 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv32i64v32float - ; AVX2: cost of 88 {{.*}} sitofp + ; AVX2: cost of 87 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv32i64v32float - ; AVX512F: cost of 92 {{.*}} sitofp + ; AVX512F: cost of 91 {{.*}} sitofp %1 = sitofp <32 x i64> %a to <32 x float> ret <32 x float> %1 } @@ -648,10 +648,10 @@ ; SSE2: cost of 80 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv8i1v8double - ; AVX1: cost of 20 {{.*}} sitofp + ; AVX1: cost of 7 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv8i1v8double - ; AVX2: cost of 20 {{.*}} sitofp + ; AVX2: cost of 7 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv8i1v8double ; AVX512F: cost of 4 {{.*}} sitofp @@ -665,10 +665,10 @@ ; SSE2: cost of 8 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv16i1v16float - ; AVX1: cost of 44 {{.*}} sitofp + ; AVX1: cost of 17 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv16i1v16float - ; AVX2: cost of 44 {{.*}} sitofp 
+ ; AVX2: cost of 17 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv16i1v16float ; AVX512F: cost of 3 {{.*}} sitofp Index: test/Analysis/CostModel/X86/uitofp.ll =================================================================== --- test/Analysis/CostModel/X86/uitofp.ll +++ test/Analysis/CostModel/X86/uitofp.ll @@ -41,10 +41,10 @@ ; SSE2: cost of 80 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv8i8v8double - ; AVX1: cost of 20 {{.*}} uitofp + ; AVX1: cost of 5 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv8i8v8double - ; AVX2: cost of 20 {{.*}} uitofp + ; AVX2: cost of 5 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv8i8v8double ; AVX512F: cost of 2 {{.*}} uitofp @@ -57,13 +57,13 @@ ; SSE2: cost of 160 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i8v16double - ; AVX1: cost of 40 {{.*}} uitofp + ; AVX1: cost of 11 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i8v16double - ; AVX2: cost of 40 {{.*}} uitofp + ; AVX2: cost of 11 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i8v16double - ; AVX512F: cost of 44 {{.*}} uitofp + ; AVX512F: cost of 5 {{.*}} uitofp %1 = uitofp <16 x i8> %a to <16 x double> ret <16 x double> %1 } @@ -73,13 +73,13 @@ ; SSE2: cost of 320 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i8v32double - ; AVX1: cost of 80 {{.*}} uitofp + ; AVX1: cost of 23 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i8v32double - ; AVX2: cost of 80 {{.*}} uitofp + ; AVX2: cost of 23 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i8v32double - ; AVX512F: cost of 88 {{.*}} uitofp + ; AVX512F: cost of 11 {{.*}} uitofp %1 = uitofp <32 x i8> %a to <32 x double> ret <32 x double> %1 } @@ -121,10 +121,10 @@ ; SSE2: cost of 80 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv8i16v8double - ; AVX1: cost of 20 {{.*}} uitofp + ; AVX1: cost of 5 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv8i16v8double - ; AVX2: cost of 20 {{.*}} uitofp + ; AVX2: cost of 5 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv8i16v8double ; AVX512F: cost of 2 {{.*}} uitofp @@ -137,13 +137,13 @@ ; SSE2: cost of 160 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i16v16double - ; AVX1: cost of 40 {{.*}} 
uitofp + ; AVX1: cost of 11 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i16v16double - ; AVX2: cost of 40 {{.*}} uitofp + ; AVX2: cost of 11 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i16v16double - ; AVX512F: cost of 44 {{.*}} uitofp + ; AVX512F: cost of 5 {{.*}} uitofp %1 = uitofp <16 x i16> %a to <16 x double> ret <16 x double> %1 } @@ -153,13 +153,13 @@ ; SSE2: cost of 320 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i16v32double - ; AVX1: cost of 80 {{.*}} uitofp + ; AVX1: cost of 23 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i16v32double - ; AVX2: cost of 80 {{.*}} uitofp + ; AVX2: cost of 23 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i16v32double - ; AVX512F: cost of 88 {{.*}} uitofp + ; AVX512F: cost of 11 {{.*}} uitofp %1 = uitofp <32 x i16> %a to <32 x double> ret <32 x double> %1 } @@ -201,10 +201,10 @@ ; SSE2: cost of 80 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv8i32v8double - ; AVX1: cost of 20 {{.*}} uitofp + ; AVX1: cost of 13 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv8i32v8double - ; AVX2: cost of 20 {{.*}} uitofp + ; AVX2: cost of 13 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv8i32v8double ; AVX512F: cost of 1 {{.*}} uitofp @@ -217,13 +217,13 @@ ; SSE2: cost of 160 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i32v16double - ; AVX1: cost of 40 {{.*}} uitofp + ; AVX1: cost of 27 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i32v16double - ; AVX2: cost of 40 {{.*}} uitofp + ; AVX2: cost of 27 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i32v16double - ; AVX512F: cost of 44 {{.*}} uitofp + ; AVX512F: cost of 3 {{.*}} uitofp %1 = uitofp <16 x i32> %a to <16 x double> ret <16 x double> %1 } @@ -233,13 +233,13 @@ ; SSE2: cost of 320 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i32v32double - ; AVX1: cost of 80 {{.*}} uitofp + ; AVX1: cost of 55 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i32v32double - ; AVX2: cost of 80 {{.*}} uitofp + ; AVX2: cost of 55 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i32v32double - ; AVX512F: cost of 88 {{.*}} uitofp + ; AVX512F: cost of 7 {{.*}} uitofp %1 = uitofp <32 x i32> %a to 
<32 x double> ret <32 x double> %1 } @@ -257,7 +257,7 @@ ; AVX512F-LABEL: uitofpv2i64v2double ; AVX512F: cost of 5 {{.*}} uitofp ; - ; AVX512DQ: uitofpv2i64v2double + ; AVX512DQ-LABEL: uitofpv2i64v2double ; AVX512DQ: cost of 1 {{.*}} uitofp %1 = uitofp <2 x i64> %a to <2 x double> ret <2 x double> %1 @@ -276,7 +276,7 @@ ; AVX512F-LABEL: uitofpv4i64v4double ; AVX512F: cost of 12 {{.*}} uitofp ; - ; AVX512DQ: uitofpv4i64v4double + ; AVX512DQ-LABEL: uitofpv4i64v4double ; AVX512DQ: cost of 1 {{.*}} uitofp %1 = uitofp <4 x i64> %a to <4 x double> ret <4 x double> %1 @@ -287,15 +287,15 @@ ; SSE2: cost of 80 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv8i64v8double - ; AVX1: cost of 20 {{.*}} uitofp + ; AVX1: cost of 81 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv8i64v8double - ; AVX2: cost of 20 {{.*}} uitofp + ; AVX2: cost of 81 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv8i64v8double ; AVX512F: cost of 26 {{.*}} uitofp ; - ; AVX512DQ: uitofpv8i64v8double + ; AVX512DQ-LABEL: uitofpv8i64v8double ; AVX512DQ: cost of 1 {{.*}} uitofp %1 = uitofp <8 x i64> %a to <8 x double> ret <8 x double> %1 @@ -306,16 +306,16 @@ ; SSE2: cost of 160 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i64v16double - ; AVX1: cost of 40 {{.*}} uitofp + ; AVX1: cost of 163 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i64v16double - ; AVX2: cost of 40 {{.*}} uitofp + ; AVX2: cost of 163 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i64v16double - ; AVX512F: cost of 44 {{.*}} uitofp + ; AVX512F: cost of 53 {{.*}} uitofp ; - ; AVX512DQ: uitofpv16i64v16double - ; AVX512DQ: cost of 44 {{.*}} uitofp + ; AVX512DQ-LABEL: uitofpv16i64v16double + ; AVX512DQ: cost of 3 {{.*}} uitofp %1 = uitofp <16 x i64> %a to <16 x double> ret <16 x double> %1 } @@ -325,16 +325,16 @@ ; SSE2: cost of 320 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i64v32double - ; AVX1: cost of 80 {{.*}} uitofp + ; AVX1: cost of 327 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i64v32double - ; AVX2: cost of 80 {{.*}} uitofp + ; AVX2: cost of 327 {{.*}} uitofp ; ; AVX512F-LABEL: 
uitofpv32i64v32double - ; AVX512F: cost of 88 {{.*}} uitofp + ; AVX512F: cost of 107 {{.*}} uitofp ; - ; AVX512DQ: uitofpv32i64v32double - ; AVX512DQ: cost of 88 {{.*}} uitofp + ; AVX512DQ-LABEL: uitofpv32i64v32double + ; AVX512DQ: cost of 7 {{.*}} uitofp %1 = uitofp <32 x i64> %a to <32 x double> ret <32 x double> %1 } @@ -392,10 +392,10 @@ ; SSE2: cost of 8 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i8v16float - ; AVX1: cost of 44 {{.*}} uitofp + ; AVX1: cost of 11 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i8v16float - ; AVX2: cost of 44 {{.*}} uitofp + ; AVX2: cost of 11 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i8v16float ; AVX512F: cost of 2 {{.*}} uitofp @@ -408,13 +408,13 @@ ; SSE2: cost of 16 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i8v32float - ; AVX1: cost of 88 {{.*}} uitofp + ; AVX1: cost of 23 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i8v32float - ; AVX2: cost of 88 {{.*}} uitofp + ; AVX2: cost of 23 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i8v32float - ; AVX512F: cost of 92 {{.*}} uitofp + ; AVX512F: cost of 5 {{.*}} uitofp %1 = uitofp <32 x i8> %a to <32 x float> ret <32 x float> %1 } @@ -472,10 +472,10 @@ ; SSE2: cost of 30 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i16v16float - ; AVX1: cost of 44 {{.*}} uitofp + ; AVX1: cost of 11 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i16v16float - ; AVX2: cost of 44 {{.*}} uitofp + ; AVX2: cost of 11 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i16v16float ; AVX512F: cost of 2 {{.*}} uitofp @@ -488,13 +488,13 @@ ; SSE2: cost of 60 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i16v32float - ; AVX1: cost of 88 {{.*}} uitofp + ; AVX1: cost of 23 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i16v32float - ; AVX2: cost of 88 {{.*}} uitofp + ; AVX2: cost of 23 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i16v32float - ; AVX512F: cost of 92 {{.*}} uitofp + ; AVX512F: cost of 5 {{.*}} uitofp %1 = uitofp <32 x i16> %a to <32 x float> ret <32 x float> %1 } @@ -552,10 +552,10 @@ ; SSE2: cost of 32 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i32v16float - ; AVX1:
cost of 44 {{.*}} uitofp + ; AVX1: cost of 19 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i32v16float - ; AVX2: cost of 44 {{.*}} uitofp + ; AVX2: cost of 17 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i32v16float ; AVX512F: cost of 1 {{.*}} uitofp @@ -568,13 +568,13 @@ ; SSE2: cost of 64 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i32v32float - ; AVX1: cost of 88 {{.*}} uitofp + ; AVX1: cost of 39 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i32v32float - ; AVX2: cost of 88 {{.*}} uitofp + ; AVX2: cost of 35 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i32v32float - ; AVX512F: cost of 92 {{.*}} uitofp + ; AVX512F: cost of 3 {{.*}} uitofp %1 = uitofp <32 x i32> %a to <32 x float> ret <32 x float> %1 } @@ -616,10 +616,10 @@ ; SSE2: cost of 60 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv8i64v8float - ; AVX1: cost of 22 {{.*}} uitofp + ; AVX1: cost of 21 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv8i64v8float - ; AVX2: cost of 22 {{.*}} uitofp + ; AVX2: cost of 21 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv8i64v8float ; AVX512F: cost of 22 {{.*}} uitofp @@ -632,13 +632,13 @@ ; SSE2: cost of 120 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv16i64v16float - ; AVX1: cost of 44 {{.*}} uitofp + ; AVX1: cost of 43 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv16i64v16float - ; AVX2: cost of 44 {{.*}} uitofp + ; AVX2: cost of 43 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv16i64v16float - ; AVX512F: cost of 46 {{.*}} uitofp + ; AVX512F: cost of 45 {{.*}} uitofp %1 = uitofp <16 x i64> %a to <16 x float> ret <16 x float> %1 } @@ -648,13 +648,13 @@ ; SSE2: cost of 240 {{.*}} uitofp ; ; AVX1-LABEL: uitofpv32i64v32float - ; AVX1: cost of 88 {{.*}} uitofp + ; AVX1: cost of 87 {{.*}} uitofp ; ; AVX2-LABEL: uitofpv32i64v32float - ; AVX2: cost of 88 {{.*}} uitofp + ; AVX2: cost of 87 {{.*}} uitofp ; ; AVX512F-LABEL: uitofpv32i64v32float - ; AVX512F: cost of 92 {{.*}} uitofp + ; AVX512F: cost of 91 {{.*}} uitofp %1 = uitofp <32 x i64> %a to <32 x float> ret <32 x float> %1 } Index: test/Transforms/LoopVectorize/X86/gather_scatter.ll 
=================================================================== --- test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -17,9 +17,9 @@ ;} ;AVX512-LABEL: @foo1 -;AVX512: llvm.masked.load.v8i32 -;AVX512: llvm.masked.gather.v8f32 -;AVX512: llvm.masked.store.v8f32 +;AVX512: llvm.masked.load.v16i32 +;AVX512: llvm.masked.gather.v16f32 +;AVX512: llvm.masked.store.v16f32 ;AVX512: ret void ; Function Attrs: nounwind uwtable