Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -785,18 +785,24 @@ // of casting the original vector twice. We also need to factor in the // cost of the split itself. Count that as 1, to be consistent with // TLI->getTypeLegalizationCost(). - if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == - TargetLowering::TypeSplitVector || - TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == - TargetLowering::TypeSplitVector) && - SrcVTy->getNumElements() > 1 && DstVTy->getNumElements() > 1) { - Type *SplitDst = VectorType::get(DstVTy->getElementType(), - DstVTy->getNumElements() / 2); - Type *SplitSrc = VectorType::get(SrcVTy->getElementType(), - SrcVTy->getNumElements() / 2); + bool SplitSrc = + TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == + TargetLowering::TypeSplitVector; + bool SplitDst = + TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == + TargetLowering::TypeSplitVector; + if ((SplitSrc || SplitDst) && SrcVTy->getNumElements() > 1 && + DstVTy->getNumElements() > 1) { + Type *SplitDstTy = VectorType::get(DstVTy->getElementType(), + DstVTy->getNumElements() / 2); + Type *SplitSrcTy = VectorType::get(SrcVTy->getElementType(), + SrcVTy->getNumElements() / 2); T *TTI = static_cast<T *>(this); - return TTI->getVectorSplitCost() + - (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I)); + // If both types need to be split then the split is free. + unsigned SplitCost = + (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0; + return SplitCost + + (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, I)); } // In other cases where the source or destination are illegal, assume Index: llvm/test/Analysis/CostModel/ARM/cast.ll =================================================================== --- llvm/test/Analysis/CostModel/ARM/cast.ll +++ llvm/test/Analysis/CostModel/ARM/cast.ll @@ -149,8 +149,8 @@ ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> @@ -159,18 +159,18 @@ ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> @@ -179,18 +179,18 @@ ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> @@ -229,8 +229,8 @@ ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> @@ -239,8 +239,8 @@ ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> @@ -249,8 +249,8 @@ ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> @@ -259,18 +259,18 @@ ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-MVE-LABEL: 'casts' @@ -371,13 +371,13 @@ ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r86 = fpext <2 x float> undef to <2 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %r89 = fpext <16 x float> undef to <16 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> @@ -416,8 +416,8 @@ ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> @@ -426,38 +426,38 @@ ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 661 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> @@ -496,8 +496,8 @@ ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> @@ -506,8 +506,8 @@ ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> @@ -516,28 +516,28 @@ ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1045 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-LABEL: 'casts' @@ -621,30 +621,30 @@ ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r86 = fpext <2 x float> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r89 = fpext <16 x float> undef to <16 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> @@ -653,16 +653,16 @@ ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> @@ -673,16 +673,16 @@ ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> @@ -693,16 +693,16 @@ ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> @@ -713,16 +713,16 @@ ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> @@ -733,16 +733,16 @@ ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> @@ -753,16 +753,16 @@ ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> @@ -773,16 +773,16 @@ ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> @@ -793,16 +793,16 @@ ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -888,30 +888,30 @@ ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s72 = zext <4 x i8> undef to <4 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r72 = zext <8 x i8> undef to <8 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r73 = zext <16 x i8> undef to <16 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_6 = sext <2 x i8> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_7 = zext <2 x i8> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %rext_8 = sext <2 x i16> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_9 = zext <2 x i16> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_a = sext <2 x i32> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %rext_b = zext <2 x i32> undef to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r86 = fpext <2 x float> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r87 = fpext <4 x float> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r88 = fpext <8 x float> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r89 = fpext <16 x float> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r86 = fpext <2 x float> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r87 = fpext <4 x float> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r88 = fpext <8 x float> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r89 = fpext <16 x float> undef to <16 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> @@ -920,16 +920,16 @@ ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> @@ -940,16 +940,16 @@ ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> @@ -960,16 +960,16 @@ ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> @@ -980,16 +980,16 @@ ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> @@ -1000,16 +1000,16 @@ ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> @@ -1020,16 +1020,16 @@ ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> @@ -1040,16 +1040,16 @@ ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> @@ -1060,16 +1060,16 @@ ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -1217,8 +1217,8 @@ ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> @@ -1227,18 +1227,18 @@ ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> @@ -1247,18 +1247,18 @@ ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> @@ -1297,8 +1297,8 @@ ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> @@ -1307,8 +1307,8 @@ ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> @@ -1317,8 +1317,8 @@ ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> @@ -1327,18 +1327,18 @@ ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; -- scalars -- @@ -1742,14 +1742,14 @@ ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8M-MAIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-LABEL: 'load_extends' @@ -1778,14 +1778,14 @@ ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1 = zext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = sext <4 x i8> %loadv4i8 to <4 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3 = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4 = sext <2 x i8> %loadv2i8 to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v5 = zext <2 x i8> %loadv2i8 to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v6 = sext <4 x i16> %loadv4i16 to <4 x i32> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v7 = zext <4 x i16> %loadv4i16 to <4 x i32> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> -; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8 = sext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = zext <2 x i16> %loadv2i16 to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v10 = sext <2 x i32> %loadv2i32 to <2 x i64> +; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v11 = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8M-BASE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8R-LABEL: 'load_extends' Index: llvm/test/Analysis/CostModel/X86/arith-fix.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/arith-fix.ll +++ llvm/test/Analysis/CostModel/X86/arith-fix.ll @@ -35,77 +35,77 @@ ; SSSE3-LABEL: 'smul' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'smul' ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'smul' ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'smul' ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'smul' @@ -168,58 +168,58 @@ ; SLM-LABEL: 'smul' ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'smul' ; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'smul' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.smul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.smul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call i32 @llvm.smul.fix.i32(i32 undef, i32 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I32 = call <8 x i32> @llvm.smul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V16I32 = call <16 x i32> @llvm.smul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.smul.fix.i16(i16 undef, i16 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call <8 x i16> @llvm.smul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I16 = call <16 x i16> @llvm.smul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I16 = call <32 x i16> @llvm.smul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.smul.fix.i8(i8 undef, i8 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.smul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32I8 = call <32 x i8> @llvm.smul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V64I8 = call <64 x i8> @llvm.smul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = call i64 @llvm.smul.fix.i64(i64 undef, i64 undef, i32 3) @@ -269,77 +269,77 @@ ; SSSE3-LABEL: 'umul' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'umul' ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'umul' ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'umul' ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'umul' @@ -402,58 +402,58 @@ ; SLM-LABEL: 'umul' ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'umul' ; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; GLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) +; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'umul' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umul.fix.v2i64(<2 x i64> undef, <2 x i64> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call <4 x i64> @llvm.umul.fix.v4i64(<4 x i64> undef, <4 x i64> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V8I64 = call <8 x i64> @llvm.umul.fix.v8i64(<8 x i64> undef, <8 x i64> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.umul.fix.i32(i32 undef, i32 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> undef, <4 x i32> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I32 = call <8 x i32> @llvm.umul.fix.v8i32(<8 x i32> undef, <8 x i32> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V16I32 = call <16 x i32> @llvm.umul.fix.v16i32(<16 x i32> undef, <16 x i32> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call i16 @llvm.umul.fix.i16(i16 undef, i16 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call <8 x i16> @llvm.umul.fix.v8i16(<8 x i16> undef, <8 x i16> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I16 = call <16 x i16> @llvm.umul.fix.v16i16(<16 x i16> undef, <16 x i16> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32I16 = call <32 x i16> @llvm.umul.fix.v32i16(<32 x i16> undef, <32 x i16> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call i8 @llvm.umul.fix.i8(i8 undef, i8 undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.umul.fix.v16i8(<16 x i8> undef, <16 x i8> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V32I8 = call <32 x i8> @llvm.umul.fix.v32i8(<32 x i8> undef, <32 x i8> undef, i32 3) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V64I8 = call <64 x i8> @llvm.umul.fix.v64i8(<64 x i8> undef, <64 x i8> undef, i32 3) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = call i64 @llvm.umul.fix.i64(i64 undef, i64 undef, i32 3) Index: llvm/test/Analysis/CostModel/X86/arith-overflow.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/arith-overflow.ll +++ llvm/test/Analysis/CostModel/X86/arith-overflow.ll @@ -991,77 +991,77 @@ ; SSSE3-LABEL: 'smul' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'smul' ; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'smul' ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'smul' ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'smul' @@ -1124,58 +1124,58 @@ ; SLM-LABEL: 'smul' ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'smul' ; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'smul' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef) @@ -1229,77 +1229,77 @@ ; SSSE3-LABEL: 'umul' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSSE3-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSSE3-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'umul' ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'umul' ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'umul' ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'umul' @@ -1362,58 +1362,58 @@ ; SLM-LABEL: 'umul' ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'umul' ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; GLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; GLM-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'umul' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef) Index: llvm/test/Analysis/CostModel/X86/cast.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/cast.ll +++ llvm/test/Analysis/CostModel/X86/cast.ll @@ -85,8 +85,8 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D = zext <4 x i32> undef to <4 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D3 = zext <16 x i16> undef to <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %D4 = zext <16 x i8> undef to <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %D5 = zext <16 x i1> undef to <16 x i32> @@ -95,7 +95,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G = trunc <8 x i64> undef to <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G = trunc <8 x i64> undef to <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -115,8 +115,8 @@ ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64> ; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D = zext <4 x i32> undef to <4 x i64> -; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> -; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D3 = zext <16 x i16> undef to <16 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D4 = zext <16 x i8> undef to <16 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %D5 = zext <16 x i1> undef to <16 x i32> @@ -125,7 +125,7 @@ ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8> ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8> ; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> -; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G = trunc <8 x i64> undef to <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G = trunc <8 x i64> undef to <8 x i32> ; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> ; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef @@ -529,9 +529,9 @@ define void @fp_conv(<8 x float> %a, <16 x float>%b, <4 x float> %c) { ; SSE-LABEL: 'fp_conv' ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = fpext <4 x float> %c to <4 x double> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A2 = fpext <8 x float> %a to <8 x double> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A2 = fpext <8 x float> %a to <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A3 = fptrunc <4 x double> undef to <4 x float> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A4 = fptrunc <8 x double> undef to <8 x float> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A4 = fptrunc <8 x double> undef to <8 x float> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'fp_conv' Index: llvm/test/Analysis/CostModel/X86/extend.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/extend.ll +++ llvm/test/Analysis/CostModel/X86/extend.ll @@ -16,21 +16,21 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i32> undef to <2 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i32> undef to <4 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'zext_vXi32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i32> undef to <2 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i32> undef to <4 x i64> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'zext_vXi32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I64 = zext i32 undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i32> undef to <2 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = zext <4 x i32> undef to <4 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = zext <8 x i32> undef to <8 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'zext_vXi32' @@ -184,7 +184,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'zext_vXi8' @@ -202,7 +202,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'zext_vXi8' @@ -220,7 +220,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i8> undef to <4 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i8> undef to <8 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = zext <16 x i8> undef to <16 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = zext <32 x i8> undef to <32 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'zext_vXi8' @@ -350,7 +350,7 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> @@ -519,21 +519,21 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i32 undef to i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i32> undef to <2 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sext <4 x i32> undef to <4 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sext_vXi32' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i32 undef to i64 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i32> undef to <2 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sext <4 x i32> undef to <4 x i64> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sext_vXi32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i32 undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i32> undef to <2 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = sext <4 x i32> undef to <4 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sext <8 x i32> undef to <8 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sext_vXi32' @@ -687,7 +687,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'sext_vXi8' @@ -705,7 +705,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'sext_vXi8' @@ -723,7 +723,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i8> undef to <4 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = sext <8 x i8> undef to <8 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = sext <16 x i8> undef to <16 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sext <32 x i8> undef to <32 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'sext_vXi8' @@ -853,7 +853,7 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8> Index: llvm/test/Analysis/CostModel/X86/fptosi.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/fptosi.ll +++ llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -14,22 +14,22 @@ ; SSE2-LABEL: 'fptosi_double_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptosi_double_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_double_i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptosi_double_i64' @@ -49,8 +49,8 @@ ; SLM-LABEL: 'fptosi_double_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = fptosi double undef to i64 @@ -65,7 +65,7 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_double_i32' @@ -166,16 +166,16 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptosi_float_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_float_i64' @@ -183,7 +183,7 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptosi_float_i64' @@ -206,8 +206,8 @@ ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> ; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = fptosi float undef to i64 @@ -257,7 +257,7 @@ ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> -; SSE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptosi_float_i16' Index: llvm/test/Analysis/CostModel/X86/fptoui.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/fptoui.ll +++ llvm/test/Analysis/CostModel/X86/fptoui.ll @@ -14,22 +14,22 @@ ; SSE2-LABEL: 'fptoui_double_i64' ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptoui_double_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptoui_double_i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptoui_double_i64' @@ -49,8 +49,8 @@ ; SLM-LABEL: 'fptoui_double_i64' ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 ; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = fptoui double undef to i64 @@ -65,14 +65,14 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptoui_double_i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptoui_double_i32' @@ -93,7 +93,7 @@ ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I32 = fptoui double undef to i32 @@ -180,16 +180,16 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; SSE2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptoui_float_i64' ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; SSE42-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptoui_float_i64' @@ -197,7 +197,7 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptoui_float_i64' @@ -220,8 +220,8 @@ ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64 ; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> ; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; SLM-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = fptoui float undef to i64 @@ -237,16 +237,16 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptoui_float_i32' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptoui_float_i32' @@ -254,7 +254,7 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fptoui_float_i32' @@ -269,8 +269,8 @@ ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 ; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I32 = fptoui float undef to i32 @@ -287,7 +287,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'fptoui_float_i16' @@ -295,7 +295,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'fptoui_float_i16' @@ -319,7 +319,7 @@ ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> ; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> -; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I16 = fptoui float undef to i16 Index: llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -1337,31 +1337,31 @@ define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) { ; SSE2-LABEL: 'test_gather_16f32_const_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_const_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1381,31 +1381,31 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) { ; SSE2-LABEL: 'test_gather_16f32_var_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_var_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_var_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_var_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_var_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind ; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1425,31 +1425,31 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) { ; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind ; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_ra_var_mask' -; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind ; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1471,7 +1471,7 @@ ; SSE2-LABEL: 'test_gather_16f32_const_mask2' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1479,7 +1479,7 @@ ; SSE42-LABEL: 'test_gather_16f32_const_mask2' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind ; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1487,7 +1487,7 @@ ; AVX1-LABEL: 'test_gather_16f32_const_mask2' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1495,7 +1495,7 @@ ; AVX2-LABEL: 'test_gather_16f32_const_mask2' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res @@ -1503,7 +1503,7 @@ ; SKL-LABEL: 'test_gather_16f32_const_mask2' ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer -; SKL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind ; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res Index: llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll +++ llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll @@ -268,13 +268,13 @@ ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i8 = zext <8 x i1> undef to <8 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = zext <16 x i1> undef to <16 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = zext <32 x i1> undef to <32 x i8> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64i8 = zext <64 x i1> undef to <64 x i8> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V64i8 = zext <64 x i1> undef to <64 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512VL512-LABEL: 'zext256_vXi1' @@ -406,14 +406,14 @@ ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8 ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i8 = sext <8 x i1> undef to <8 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i8 = sext <16 x i1> undef to <16 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i8 = sext <32 x i1> undef to <32 x i8> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V64i8 = sext <64 x i1> undef to <64 x i8> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64i8 = sext <64 x i1> undef to <64 x i8> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512VL512-LABEL: 'sext256_vXi1' @@ -527,19 +527,19 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> @@ -553,26 +553,26 @@ ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = trunc <16 x i8> undef to <16 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i8 = trunc <32 x i8> undef to <32 x i1> -; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> +; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64i8 = trunc <64 x i8> undef to <64 x i1> ; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512VL512-LABEL: 'trunc_vXi1' @@ -580,20 +580,20 @@ ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> ; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> Index: llvm/test/Analysis/CostModel/X86/sitofp.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/sitofp.ll +++ llvm/test/Analysis/CostModel/X86/sitofp.ll @@ -109,7 +109,7 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double ; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sitofp_i64_double' @@ -213,7 +213,7 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'sitofp_i32_float' @@ -246,7 +246,7 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sitofp_i64_float' Index: llvm/test/Analysis/CostModel/X86/trunc.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/trunc.ll +++ llvm/test/Analysis/CostModel/X86/trunc.ll @@ -15,15 +15,15 @@ ; SSE-LABEL: 'trunc_vXi32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'trunc_vXi32' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'trunc_vXi32' @@ -45,39 +45,39 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'trunc_vXi16' ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'trunc_vXi16' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = trunc <2 x i64> undef to <2 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'trunc_vXi16' @@ -85,12 +85,12 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'trunc_vXi16' @@ -98,12 +98,12 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'trunc_vXi16' @@ -124,12 +124,12 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2i64 = trunc <2 x i64> undef to <2 x i16> @@ -152,20 +152,20 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSSE3-LABEL: 'trunc_vXi8' @@ -173,20 +173,20 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 159 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'trunc_vXi8' @@ -194,20 +194,20 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX1-LABEL: 'trunc_vXi8' @@ -216,19 +216,19 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'trunc_vXi8' @@ -237,19 +237,19 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'trunc_vXi8' @@ -300,19 +300,19 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2i64 = trunc <2 x i64> undef to <2 x i8> @@ -345,20 +345,20 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> @@ -372,20 +372,20 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> @@ -399,20 +399,20 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> @@ -427,19 +427,19 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> @@ -453,20 +453,20 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = trunc <4 x i64> undef to <4 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = trunc <4 x i32> undef to <4 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i8 = trunc <4 x i8> undef to <4 x i1> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i8 = trunc <8 x i8> undef to <8 x i1> Index: llvm/test/Analysis/CostModel/X86/uitofp.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/uitofp.ll +++ llvm/test/Analysis/CostModel/X86/uitofp.ll @@ -109,7 +109,7 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_i64_f64 = uitofp i64 undef to double ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f64 = uitofp <2 x i64> undef to <2 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v4i64_v4f64 = uitofp <4 x i64> undef to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v8i64_v8f64 = uitofp <8 x i64> undef to <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'uitofp_i64_double' @@ -213,7 +213,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float> -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'uitofp_i32_float' @@ -221,7 +221,7 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float> -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'uitofp_i32_float' @@ -237,7 +237,7 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %cvt_i32_f32 = uitofp i32 undef to float @@ -262,7 +262,7 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %cvt_v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> -; AVX-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> +; AVX-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %cvt_v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'uitofp_i64_float'