Index: lib/Target/SystemZ/SystemZTargetTransformInfo.h =================================================================== --- lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -80,6 +80,8 @@ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy); unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy); + unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, + const Instruction *I); int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, Index: lib/Target/SystemZ/SystemZTargetTransformInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -635,6 +635,25 @@ return nullptr; } +// Get the cost of converting a boolean vector to a vector with the same width +// and element size as Dst, plus a zero-extension cost for ZExt and UIToFP. +unsigned SystemZTTIImpl:: +getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, + const Instruction *I) { + assert (Dst->isVectorTy()); + unsigned VF = Dst->getVectorNumElements(); + unsigned Cost = 0; + // If we know the widths of the compared operands, get any cost of + // converting the compare result to match Dst. Otherwise assume same widths. + Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr); + if (CmpOpTy != nullptr) + Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst); + if (Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP) + // One 'vn' per dst vector with an immediate mask. 
+ Cost += getNumVectorRegs(Dst); + return Cost; +} + int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I) { unsigned DstScalarBits = Dst->getScalarSizeInBits(); @@ -666,19 +685,8 @@ return (NumUnpacks * NumDstVectors) + NumSrcVectorOps; } - else if (SrcScalarBits == 1) { - // This should be extension of a compare i1 result. - // If we know what the widths of the compared operands, get the - // cost of converting it to Dst. Otherwise assume same widths. - unsigned Cost = 0; - Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr); - if (CmpOpTy != nullptr) - Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst); - if (Opcode == Instruction::ZExt) - // One 'vn' per dst vector with an immediate mask. - Cost += NumDstVectors; - return Cost; - } + else if (SrcScalarBits == 1) + return getBoolVecToIntConversionCost(Opcode, Dst, I); } if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP || @@ -687,8 +695,13 @@ // (seems to miss on differentiating on scalar/vector types). // Only 64 bit vector conversions are natively supported. - if (SrcScalarBits == 64 && DstScalarBits == 64) - return NumDstVectors; + if (DstScalarBits == 64) { + if (SrcScalarBits == 64) + return NumDstVectors; + + if (SrcScalarBits == 1) + return getBoolVecToIntConversionCost(Opcode, Dst, I) + NumDstVectors; + } // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. Base implementation does not Index: test/Analysis/CostModel/SystemZ/cmp-tofp.ll =================================================================== --- /dev/null +++ test/Analysis/CostModel/SystemZ/cmp-tofp.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s +; +; Costs for conversion of i1 vectors to vectors of double. 
+ +define <2 x double> @fun0(<2 x i8> %val1, <2 x i8> %val2) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %v = uitofp <2 x i1> %cmp to <2 x double> + ret <2 x double> %v + +; CHECK: fun0 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 5 for instruction: %v = uitofp <2 x i1> %cmp to <2 x double> +} + +define <2 x double> @fun1(<2 x i8> %val1, <2 x i8> %val2) { + %cmp = icmp eq <2 x i8> %val1, %val2 + %v = sitofp <2 x i1> %cmp to <2 x double> + ret <2 x double> %v + +; CHECK: fun1 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2 +; CHECK: cost of 4 for instruction: %v = sitofp <2 x i1> %cmp to <2 x double> +} + +define <2 x double> @fun2(<2 x i64> %val1, <2 x i64> %val2) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %v = uitofp <2 x i1> %cmp to <2 x double> + ret <2 x double> %v + +; CHECK: fun2 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 2 for instruction: %v = uitofp <2 x i1> %cmp to <2 x double> +} + +define <2 x double> @fun3(<2 x i64> %val1, <2 x i64> %val2) { + %cmp = icmp eq <2 x i64> %val1, %val2 + %v = sitofp <2 x i1> %cmp to <2 x double> + ret <2 x double> %v + +; CHECK: fun3 +; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2 +; CHECK: cost of 1 for instruction: %v = sitofp <2 x i1> %cmp to <2 x double> +}