Index: lib/Target/SystemZ/SystemZTargetTransformInfo.h
===================================================================
--- lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -39,6 +39,8 @@
 
   unsigned getInliningThresholdMultiplier() { return 3; }
 
+  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
+
   int getIntImmCost(const APInt &Imm, Type *Ty);
 
   int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
Index: lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -31,6 +31,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+unsigned SystemZTTIImpl::getUserCost(const User *U,
+                                     ArrayRef<const Value *> Operands) {
+  // Extending a scalar i1 compare result costs two instructions for integer
+  // or a branch sequence for floating point, so return at least TCC_Basic
+  // (instead of TCC_Free).
+  if (const CastInst *CI = dyn_cast<CastInst>(U))
+    if (!CI->getType()->isVectorTy() && isa<CmpInst>(CI->getOperand(0)))
+      return TTI::TCC_Basic;
+
+  return BaseT::getUserCost(U, Operands);
+}
+
 int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
   assert(Ty->isIntegerTy());
 
Index: test/Analysis/CostModel/SystemZ/getUserCost_ext_i1.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/SystemZ/getUserCost_ext_i1.ll
@@ -0,0 +1,95 @@
+; RUN: opt < %s -mtriple=s390x-unknown-linux -mcpu=z13 -o - -loop-unroll \
+; RUN:   -debug-only=loop-unroll 2>&1 | FileCheck %s
+; REQUIRES: asserts
+;
+; Check that getUserCost() does not return TCC_Free (0 cost) for extensions
+; of i1. This function is called by CodeMetrics and seems to be a poor man's
+; alternative to the methods CostModel is using. There is no direct way of
+; testing the costs for the instructions, but one way is to make a small loop
+; and run loop-unroller and see what the loop size is.
+;
+; Phis are free, but the other instructions in for.body are not. Test that
+; extensions of i1 are counted as 1 (and not 0) by checking that the sum is 5.
+
+; CHECK: fun0
+; CHECK: Loop Size = 5
+; CHECK: fun1
+; CHECK: Loop Size = 5
+; CHECK: fun2
+; CHECK: Loop Size = 5
+; CHECK: fun3
+; CHECK: Loop Size = 5
+
+define i64 @fun0(i64 %n, i32 %v) {
+entry:
+  %cmp0 = icmp slt i32 %v, 0
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %S = phi i64 [ 0, %entry], [ %a, %for.body ]
+  %conv = zext i1 %cmp0 to i64
+  %a = add i64 %S, %conv
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cmp = icmp slt i64 %iv.next, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret i64 %S
+}
+
+define i64 @fun1(i64 %n, i32 %v) {
+entry:
+  %cmp0 = icmp slt i32 %v, 0
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %S = phi i64 [ 0, %entry], [ %a, %for.body ]
+  %conv = sext i1 %cmp0 to i64
+  %a = add i64 %S, %conv
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cmp = icmp slt i64 %iv.next, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret i64 %S
+}
+
+define double @fun2(i64 %n, i32 %v) {
+entry:
+  %cmp0 = icmp slt i32 %v, 0
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %S = phi double [ 0.0, %entry], [ %fpadd, %for.body ]
+  %conv = uitofp i1 %cmp0 to double
+  %fpadd = fadd double %S, %conv
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cmp = icmp slt i64 %iv.next, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret double %S
+}
+
+define double @fun3(i64 %n, i32 %v) {
+entry:
+  %cmp0 = icmp slt i32 %v, 0
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %S = phi double [ 0.0, %entry], [ %fpadd, %for.body ]
+  %conv = sitofp i1 %cmp0 to double
+  %fpadd = fadd double %S, %conv
+  %iv.next = add nuw nsw i64 %iv, 1
+  %cmp = icmp slt i64 %iv.next, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret double %S
+}
+
+