Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -653,21 +653,36 @@ } return true; - } else if (isa(J) && - (J->getType()->getScalarType()->isFP128Ty() || - J->getType()->getScalarType()->isPPC_FP128Ty())) { - // Most operations on f128 or ppc_f128 values become calls. - return true; - } else if (isa(J) || isa(J) || - isa(J) || isa(J)) { - CastInst *CI = cast(J); - if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || - CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || - isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) || - isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType())) + } else if (const auto *CI = dyn_cast(J)) { + if (!ST->hasP9Vector() && (CI->getSrcTy()->getScalarType()->isFP128Ty() || + CI->getDestTy()->getScalarType()->isFP128Ty())) + return true; + // FIXME: ppc_fp128 to i32 and i32/u32 to ppc_fp128 don't require call + if (isa(J) || isa(J) || isa(J) || + isa(J)) + if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || + CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || + isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) || + isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType())) + return true; + } else if (isa(J)) { + if (!ST->hasP9Vector() && + J->getOperand(0)->getType()->getScalarType()->isFP128Ty()) + return true; + } else if (J->getType()->getScalarType()->isPPC_FP128Ty()) { + // There are no native instructions for ppc_fp128, but some operations can + // be lowered into separate instructions for high and low parts. + if (!isa(J) && !isa(J) && !isa(J) && + J->getOpcode() != Instruction::FNeg) + return true; + } else if (J->getType()->getScalarType()->isFP128Ty()) { + // Since Power9, we have native instructions for all the operations. 
+ // Before that, load and store are legal because fp128 uses vector + // registers. fneg is also an exception which doesn't require call. + if (!ST->hasP9Vector() && !isa(J) && !isa(J) && + J->getOpcode() != Instruction::FNeg) return true; - } else if (isLargeIntegerTy(!TM.isPPC64(), - J->getType()->getScalarType()) && + } else if (isLargeIntegerTy(!TM.isPPC64(), J->getType()->getScalarType()) && (J->getOpcode() == Instruction::UDiv || J->getOpcode() == Instruction::SDiv || J->getOpcode() == Instruction::URem || Index: llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll =================================================================== --- llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll +++ llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll @@ -1,5 +1,7 @@ -; RUN: llc -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr9 \ -; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr9 \ +; RUN: -mtriple=powerpc64le-unknown-unknown | FileCheck %s --check-prefix=P9 +; RUN: llc < %s -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr8 \ +; RUN: -mtriple=powerpc64le-unknown-unknown | FileCheck %s --check-prefix=P8 @a = internal global fp128 0xL00000000000000000000000000000000, align 16 @x = internal global [4 x fp128] zeroinitializer, align 16 @@ -24,9 +26,93 @@ for.end: ; preds = %for.body ret void -; CHECK-LABEL: fmul_ctrloop_fp128 -; CHECK-NOT: call void @llvm.set.loop.iterations.i64(i64 4) -; CHECK-NOT: call i1 @llvm.loop.decrement.i64(i64 1) +; P9-LABEL: fmul_ctrloop_fp128 +; P9: call void @llvm.set.loop.iterations.i64(i64 4) +; P9: call i1 @llvm.loop.decrement.i64(i64 1) + +; P8-LABEL: fmul_ctrloop_fp128 +; P8-NOT: call void @llvm.set.loop.iterations.i64(i64 4) +; P8-NOT: call i1 @llvm.loop.decrement.i64(i64 1) +} + +define void @fneg_ctrloop_fp128() { +entry: + %0 = load fp128, fp128* @a, align 16 + br label %for.body + +for.body: + %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %neg = 
fneg fp128 %0 + %arrayidx1 = getelementptr inbounds [4 x fp128], [4 x fp128]* @y, i64 0, i64 %i.06 + store fp128 %neg, fp128* %arrayidx1, align 16 + %inc = add nuw nsw i64 %i.06, 1 + %exitcond = icmp eq i64 %inc, 4 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void + +; P9-LABEL: fneg_ctrloop_fp128 +; P9: call void @llvm.set.loop.iterations.i64(i64 4) +; P9: call i1 @llvm.loop.decrement.i64(i64 1) + +; P8-LABEL: fneg_ctrloop_fp128 +; P8: call void @llvm.set.loop.iterations.i64(i64 4) +; P8: call i1 @llvm.loop.decrement.i64(i64 1) +} + +define void @fpext_ctrloop_fp128(double* %a) { +entry: + br label %for.body + +for.body: + %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds double, double* %a, i64 %i.06 + %0 = load double, double* %arrayidx, align 8 + %ext = fpext double %0 to fp128 + %arrayidx1 = getelementptr inbounds [4 x fp128], [4 x fp128]* @y, i64 0, i64 %i.06 + store fp128 %ext, fp128* %arrayidx1, align 16 + %inc = add nuw nsw i64 %i.06, 1 + %exitcond = icmp eq i64 %inc, 4 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void + +; P9-LABEL: fpext_ctrloop_fp128 +; P9: call void @llvm.set.loop.iterations.i64(i64 4) +; P9: call i1 @llvm.loop.decrement.i64(i64 1) + +; P8-LABEL: fpext_ctrloop_fp128 +; P8-NOT: call void @llvm.set.loop.iterations.i64(i64 4) +; P8-NOT: call i1 @llvm.loop.decrement.i64(i64 1) +} + +define void @fptrunc_ctrloop_fp128(double* %a) { +entry: + br label %for.body + +for.body: + %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds [4 x fp128], [4 x fp128]* @x, i64 0, i64 %i.06 + %0 = load fp128, fp128* %arrayidx, align 16 + %trunc = fptrunc fp128 %0 to double + %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.06 + store double %trunc, double* %arrayidx1, align 8 + %inc = add nuw nsw i64 %i.06, 1 + %exitcond = icmp eq i64 %inc, 4 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void + +; 
P9-LABEL: fptrunc_ctrloop_fp128 +; P9: call void @llvm.set.loop.iterations.i64(i64 4) +; P9: call i1 @llvm.loop.decrement.i64(i64 1) + +; P8-LABEL: fptrunc_ctrloop_fp128 +; P8-NOT: call void @llvm.set.loop.iterations.i64(i64 4) +; P8-NOT: call i1 @llvm.loop.decrement.i64(i64 1) } declare void @obfuscate(i8*, ...) local_unnamed_addr #2