diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -17,7 +17,7 @@ #include "llvm/Support/CommandLine.h" using namespace llvm; -static cl::opt ClVectorLibrary( +cl::opt ClVectorLibrary( "vector-library", cl::Hidden, cl::desc("Vector functions library"), cl::init(TargetLibraryInfoImpl::NoLibrary), cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none", diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1060,6 +1060,9 @@ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerToLibCall(const char *LibCallName, CallingConv::ID CC, + SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFPOWMASSV(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -34,6 +34,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -130,6 +131,8 @@ static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl); +extern cl::opt ClVectorLibrary; + // FIXME: Remove this once the bug has been fixed! 
extern cl::opt ANDIGlueBug; @@ -780,6 +783,7 @@ if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); + setOperationAction(ISD::FPOW, MVT::v4f32, Custom); } if (Subtarget.hasP8Altivec()) @@ -2837,6 +2841,50 @@ MachineMemOperand::MOLoad); } +SDValue PPCTargetLowering::LowerToLibCall(const char *LibCallName, + CallingConv::ID CC, SDValue Op, + SelectionDAG &DAG) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::CallLoweringInfo CLI(DAG); + EVT RetVT = Op.getValueType(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (const SDValue &OpArgs : Op->op_values()) { + EVT ArgVT = OpArgs.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = OpArgs; + Entry.Ty = ArgTy; + Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, false); + Entry.IsZExt = !TLI.shouldSignExtendTypeInLibCall(ArgVT, false); + Args.push_back(Entry); + } + + SDValue Callee = + DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout())); + bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false); + CLI.setDebugLoc(SDLoc(Op)) + .setChain(DAG.getEntryNode()) + .setLibCallee(CC, RetVT.getTypeForEVT(*DAG.getContext()), Callee, + std::move(Args)) + .setTailCall(true) + .setSExtResult(SignExtend) + .setZExtResult(!SignExtend) + .setIsPostTypeLegalization(true); + return TLI.LowerCallTo(CLI).first; +} + +SDValue PPCTargetLowering::LowerFPOWMASSV(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType() != MVT::v4f32 || !Subtarget.hasP8Vector() || + ClVectorLibrary != TargetLibraryInfoImpl::MASSV) + return SDValue(); + + if (Subtarget.hasP9Vector()) + return LowerToLibCall("__powf4_P9", CallingConv::C, Op, DAG); + else + return LowerToLibCall("__powf4_P8", CallingConv::C, Op, DAG); +} + SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = 
Op.getValueType();
@@ -10890,6 +10938,7 @@
   case ISD::MUL: return LowerMUL(Op, DAG);
   case ISD::ABS: return LowerABS(Op, DAG);
   case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+  case ISD::FPOW: return LowerFPOWMASSV(Op, DAG);
 
   // For counter-based loop handling.
   case ISD::INTRINSIC_W_CHAIN: return SDValue();
diff --git a/llvm/test/CodeGen/PowerPC/pow_massv_0.75exp.ll b/llvm/test/CodeGen/PowerPC/pow_massv_0.75exp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pow_massv_0.75exp.ll
@@ -0,0 +1,99 @@
+; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck -check-prefixes=CHECK-PWR9 %s
+; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-PWR8 %s
+; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr7 | FileCheck -check-prefixes=CHECK-PWR7 %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck -check-prefixes=CHECK-NOMASSV %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-NOMASSV %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr7 | FileCheck -check-prefixes=CHECK-NOMASSV %s
+
+; FIXME: No RUN line passes the plain CHECK prefix to FileCheck, so the
+; CHECK-LABEL, CHECK and CHECK-NOT directives in this file are never evaluated.
+
+; Exponent is a variable
+define void @vspow_no075(float* nocapture %z, float* nocapture readonly %y, float* nocapture readonly %x) {
+; CHECK-LABEL: @vspow_no075
+; CHECK-PWR9: bl __powf4_P9
+; CHECK-PWR8: bl __powf4_P8
+; CHECK-PWR7: bl powf
+; CHECK-NOMASSV-NOT: bl __powf4_P{{[89]}}
+; CHECK: blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr float, float* %z, i64 %index
+  %next.gep31 = getelementptr float, float* %y, i64 %index
+  %next.gep32 = getelementptr float, float* %x, i64 %index
+  %0 = bitcast float* %next.gep32 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %0, align 4
+  %1 = bitcast float* %next.gep31 to <4 x float>*
+  %wide.load33 = load <4 x float>, <4 x float>* %1, align 4
+  %2 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %wide.load, <4 x float> %wide.load33)
+  %3 = bitcast float* %next.gep to <4 x float>*
+  store <4 x float> %2, <4 x float>* %3, align 4
+  %index.next = add i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
+; Exponent is a constant != 0.75
+define void @vspow_no075c(float* nocapture %y, float* nocapture readonly %x) {
+; CHECK-LABEL: @vspow_no075c
+; CHECK-PWR9: bl __powf4_P9
+; CHECK-PWR8: bl __powf4_P8
+; CHECK-PWR7: bl powf
+; CHECK-NOMASSV-NOT: bl __powf4_P{{[89]}}
+; CHECK: blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr float, float* %y, i64 %index
+  %next.gep19 = getelementptr float, float* %x, i64 %index
+  %0 = bitcast float* %next.gep19 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %0, align 4
+  %1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
+  %2 = bitcast float* %next.gep to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
+; Exponent is 0.75
+define void @vspow_075(float* nocapture %y, float* nocapture readonly %x) {
+; CHECK-LABEL: @vspow_075
+; CHECK-NOT: bl __powf4_P{{[89]}}
+; CHECK: xvrsqrtesp
+; CHECK-NOMASSV-NOT: bl __powf4_P{{[89]}}
+; CHECK: blr
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
+  %next.gep = getelementptr float, float* %y, i64 %index
+  %next.gep19 = getelementptr float, float* %x, i64 %index
+  %0 = bitcast float* %next.gep19 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %0, align 4
+  %1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
+  %2 = bitcast float* %next.gep to <4 x float>*
+  store <4 x float> %1, <4 x float>* %2, align 4
+  %index.next = add i64 %index, 4
+  %3 = icmp eq i64 %index.next, 1024
+  br i1 %3, label %for.end, label %vector.body
+
+for.end:
+  ret void
+}
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
+
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/massv-calls.ll
@@ -209,31 +209,6 @@
   ret void
 }
 
-define void @pow_f32_intrinsic(float* nocapture %varray, float* nocapture readonly %exp) {
-; CHECK-LABEL: @pow_f32_intrinsic(
-; CHECK: __powf4_massv{{.*}}<4 x float>
-; CHECK: ret void
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %tmp = trunc i64 %iv to i32
-  %conv = sitofp i32 %tmp to float
-  %arrayidx = getelementptr inbounds float, float* %exp, i64 %iv
-  %tmp1 = load float, float* %arrayidx, align 4
-  %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1)
-  %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %iv
-  store float %tmp2, float* %arrayidx2, align 4
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:
-  ret void
-}
-
 define void @sqrt_f64(double* nocapture %varray) {
 ; CHECK-LABEL: @sqrt_f64(
 ; CHECK: __sqrtd2_massv{{.*}}<2 x double>