diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -96,6 +96,8 @@ BUILTIN(__builtin_ppc_swdivs_nochk, "fff", "") BUILTIN(__builtin_ppc_alignx, "vIivC*", "nc") BUILTIN(__builtin_ppc_rdlam, "UWiUWiUWiUWIi", "nc") +BUILTIN(__builtin_ppc_swdiv, "ddd", "") +BUILTIN(__builtin_ppc_swdivs, "fff", "") // Compare BUILTIN(__builtin_ppc_cmpeqb, "LLiLLiLLi", "") BUILTIN(__builtin_ppc_cmprb, "iCIiii", "") diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -236,6 +236,8 @@ Builder.defineMacro("__frsqrtes", "__builtin_ppc_frsqrtes"); Builder.defineMacro("__fsqrt", "__builtin_ppc_fsqrt"); Builder.defineMacro("__fsqrts", "__builtin_ppc_fsqrts"); + Builder.defineMacro("__swdiv", "__builtin_ppc_swdiv"); + Builder.defineMacro("__swdivs", "__builtin_ppc_swdivs"); } /// PPCTargetInfo::getTargetDefines - Return a set of the PowerPC-specific diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-swdiv.c b/clang/test/CodeGen/builtins-ppc-xlcompat-swdiv.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/builtins-ppc-xlcompat-swdiv.c @@ -0,0 +1,54 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64-unknown-aix \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc-unknown-aix \ +// RUN: -emit-llvm %s -o - -target-cpu pwr7 | FileCheck %s + +extern double a; +extern double b; +extern float c; +extern float d; + +// CHECK-LABEL: @test_swdiv( +// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load 
double, double* @b, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.ppc.swdiv(double [[TMP0]], double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] +// +double test_swdiv() { + return __swdiv(a, b); +} + +// CHECK-LABEL: @test_swdivs( +// CHECK: [[TMP0:%.*]] = load float, float* @c, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.ppc.swdivs(float [[TMP0]], float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] +// +float test_swdivs() { + return __swdivs(c, d); +} + +// CHECK-LABEL: @test_builtin_ppc_swdiv( +// CHECK: [[TMP0:%.*]] = load double, double* @a, align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* @b, align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.ppc.swdiv(double [[TMP0]], double [[TMP1]]) +// CHECK-NEXT: ret double [[TMP2]] +// +double test_builtin_ppc_swdiv() { + return __builtin_ppc_swdiv(a, b); +} + +// CHECK-LABEL: @test_builtin_ppc_swdivs( +// CHECK: [[TMP0:%.*]] = load float, float* @c, align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* @d, align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.ppc.swdivs(float [[TMP0]], float [[TMP1]]) +// CHECK-NEXT: ret float [[TMP2]] +// +float test_builtin_ppc_swdivs() { + return __builtin_ppc_swdivs(c, d); +} diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1432,6 +1432,8 @@ // PowerPC set FPSCR Intrinsic Definitions. 
def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">, Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>; + +def int_ppc_ftdivdp : Intrinsic<[llvm_i32_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem]>; } let TargetPrefix = "ppc" in { @@ -1717,6 +1719,12 @@ Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_ppc_frsqrtes : GCCBuiltin<"__builtin_ppc_frsqrtes">, Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_ppc_swdiv : GCCBuiltin<"__builtin_ppc_swdiv">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; + def int_ppc_swdivs : GCCBuiltin<"__builtin_ppc_swdivs">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt --- a/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -58,6 +58,7 @@ GISel/PPCCallLowering.cpp GISel/PPCRegisterBankInfo.cpp GISel/PPCLegalizerInfo.cpp + PPCLowerCheckedFPArith.cpp LINK_COMPONENTS Analysis diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -53,6 +53,7 @@ FunctionPass *createPPCExpandISELPass(); FunctionPass *createPPCPreEmitPeepholePass(); FunctionPass *createPPCExpandAtomicPseudoPass(); + FunctionPass *createPPCLowerCheckedFPArithPass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP); bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, @@ -77,6 +78,7 @@ void initializePPCTLSDynamicCallPass(PassRegistry &); void initializePPCMIPeepholePass(PassRegistry&); void initializePPCExpandAtomicPseudoPass(PassRegistry &); + void initializePPCLowerCheckedFPArithPass(PassRegistry &); extern char &PPCVSXFMAMutateID; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td 
b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -4422,6 +4422,7 @@ def : Pat<(int_ppc_fsel f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), (FSELD $FRA, $FRC, $FRB)>; def : Pat<(int_ppc_frsqrte f8rc:$frB), (FRSQRTE $frB)>; def : Pat<(int_ppc_frsqrtes f4rc:$frB), (FRSQRTES $frB)>; +def : Pat<(int_ppc_ftdivdp f8rc:$fA, f8rc:$fB), (FTDIV $fA, $fB)>; //===----------------------------------------------------------------------===// // PowerPC Instructions used for assembler/disassembler only diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2858,6 +2858,7 @@ def : Pat<(int_ppc_fnmadd f64:$A, f64:$B, f64:$C), (XSNMADDMDP $A, $B, $C)>; def : Pat<(int_ppc_fre f64:$A), (XSREDP $A)>; def : Pat<(int_ppc_frsqrte vsfrc:$XB), (XSRSQRTEDP $XB)>; +def : Pat<(int_ppc_ftdivdp vsfrc:$XA, vsfrc:$XB), (XSTDIVDP $XA, $XB)>; } // HasVSX // Any big endian VSX subtarget.
diff --git a/llvm/lib/Target/PowerPC/PPCLowerCheckedFPArith.cpp b/llvm/lib/Target/PowerPC/PPCLowerCheckedFPArith.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCLowerCheckedFPArith.cpp
@@ -0,0 +1,147 @@
+//===- PPCLowerCheckedFPArith.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering of checked floating point arithmetic.
+//
+// Every call to llvm.ppc.swdiv / llvm.ppc.swdivs is replaced by a fast-math
+// fdiv plus a runtime check; when the check fires, a plain fdiv emitted in a
+// separate block supplies the result instead.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-lower-checked-fp-arith"
+
+namespace {
+
+// llvm.ppc.ftdivdp returns the 4-bit CR field written by ftdiv/xstdivdp. The
+// Power ISA defines that field as 0b1 || fg_flag || fe_flag || 0b0, so the
+// value is never zero and only these two bits describe the operands.
+constexpr unsigned FTDivFGFlag = 0x4;
+constexpr unsigned FTDivFEFlag = 0x2;
+
+class PPCLowerCheckedFPArith : public FunctionPass {
+public:
+  static char ID;
+
+  PPCLowerCheckedFPArith() : FunctionPass(ID) {
+    initializePPCLowerCheckedFPArithPass(*PassRegistry::getPassRegistry());
+  }
+
+  /// Lower every llvm.ppc.swdiv / llvm.ppc.swdivs call found in \p F.
+  /// \returns true if at least one call was rewritten.
+  bool runOnFunction(Function &F) override {
+    // Gather the calls before touching the CFG: lowering splits blocks, which
+    // must not happen while the instruction lists are being walked.
+    SmallVector<CallInst *, 8> SWDivToLower;
+    for (BasicBlock &BB : F)
+      for (Instruction &I : BB)
+        if (auto *CI = dyn_cast<CallInst>(&I))
+          if (CI->getIntrinsicID() == Intrinsic::ppc_swdiv ||
+              CI->getIntrinsicID() == Intrinsic::ppc_swdivs)
+            SWDivToLower.push_back(CI);
+
+    for (CallInst *CI : SWDivToLower)
+      lowerSWDiv(CI);
+
+    return !SWDivToLower.empty();
+  }
+
+private:
+  /// Replace one swdiv/swdivs call with the guarded sequence:
+  ///
+  ///   parent:      <test>; %sw = fdiv fast a, b; br %bad, hwdiv, merge
+  ///   swdiv_HWDIV: %hw = fdiv a, b; br merge
+  ///   swdiv_MERGE: %res = phi [%sw, parent], [%hw, swdiv_HWDIV]
+  static void lowerSWDiv(CallInst *CI) {
+    Function &F = *CI->getFunction();
+    const bool IsDouble = CI->getIntrinsicID() == Intrinsic::ppc_swdiv;
+    Value *Num = CI->getArgOperand(0);
+    Value *Den = CI->getArgOperand(1);
+
+    // Everything after the call moves to the merge block. The split leaves an
+    // unconditional branch behind; drop it, the conditional branch created
+    // below becomes the terminator.
+    BasicBlock *ParentBlock = CI->getParent();
+    BasicBlock *MergeBlock =
+        ParentBlock->splitBasicBlock(CI->getNextNode(), "swdiv_MERGE");
+    ParentBlock->getTerminator()->eraseFromParent();
+
+    // Place the fallback block directly in front of the merge block so the
+    // other blocks of the function keep their order.
+    BasicBlock *HWDivBlock =
+        BasicBlock::Create(F.getContext(), "swdiv_HWDIV", &F, MergeBlock);
+
+    IRBuilder<> Builder(CI);
+    Value *NeedHWDiv = nullptr;
+
+    // Double precision: ask ftdiv up front whether the operands are safe for
+    // the fast division. Either flag being set selects the fallback.
+    if (IsDouble) {
+      Function *FTDiv =
+          Intrinsic::getDeclaration(F.getParent(), Intrinsic::ppc_ftdivdp);
+      Value *TestResult = Builder.CreateCall(FTDiv, {Num, Den});
+      Value *Flags = Builder.CreateAnd(
+          TestResult,
+          ConstantInt::get(TestResult->getType(), FTDivFGFlag | FTDivFEFlag));
+      NeedHWDiv =
+          Builder.CreateICmpNE(Flags, ConstantInt::get(Flags->getType(), 0));
+    }
+
+    // Emit the fast division. The guard restores the builder's fast-math
+    // flags on scope exit so the fallback fdiv below carries none.
+    Value *SWDiv = nullptr;
+    {
+      IRBuilder<>::FastMathFlagGuard Guard(Builder);
+      Builder.setFastMathFlags(FastMathFlags::getFast());
+      SWDiv = Builder.CreateFDiv(Num, Den);
+    }
+
+    // Single precision: fall back when the fast result compares unequal to
+    // itself, i.e. is NaN.
+    // NOTE(review): the fdiv above carries nnan (part of 'fast'), so later
+    // simplification may fold this self-comparison away - confirm.
+    if (!IsDouble)
+      NeedHWDiv = Builder.CreateFCmpUNE(SWDiv, SWDiv);
+
+    Builder.CreateCondBr(NeedHWDiv, HWDivBlock, MergeBlock);
+
+    // Fallback: plain fdiv without fast-math flags.
+    Builder.SetInsertPoint(HWDivBlock);
+    Value *HWDiv = Builder.CreateFDiv(Num, Den);
+    Builder.CreateBr(MergeBlock);
+
+    // Select the result at the head of the merge block and retire the call.
+    Builder.SetInsertPoint(MergeBlock, MergeBlock->getFirstInsertionPt());
+    PHINode *PN = Builder.CreatePHI(CI->getType(), 2);
+    PN->addIncoming(SWDiv, ParentBlock);
+    PN->addIncoming(HWDiv, HWDivBlock);
+    CI->replaceAllUsesWith(PN);
+    CI->eraseFromParent();
+  }
+};
+
+} // end anonymous namespace
+
+char PPCLowerCheckedFPArith::ID = 0;
+
+INITIALIZE_PASS(PPCLowerCheckedFPArith, "ppc-lower-checked-fp-arith",
+                "Lower Checked Floating Point Arithmetic", false, false)
+
+FunctionPass *llvm::createPPCLowerCheckedFPArithPass() {
+  return new PPCLowerCheckedFPArith();
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -125,6 +125,7 @@ initializePPCLowerMASSVEntriesPass(PR); initializePPCExpandAtomicPseudoPass(PR); initializeGlobalISel(PR); + initializePPCLowerCheckedFPArithPass(PR); } static bool isLittleEndianTriple(const Triple &T) { @@ -447,6 +448,8 @@ } TargetPassConfig::addIRPasses(); + + addPass(createPPCLowerCheckedFPArithPass()); } bool PPCPassConfig::addPreISel() {
diff --git a/llvm/test/CodeGen/PowerPC/LowerCheckedFPArith.ll b/llvm/test/CodeGen/PowerPC/LowerCheckedFPArith.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/LowerCheckedFPArith.ll
@@ -0,0 +1,54 @@
+; RUN: opt -ppc-lower-checked-fp-arith -S -o - < %s | FileCheck %s
+
+@a = external local_unnamed_addr global double
+@b = external local_unnamed_addr global double
+@c = external local_unnamed_addr global float
+@d = external local_unnamed_addr global float
+
+; CHECK-LABEL: @test_swdiv(
+; CHECK: %0 = load double, double* @a, align 8
+; CHECK-NEXT: %1 = load double, double* @b, align 8
+; CHECK-NEXT: %2 = call i32 @llvm.ppc.ftdivdp(double %0, double %1)
+; CHECK-NEXT: %3 = and i32 %2, 6
+; CHECK-NEXT: %4 = icmp ne i32 %3, 0
+; CHECK-NEXT: %5 = fdiv fast double %0, %1
+; CHECK-NEXT: br i1 %4, label %swdiv_HWDIV, label %swdiv_MERGE
+
+; CHECK: %6 = fdiv double %0, %1
+; CHECK-NEXT: br label %swdiv_MERGE
+
+; CHECK: %7 = phi double [ %5, %entry ], [ %6, %swdiv_HWDIV ]
+; CHECK-NEXT: ret double %7
+
+define dso_local double @test_swdiv() local_unnamed_addr {
+entry:
+  %0 = load double, double* @a
+  %1 = load double, double* @b
+  %2 = tail call double @llvm.ppc.swdiv(double %0, double %1)
+  ret double %2
+}
+
+declare double @llvm.ppc.swdiv(double, double)
+
+; CHECK-LABEL: @test_swdivs(
+; CHECK: %0 = load float, float* @c, align 4
+; CHECK-NEXT: %1 = load float, float* @d, align 4
+; CHECK-NEXT: %2 = fdiv fast float %0, %1
+; CHECK-NEXT: %3 = fcmp une float %2, %2
+; CHECK-NEXT: br i1 %3, label %swdiv_HWDIV, label %swdiv_MERGE
+
+; CHECK: %4 = fdiv float %0, %1
+; CHECK-NEXT: br label %swdiv_MERGE
+
+; CHECK: %5 = phi float [ %2, %entry ], [ %4, %swdiv_HWDIV ]
+; CHECK-NEXT: ret float %5
+
+define dso_local float @test_swdivs() local_unnamed_addr {
+entry:
+  %0 = load float, float* @c
+  %1 = load float, float* @d
+  %2 = tail call float @llvm.ppc.swdivs(float %0, float %1)
+  ret float %2
+}
+
+declare float @llvm.ppc.swdivs(float, float)
diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -63,6 +63,8 @@ ; CHECK-NEXT: Expand vector predication intrinsics ; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Expand reduction intrinsics +; CHECK-NEXT: Lower Checked Floating Point Arithmetic +; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: CodeGen Prepare ; CHECK-NEXT: Dominator Tree Construction @@ -206,4 +208,4 @@ define void @f() { ret void -} \ No newline at end of file +} diff --git a/llvm/test/CodeGen/PowerPC/int-ppc-ftdivdp.ll b/llvm/test/CodeGen/PowerPC/int-ppc-ftdivdp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/int-ppc-ftdivdp.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs
-mtriple=powerpc64-unknown-unknown \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \ +; RUN: -mattr=-vsx -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-NOVSX + +define dso_local i32 @test_ftdivdp(double %a, double %b) local_unnamed_addr { +; CHECK-LABEL: test_ftdivdp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xstdivdp 0, 1, 2 +; CHECK-NEXT: mfocrf 3, 128 +; CHECK-NEXT: srwi 3, 3, 28 +; CHECK-NEXT: blr +; +; CHECK-NOVSX-LABEL: test_ftdivdp: +; CHECK-NOVSX: # %bb.0: # %entry +; CHECK-NOVSX-NEXT: ftdiv 0, 1, 2 +; CHECK-NOVSX-NEXT: mfocrf 3, 128 +; CHECK-NOVSX-NEXT: srwi 3, 3, 28 +; CHECK-NOVSX-NEXT: blr +entry: + %c = tail call i32 @llvm.ppc.ftdivdp(double %a, double %b) + ret i32 %c +} + +declare i32 @llvm.ppc.ftdivdp(double, double)