diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def --- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -45,6 +45,7 @@ FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true)) FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass, ()) FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass, ()) +FUNCTION_PASS("expand-powi", ExpandPowiPass, ()) FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ()) FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ()) FUNCTION_PASS("lowerinvoke", LowerInvokePass, ()) diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -518,6 +518,9 @@ // Expands large div/rem instructions. FunctionPass *createExpandLargeFpConvertPass(); + // Expands powi instructions. + FunctionPass *createExpandPowiPass(); + // This pass expands memcmp() to load/stores. 
FunctionPass *createExpandMemCmpPass(); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1681,6 +1681,11 @@ [ LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; + def int_vp_powi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; // Casts def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -362,6 +362,10 @@ BEGIN_REGISTER_VP(vp_nearbyint, 1, 2, VP_FNEARBYINT, -1) END_REGISTER_VP(vp_nearbyint, VP_FNEARBYINT) +// llvm.vp.powi(x, y, mask,vlen) +BEGIN_REGISTER_VP_INTRINSIC(vp_powi, 2, 3) +VP_PROPERTY_BINARYOP +END_REGISTER_VP_INTRINSIC(vp_powi) ///// } Floating-Point Arithmetic ///// Type Casts { diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -129,6 +129,7 @@ void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&); void initializeExpandMemCmpPassPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); +void initializeExpandPowiLegacyPassPass(PassRegistry &); void initializeExpandReductionsPass(PassRegistry&); void initializeExpandVectorPredicationPass(PassRegistry &); void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&); diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -58,6 +58,7 @@ ExpandLargeFpConvert.cpp ExpandMemCmp.cpp ExpandPostRAPseudos.cpp + ExpandPowi.cpp ExpandReductions.cpp ExpandVectorPredication.cpp FaultMaps.cpp diff --git a/llvm/lib/CodeGen/ExpandPowi.cpp 
b/llvm/lib/CodeGen/ExpandPowi.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/CodeGen/ExpandPowi.cpp
@@ -0,0 +1,192 @@
+//===--- ExpandPowi.cpp - Expand Powi intrinsics ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR expansion for llvm.vp.powi (llvm.powi is not handled
+// yet). The expansion is based on compiler-rt/__powidf2.c.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "expand-powi"
+
+using namespace llvm;
+
+// Replace a single llvm.vp.powi call with an explicit square-and-multiply loop
+// built from VP intrinsics, then erase the call.
+//
+// The expansion is based on the C code of compiler-rt/__powidf2.c:
+//   const int recip = b < 0;
+//   double r = 1;
+//   while (1) {
+//     if (b & 1)
+//       r *= a;
+//     b /= 2;
+//     if (b == 0)
+//       break;
+//     a *= a;
+//   }
+//   return recip ? 1 / r : r;
+//
+// The operands of II are read as (base, exponent, mask, evl). The block holding
+// II is split in front of II, a self-looping block is inserted between the two
+// halves, and the reciprocal/select producing the final value are emitted at
+// the head of the second half.
+static void expandPowi(IntrinsicInst *II) {
+  Value *OrigBase = II->getOperand(0);
+  Value *OrigExp = II->getOperand(1);
+  Value *Mask = II->getOperand(2);
+  Value *EVL = II->getOperand(3);
+
+  BasicBlock *PreLoopBB = II->getParent();
+  BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(II, "powi-post-loop");
+  BasicBlock *LoopBody =
+      BasicBlock::Create(PreLoopBB->getContext(), "powi-forward-loop",
+                         PreLoopBB->getParent(), PostLoopBB);
+
+  // Route PreLoopBB into the loop: the new branch is inserted in front of the
+  // terminator left behind by the split, which is then erased.
+  IRBuilder<> Builder(PreLoopBB->getTerminator());
+  Builder.CreateBr(LoopBody);
+  PreLoopBB->getTerminator()->eraseFromParent();
+
+  Type *BaseTy = OrigBase->getType();
+  Type *ExpTy = OrigExp->getType();
+  // Splat of i1 true shaped like the exponent; used as the mask of the
+  // arithmetic inside the loop.
+  Type *CondTy = ExpTy->getWithNewBitWidth(1);
+  Value *True = ConstantInt::get(CondTy, 1);
+
+  Builder.SetInsertPoint(LoopBody);
+  // Loop-carried values. Their back-edge incomings are added below, once the
+  // updated values exist.
+  PHINode *Base = Builder.CreatePHI(BaseTy, 2, "base");
+  Base->addIncoming(OrigBase, PreLoopBB);
+  PHINode *Exp = Builder.CreatePHI(ExpTy, 2, "exp");
+  Exp->addIncoming(OrigExp, PreLoopBB);
+  PHINode *Res = Builder.CreatePHI(BaseTy, 2, "res");
+  Res->addIncoming(ConstantFP::get(BaseTy, 1.), PreLoopBB);
+
+  // Res *= Base if Exp is odd.
+  Value *Tmp = Builder.CreateIntrinsic(BaseTy, Intrinsic::vp_fmul,
+                                       {Res, Base, True, EVL});
+  Value *And1 = Builder.CreateIntrinsic(
+      ExpTy, Intrinsic::vp_and, {Exp, ConstantInt::get(ExpTy, 1), True, EVL});
+  // FIXME: Use vp.icmp.
+  Value *IsOdd = Builder.CreateICmpNE(And1, ConstantInt::get(ExpTy, 0));
+  Value *NewRes = Builder.CreateIntrinsic(BaseTy, Intrinsic::vp_select,
+                                          {IsOdd, Tmp, Res, EVL});
+  Res->addIncoming(NewRes, LoopBody);
+
+  // Exp /= 2. This must be a signed division that rounds toward zero, as in
+  // the reference loop: a logical shift would turn a negative exponent into a
+  // huge positive one, and an arithmetic shift would never take -1 to 0.
+  Value *NewExp = Builder.CreateIntrinsic(
+      ExpTy, Intrinsic::vp_sdiv, {Exp, ConstantInt::get(ExpTy, 2), True, EVL});
+  Exp->addIncoming(NewExp, LoopBody);
+
+  // Base *= Base.
+  Value *NewBase = Builder.CreateIntrinsic(BaseTy, Intrinsic::vp_fmul,
+                                           {Base, Base, True, EVL});
+  Base->addIncoming(NewBase, LoopBody);
+
+  // Leave the loop once the exponent elements selected by Mask/EVL are all 0.
+  Type *ExpScalarTy = ExpTy->getScalarType();
+  Value *ScalarZero = ConstantInt::get(ExpScalarTy, 0);
+  Value *OrSum = Builder.CreateIntrinsic(ExpScalarTy, Intrinsic::vp_reduce_or,
+                                         {ScalarZero, NewExp, Mask, EVL});
+  Builder.CreateCondBr(Builder.CreateICmpEQ(OrSum, ScalarZero), PostLoopBB,
+                       LoopBody);
+
+  Builder.SetInsertPoint(&PostLoopBB->front());
+  // Use reciprocal if power is negative.
+  Value *Recip =
+      Builder.CreateIntrinsic(BaseTy, Intrinsic::vp_fdiv,
+                              {ConstantFP::get(BaseTy, 1.), NewRes, Mask, EVL});
+  // FIXME: Use vp.icmp.
+  Value *IsNegative =
+      Builder.CreateICmpSLT(OrigExp, ConstantInt::get(ExpTy, 0));
+  Value *Result = Builder.CreateIntrinsic(BaseTy, Intrinsic::vp_select,
+                                          {IsNegative, Recip, NewRes, EVL});
+
+  II->replaceAllUsesWith(Result);
+  II->eraseFromParent();
+}
+
+// Collect every llvm.vp.powi call in F and expand each one. Returns true if
+// anything was expanded. TLI is not used yet.
+// TODO: Add cost model to skip small fixed vectors powi.
+static bool runImpl(Function &F, const TargetLowering &TLI) {
+  // Gather first: expandPowi splits blocks and erases the call, so it must not
+  // run while instructions(F) is being walked.
+  SmallVector<IntrinsicInst *, 4> Replace;
+  for (Instruction &I : instructions(F)) {
+    // TODO: Also support llvm.powi.
+    if (auto *II = dyn_cast<IntrinsicInst>(&I))
+      if (II->getIntrinsicID() == Intrinsic::vp_powi)
+        Replace.push_back(II);
+  }
+
+  if (Replace.empty())
+    return false;
+
+  for (IntrinsicInst *II : Replace)
+    expandPowi(II);
+
+  return true;
+}
+
+namespace {
+// Legacy pass-manager wrapper that runs the vp.powi expansion on a function.
+class ExpandPowiLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  ExpandPowiLegacyPass() : FunctionPass(ID) {
+    initializeExpandPowiLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+    auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+    return runImpl(F, *TLI);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetPassConfig>();
+    AU.addPreserved<AAResultsWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+  }
+};
+} // namespace
+
+char ExpandPowiLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ExpandPowiLegacyPass, "expand-powi",
+                      "Expand powi functions", false, false)
+INITIALIZE_PASS_END(ExpandPowiLegacyPass, "expand-powi",
+                    "Expand powi functions", false, false)
+
+FunctionPass *llvm::createExpandPowiPass() {
+  return new ExpandPowiLegacyPass();
+}
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1087,6 +1087,7 @@ PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); addPass(createExpandLargeDivRemPass()); addPass(createExpandLargeFpConvertPass()); + addPass(createExpandPowiPass()); addIRPasses(); addCodeGenPrepare(); addPassesToHandleExceptions(); diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -393,6 +393,7 @@ "expand-large-div-rem", "structurizecfg", "fix-irreducible", + "expand-powi", "expand-large-fp-convert" }; for (const auto &P : PassNamePrefix) @@ -443,6 +444,7 @@ initializeExpandLargeDivRemLegacyPassPass(Registry); initializeExpandLargeFpConvertLegacyPassPass(Registry); initializeExpandMemCmpPassPass(Registry); +
initializeExpandPowiLegacyPassPass(Registry); initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); initializeSelectOptimizePass(Registry); initializeCodeGenPreparePass(Registry);