Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -167,6 +167,13 @@
 
   virtual bool useSoftFloat() const { return false; }
 
+  /// Returns true if the div/rem instruction \p I should be expanded by an
+  /// IR-level integer division pass.  The default of false opts no
+  /// instruction in to such an expansion.
+  virtual bool shouldExpandDivRemInIR(const BinaryOperator &I) const {
+    return false;
+  }
+
   /// Return the pointer type for the given address space, defaults to
   /// the pointer type from the data layout.
   /// FIXME: The default needs to be removed once all the code is updated.
Index: lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- lib/Target/AMDGPU/AMDGPU.h
+++ lib/Target/AMDGPU/AMDGPU.h
@@ -81,6 +81,10 @@
 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
 extern char &AMDGPUAnnotateUniformValuesPassID;
 
+FunctionPass *createAMDGPUIntegerDivisionPass(const TargetMachine *TM);
+void initializeAMDGPUIntegerDivisionPass(PassRegistry &);
+extern char &AMDGPUIntegerDivisionID;
+
 extern Target TheAMDGPUTarget;
 extern Target TheGCNTarget;
 
Index: lib/Target/AMDGPU/AMDGPU64bitDivision.h
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPU64bitDivision.h
@@ -0,0 +1,30 @@
+//===-- AMDGPU64bitDivision.h - IR expansion of 64-bit udiv -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Declares the AMDGPU-specific IR-level expansion of 64-bit unsigned
+/// division.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU64BITDIVISION_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPU64BITDIVISION_H
+
+namespace llvm {
+
+class BinaryOperator;
+
+/// Expand the unsigned division \p Div into an AMDGPU-specific IR sequence.
+///
+/// \returns true if \p Div was replaced, false if it was left in place.
+bool AMDExpandUDivision(BinaryOperator *Div);
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPU64BITDIVISION_H
Index: lib/Target/AMDGPU/AMDGPU64bitDivision.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPU64bitDivision.cpp
@@ -0,0 +1,35 @@
+//===-- AMDGPU64bitDivision.cpp - IR expansion of 64-bit udiv -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// AMDGPU-specific IR-level expansion of 64-bit unsigned division.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU64bitDivision.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-64bit-division"
+
+bool llvm::AMDExpandUDivision(BinaryOperator *Div) {
+  assert(Div && Div->getOpcode() == Instruction::UDiv &&
+         "AMDExpandUDivision expects a udiv instruction");
+
+  // No custom expansion is emitted yet, so the instruction is left in place.
+  // Report that honestly: returning true would tell the caller the IR had
+  // changed and that Div had been replaced.
+  DEBUG(dbgs() << "AMDExpandUDivision: no expansion implemented for " << *Div
+               << '\n');
+  return false;
+}
Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -119,6 +119,12 @@
 public:
   AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
 
+  /// Opt every div/rem instruction in to IR-level expansion; the AMDGPU
+  /// integer division pass decides which of them it actually rewrites.
+  bool shouldExpandDivRemInIR(const BinaryOperator &I) const override {
+    return true;
+  }
+
   bool isFAbsFree(EVT VT) const override;
   bool isFNegFree(EVT VT) const override;
   bool isTruncateFree(EVT Src, EVT Dest) const override;
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1652,6 +1652,8 @@
   return DAG.getMergeValues(Res, DL);
 }
 
+// FIXME: This seems to be extremely inefficient.  SC produces almost 1/3rd as
+// many instructions for i64 udiv.
 void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
                                           SelectionDAG &DAG,
                                           SmallVectorImpl<SDValue> &Results) const {
Index: lib/Target/AMDGPU/AMDGPUIntegerDivisionPass.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPUIntegerDivisionPass.cpp
@@ -0,0 +1,128 @@
+//===-- AMDGPUIntegerDivisionPass.cpp - Expand div/rem instructions -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// An IR-level pass that expands 64-bit unsigned integer division.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPU64bitDivision.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-integer-division"
+
+namespace {
+
+/// Function pass that rewrites the div/rem instructions accepted by
+/// shouldExpandDivRem() into explicit IR sequences.
+class AMDGPUIntegerDivision : public FunctionPass,
+                              public InstVisitor<AMDGPUIntegerDivision, bool> {
+  const TargetMachine *TM;
+  /// Lowering info of the function being processed; set by runOnFunction().
+  const TargetLowering *TLI;
+
+  /// Return true if this pass should expand \p I.
+  bool shouldExpandDivRem(const BinaryOperator &I) const;
+
+public:
+  static char ID;
+
+  explicit AMDGPUIntegerDivision(const TargetMachine *TM = nullptr)
+      : FunctionPass(ID), TM(TM), TLI(nullptr) {}
+
+  bool runOnFunction(Function &F) override;
+
+  const char *getPassName() const override { return "Integer Division Pass"; }
+
+  // InstVisitor callbacks; each returns true iff the instruction was expanded.
+  bool visitInstruction(Instruction &I) { return false; }
+  bool visitSDiv(BinaryOperator &I);
+  bool visitUDiv(BinaryOperator &I);
+  bool visitSRem(BinaryOperator &I);
+  bool visitURem(BinaryOperator &I);
+};
+
+} // end anonymous namespace
+
+char AMDGPUIntegerDivision::ID = 0;
+
+INITIALIZE_TM_PASS(AMDGPUIntegerDivision, DEBUG_TYPE,
+                   "Expand integer division", false, false)
+
+char &llvm::AMDGPUIntegerDivisionID = AMDGPUIntegerDivision::ID;
+
+bool AMDGPUIntegerDivision::runOnFunction(Function &F) {
+  // Without a target machine there is no lowering info to consult.
+  if (!TM)
+    return false;
+  TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+
+  // Collect the candidates before touching anything: the generic expansions
+  // split basic blocks and erase the instruction, which would invalidate the
+  // iterators of a walk that is still in progress.
+  SmallVector<BinaryOperator *, 8> Worklist;
+  for (BasicBlock &BB : F)
+    for (Instruction &I : BB)
+      if (auto *BO = dyn_cast<BinaryOperator>(&I))
+        if (shouldExpandDivRem(*BO))
+          Worklist.push_back(BO);
+
+  bool Changed = false;
+  for (BinaryOperator *BO : Worklist)
+    Changed |= visit(*BO);
+  return Changed;
+}
+
+bool AMDGPUIntegerDivision::shouldExpandDivRem(const BinaryOperator &I) const {
+  assert(TLI && "TLI must be set by runOnFunction() before visiting");
+  // Only 64-bit unsigned division is handled so far.
+  const bool IsUDiv64 =
+      I.getOpcode() == Instruction::UDiv && I.getType()->isIntegerTy(64);
+  return IsUDiv64 && TLI->shouldExpandDivRemInIR(I);
+}
+
+// shouldExpandDivRem() currently accepts only 64-bit udiv, so the sdiv, srem
+// and urem visitors below never expand anything yet; they take effect as soon
+// as that predicate is widened.
+
+bool AMDGPUIntegerDivision::visitSDiv(BinaryOperator &I) {
+  return shouldExpandDivRem(I) && expandDivision(&I);
+}
+
+bool AMDGPUIntegerDivision::visitUDiv(BinaryOperator &I) {
+  // Propagate the callee's result rather than assuming success, so that an
+  // instruction left in place is not reported as a change to the function.
+  return shouldExpandDivRem(I) && AMDExpandUDivision(&I);
+}
+
+bool AMDGPUIntegerDivision::visitSRem(BinaryOperator &I) {
+  return shouldExpandDivRem(I) && expandRemainder(&I);
+}
+
+bool AMDGPUIntegerDivision::visitURem(BinaryOperator &I) {
+  return shouldExpandDivRem(I) && expandRemainder(&I);
+}
+
+FunctionPass *llvm::createAMDGPUIntegerDivisionPass(const TargetMachine *TM) {
+  return new AMDGPUIntegerDivision(TM);
+}
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -52,6 +52,7 @@
   initializeSILoadStoreOptimizerPass(*PR);
   initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
   initializeAMDGPUAnnotateUniformValuesPass(*PR);
+  initializeAMDGPUIntegerDivisionPass(*PR);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -200,7 +201,10 @@
 
   // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
   addPass(createAMDGPUOpenCLImageTypeLoweringPass());
 
+  // Run the AMDGPU integer division expansion ahead of the generic IR passes.
+  addPass(createAMDGPUIntegerDivisionPass(&getAMDGPUTargetMachine()));
+
   TargetPassConfig::addIRPasses();
 }
 
Index: lib/Target/AMDGPU/CMakeLists.txt
===================================================================
--- lib/Target/AMDGPU/CMakeLists.txt
+++ lib/Target/AMDGPU/CMakeLists.txt
@@ -13,6 +13,7 @@
 add_public_tablegen_target(AMDGPUCommonTableGen)
 
 add_llvm_target(AMDGPUCodeGen
+  AMDGPU64bitDivision.cpp
   AMDILCFGStructurizer.cpp
   AMDGPUAlwaysInlinePass.cpp
   AMDGPUAnnotateKernelFeatures.cpp
@@ -33,6 +34,7 @@
   AMDGPUInstrInfo.cpp
   AMDGPUPromoteAlloca.cpp
   AMDGPURegisterInfo.cpp
+  AMDGPUIntegerDivisionPass.cpp
   R600ClauseMergePass.cpp
   R600ControlFlowFinalizer.cpp
   R600EmitClauseMarkers.cpp