Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -392,6 +392,9 @@ /// load-linked/store-conditional loops. extern char &AtomicExpandID; + /// Lowers unsupported integer division. + extern char &IntegerDivisionID; + /// MachineLoopInfo - This pass is a loop analysis pass. extern char &MachineLoopInfoID; @@ -646,6 +649,9 @@ /// createJumpInstrTables - This pass creates jump-instruction tables. ModulePass *createJumpInstrTablesPass(); + /// Lower unsupported integer division + FunctionPass *createIntegerDivisionPass(const TargetMachine *TM); + /// createForwardControlFlowIntegrityPass - This pass adds control-flow /// integrity. ModulePass *createForwardControlFlowIntegrityPass(); Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -149,6 +149,7 @@ void initializeInstructionCombiningPassPass(PassRegistry&); void initializeInstCountPass(PassRegistry&); void initializeInstNamerPass(PassRegistry&); +void initializeIntegerDivisionPass(PassRegistry&); void initializeInternalizePassPass(PassRegistry&); void initializeIntervalPartitionPass(PassRegistry&); void initializeJumpThreadingPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -145,6 +145,8 @@ (void) llvm::createStripDeadPrototypesPass(); (void) llvm::createTailCallEliminationPass(); (void) llvm::createJumpThreadingPass(); + /*AMDGPU64bit*/ + //(void) llvm::createIntegerDivisionPass(nullptr); (void) llvm::createUnifyFunctionExitNodesPass(); (void) llvm::createInstCountPass(); (void) llvm::createConstantHoistingPass(); Index: include/llvm/Target/TargetLowering.h 
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -167,6 +167,12 @@
 
   virtual bool useSoftFloat() const { return false; }
 
+  /// Returns true if the instruction should be expanded by the IR-level
+  /// IntegerDivision pass.
+  virtual bool shouldExpandDivRemInIR(const BinaryOperator &I) const {
+    return false;
+  }
+
   /// Return the pointer type for the given address space, defaults to
   /// the pointer type from the data layout.
   /// FIXME: The default needs to be removed once all the code is updated.
Index: lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- lib/Target/AMDGPU/AMDGPU.h
+++ lib/Target/AMDGPU/AMDGPU.h
@@ -50,6 +50,12 @@
 FunctionPass *createSIInsertWaits(TargetMachine &tm);
 
+/// IR pass that expands the 64-bit unsigned divisions for which
+/// TargetLowering::shouldExpandDivRemInIR returns true.
+FunctionPass *createAMDGPUIntegerDivisionPass(const TargetMachine *TM);
+void initializeAMDGPUIntegerDivisionPass(PassRegistry &);
+extern char &AMDGPUIntegerDivisionID;
+
 ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
 extern char &AMDGPUAnnotateKernelFeaturesID;
 
Index: lib/Target/AMDGPU/AMDGPU64bitDivision.h
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPU64bitDivision.h
@@ -0,0 +1,24 @@
+//===-- AMDGPU64bitDivision.h - 64-bit udiv expansion -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU64BITDIVISION_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPU64BITDIVISION_H
+
+namespace llvm {
+
+class BinaryOperator;
+
+/// Entry point for the AMDGPU-specific IR expansion of the unsigned division
+/// \p Div.
+/// \returns true if \p Div was replaced by an expansion.
+bool AMDExpandUDivision(BinaryOperator *Div);
+
+} // End namespace llvm
+
+#endif
Index: lib/Target/AMDGPU/AMDGPU64bitDivision.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPU64bitDivision.cpp
@@ -0,0 +1,28 @@
+//===-- AMDGPU64bitDivision.cpp - 64-bit udiv expansion -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU64bitDivision.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-64bit-division"
+
+bool llvm::AMDExpandUDivision(BinaryOperator *Div) {
+  assert(Div && Div->getOpcode() == Instruction::UDiv &&
+         "AMDExpandUDivision expects a udiv instruction");
+
+  // TODO: emit the AMDGPU-specific expansion. Until then the instruction is
+  // left untouched, so report that nothing was replaced.
+  DEBUG(dbgs() << "AMDExpandUDivision called for: " << *Div << '\n');
+  return false;
+}
Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -119,6 +119,10 @@
 public:
   AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
 
+  bool shouldExpandDivRemInIR(const BinaryOperator &I) const override {
+    return true;
+  }
+
   bool isFAbsFree(EVT VT) const override;
   bool isFNegFree(EVT VT) const override;
   bool isTruncateFree(EVT Src, EVT Dest) const override;
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -253,6 +253,7 @@
     setOperationAction(ISD::BSWAP, VT, Expand);
     setOperationAction(ISD::CTTZ, VT, Expand);
     setOperationAction(ISD::CTLZ, VT, Expand);
+    setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
   }
 
   if (!Subtarget->hasBCNT(32))
@@ -1652,6 +1653,8 @@
   return
DAG.getMergeValues(Res, DL);
 }
 
+// FIXME: This seems to be extremely inefficient. SC produces almost 1/3rd as
+// many instructions for i64 udiv.
 void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
                                       SelectionDAG &DAG,
                                       SmallVectorImpl<SDValue> &Results) const {
Index: lib/Target/AMDGPU/AMDGPUIntegerDivisionPass.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPUIntegerDivisionPass.cpp
@@ -0,0 +1,129 @@
+//===-- AMDGPUIntegerDivisionPass.cpp - Expand 64-bit udiv in IR ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// IR-level pass that hands 64-bit unsigned divisions to AMDExpandUDivision
+/// when the target's TargetLowering::shouldExpandDivRemInIR hook asks for it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPU64bitDivision.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-integer-division"
+
+namespace {
+
+/// Function pass that expands the integer divisions selected by
+/// shouldExpandDivRem() directly in the IR.
+class AMDGPUIntegerDivision : public FunctionPass,
+                              public InstVisitor<AMDGPUIntegerDivision, bool> {
+  const TargetMachine *TM;
+  const TargetLowering *TLI;
+
+  /// Returns true if \p I is a scalar 64-bit udiv that the target lowering
+  /// wants expanded in IR.
+  bool shouldExpandDivRem(const BinaryOperator &I) const;
+
+public:
+  static char ID;
+
+  explicit AMDGPUIntegerDivision(const TargetMachine *TM = nullptr)
+      : FunctionPass(ID), TM(TM), TLI(nullptr) {}
+
+  bool runOnFunction(Function &F) override;
+
+  const char *getPassName() const override { return "Integer Division Pass"; }
+
+  /// Fallback for every opcode without a dedicated visitor: nothing to do.
+  bool visitInstruction(Instruction &I) { return false; }
+
+  // Each visitor returns true when it expanded \p I.
+  bool visitSDiv(BinaryOperator &I);
+  bool visitUDiv(BinaryOperator &I);
+  bool visitSRem(BinaryOperator &I);
+  bool visitURem(BinaryOperator &I);
+};
+
+} // End anonymous namespace
+
+char AMDGPUIntegerDivision::ID = 0;
+char &llvm::AMDGPUIntegerDivisionID = AMDGPUIntegerDivision::ID;
+
+INITIALIZE_TM_PASS(AMDGPUIntegerDivision, DEBUG_TYPE, "Expand integer division",
+                   false, false)
+
+bool AMDGPUIntegerDivision::runOnFunction(Function &F) {
+  // Without a TargetMachine there is no lowering info to consult, so no
+  // division can qualify for expansion.
+  if (!TM)
+    return false;
+  TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+
+  // Collect the candidates before rewriting anything: an expansion may erase
+  // the instruction and restructure its block, which would invalidate
+  // iterators held over the function.
+  SmallVector<BinaryOperator *, 8> Worklist;
+  for (BasicBlock &BB : F)
+    for (Instruction &I : BB)
+      if (auto *BO = dyn_cast<BinaryOperator>(&I))
+        if (shouldExpandDivRem(*BO))
+          Worklist.push_back(BO);
+
+  bool Changed = false;
+  for (BinaryOperator *BO : Worklist)
+    Changed |= visit(*BO);
+
+  // Report modifications so the pass manager can invalidate analyses.
+  return Changed;
+}
+
+bool AMDGPUIntegerDivision::shouldExpandDivRem(const BinaryOperator &I) const {
+  // TODO: handle signed 64-bit division and the remainder opcodes as well.
+  // isIntegerTy(64) also rejects vector types, for which
+  // getIntegerBitWidth() would assert.
+  if (I.getOpcode() != Instruction::UDiv || !I.getType()->isIntegerTy(64))
+    return false;
+  return TLI && TLI->shouldExpandDivRemInIR(I);
+}
+
+// TODO: route sdiv/srem/urem through AMDGPU-specific expansions as well; the
+// generic expandDivision/expandRemainder helpers introduce more control flow.
+bool AMDGPUIntegerDivision::visitSDiv(BinaryOperator &I) {
+  return shouldExpandDivRem(I) && expandDivision(&I);
+}
+
+bool AMDGPUIntegerDivision::visitUDiv(BinaryOperator &I) {
+  // Only 64-bit divisions reach the AMDGPU-specific expansion.
+  return shouldExpandDivRem(I) && AMDExpandUDivision(&I);
+}
+
+bool AMDGPUIntegerDivision::visitSRem(BinaryOperator &I) {
+  return shouldExpandDivRem(I) && expandRemainder(&I);
+}
+
+bool AMDGPUIntegerDivision::visitURem(BinaryOperator &I) {
+  return shouldExpandDivRem(I) && expandRemainder(&I);
+}
+
+FunctionPass *llvm::createAMDGPUIntegerDivisionPass(const TargetMachine *TM) {
+  return new AMDGPUIntegerDivision(TM);
+}
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include <llvm/CodeGen/Passes.h>
 
+
 using namespace llvm;
 
 extern "C" void LLVMInitializeAMDGPUTarget() {
@@ -52,6 +53,9 @@
   initializeSILoadStoreOptimizerPass(*PR);
   initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
   initializeAMDGPUAnnotateUniformValuesPass(*PR);
+  /*Modified Integer Division Pass*/
+  initializeAMDGPUIntegerDivisionPass(*PR);
+
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -200,7 +204,8 @@
   // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
addPass(createAMDGPUOpenCLImageTypeLoweringPass()); - + /*TODO:Uthkarsh - Integer Division*/ + addPass(createAMDGPUIntegerDivisionPass(&getAMDGPUTargetMachine())); TargetPassConfig::addIRPasses(); } @@ -210,7 +215,10 @@ addPass(createAMDGPUPromoteAlloca(ST)); addPass(createSROAPass()); } + TargetPassConfig::addCodeGenPrepare(); + + // addPass(createIntegerDivisionPass(&getAMDGPUTargetMachine())); } bool Index: lib/Target/AMDGPU/CMakeLists.txt =================================================================== --- lib/Target/AMDGPU/CMakeLists.txt +++ lib/Target/AMDGPU/CMakeLists.txt @@ -13,6 +13,8 @@ add_public_tablegen_target(AMDGPUCommonTableGen) add_llvm_target(AMDGPUCodeGen + AMDGPU64bitDivision.cpp + AMDGPU64bitDivision.h AMDILCFGStructurizer.cpp AMDGPUAlwaysInlinePass.cpp AMDGPUAnnotateKernelFeatures.cpp @@ -33,6 +35,7 @@ AMDGPUInstrInfo.cpp AMDGPUPromoteAlloca.cpp AMDGPURegisterInfo.cpp + AMDGPUIntegerDivisionPass.cpp R600ClauseMergePass.cpp R600ControlFlowFinalizer.cpp R600EmitClauseMarkers.cpp Index: tools/opt/opt.cpp =================================================================== --- tools/opt/opt.cpp +++ tools/opt/opt.cpp @@ -326,6 +326,7 @@ // supported. initializeCodeGenPreparePass(Registry); initializeAtomicExpandPass(Registry); + initializeIntegerDivisionPass(Registry); initializeRewriteSymbolsPass(Registry); initializeWinEHPreparePass(Registry); initializeDwarfEHPreparePass(Registry);