Index: lib/CodeGen/ManagedMemoryRewrite.cpp =================================================================== --- lib/CodeGen/ManagedMemoryRewrite.cpp +++ lib/CodeGen/ManagedMemoryRewrite.cpp @@ -34,6 +34,7 @@ #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DerivedUser.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" @@ -43,6 +44,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" namespace { static llvm::Function *GetOrCreatePollyMallocManaged(Module &M) { @@ -89,13 +91,237 @@ return F; } +// Expand the constant expression Cur using Builder. This will recursively +// expand Instruction. `Expands` contains all the expanded instructions. +static Instruction *ExpandConstantExpr(ConstantExpr *Cur, + PollyIRBuilder &Builder, + std::set &Expands) { + assert(Cur && "invalid constant expression passed"); + std::vector> Replacements; + for (unsigned i = 0; i < Cur->getNumOperands(); i++) { + Constant *COp = dyn_cast(Cur->getOperand(i)); + assert(COp && "constant must have a constant operand"); + if (isa(COp)) { + Instruction *Replacement = + ExpandConstantExpr(dyn_cast(COp), Builder, Expands); + Replacements.push_back(std::make_pair(i, Replacement)); + }; + } + + Instruction *I = Cur->getAsInstruction(); + for (std::pair &Replacement : Replacements) { + I->setOperand(Replacement.first, Replacement.second); + } + Expands.insert(I); + Builder.Insert(I); + return I; +} + +// rewrite a GEP to strip of the first index +// We need to do this because earlier it used to be @[i32 x ] +// It is now i32*. We don't need an extra "@" dereference +static bool rewriteGEP(User *MaybeGEP, Value *ArrToRewrite, Value *New, + PollyIRBuilder &IRBuilder) { + GetElementPtrInst *GEP = dyn_cast(MaybeGEP); + if (!GEP) + return false; + if (!(GEP->getPointerOperand() == ArrToRewrite)) + return false; + + auto Indices = GEP->indices(); + std::vector NewIndices(Indices.begin() + 1, Indices.end()); + + Value *NewGEP = IRBuilder.CreateGEP(New, NewIndices, "newgep"); + GEP->replaceAllUsesWith(NewGEP); + GEP->eraseFromParent(); + return true; +} + +static void editAllUses(Instruction *Inst, Value *Old, Value *New, + PollyIRBuilder &Builder) { + + std::set Visited; + std::set Next; + std::set Current = {Inst}; + + while (!Current.empty()) { + + for (User *CurUser : Current) { + // Try to rewrite the current as a GEP + if (rewriteGEP(CurUser, Old, New, Builder)) + continue; + + for (unsigned i = 0; i < CurUser->getNumOperands(); i++) { + User *OperandAsUser = dyn_cast(CurUser->getOperand(i)); + // if (Visited.count(OperandAsUser)) continue; + // Visited.insert(OperandAsUser); + + if (!OperandAsUser) { + errs() << "\t\t" << *OperandAsUser << " obtained from: " << *CurUser + << "is not a user!." + " Trying to replace: " + << *Old << " with: " << *New << "failed.\n"; + report_fatal_error("editAllUses failed with value that was not user"); + } + + // if (isa) continue; + // assert (!isa(OperandAsUser) && "Value should not be a + // DerivedUser"); + + // Only choice in User Instruction,DerivedUser, Constant + // NOTE: does this even make sense? + if ((isa( + OperandAsUser))) { // || isa(ValueUser))) { + if (OperandAsUser == Old) { + CurUser->setOperand(i, New); + } + } else { + assert(isa(OperandAsUser)); + + if (isa(OperandAsUser) || + isa(OperandAsUser) || + isa(OperandAsUser)) + continue; + + if (isa(OperandAsUser)) { + if (OperandAsUser == Old) { + CurUser->setOperand(i, New); + } + continue; + } + + // Only things that can contain a reference is a ConstantExpr + ConstantExpr *ValueConstExpr = dyn_cast(OperandAsUser); + assert(ValueConstExpr && "this must be a ValueConstExpr"); + + Instruction *I = ExpandConstantExpr(ValueConstExpr, Builder, Next); + CurUser->setOperand(i, I); + + } // end else + } // end operands for + } // end for current + + // TODO: Visited += Current + Current.clear(); + // Current = Next - Visited + // std::set_difference(Next.begin(), Next.end(), Visited.begin(), + // Visited.end(), + // std::inserter(Current, Current.end())); + Current = Next; + Next.clear(); + Visited.clear(); + } // end worklist +} + +// Given a value `Current`, return all Instructions that may contain `Current` +// in an expression. +static void getContainingInstructions(Value *Current, + std::vector &Owners) { + Instruction *I; + Constant *C; + if ((I = dyn_cast(Current))) { + Owners.push_back(I); + } else if ((C = dyn_cast(Current))) { + for (Use &CUse : C->uses()) { + // if (CUse == C) continue; + getContainingInstructions(CUse.getUser(), Owners); + } + } else { + errs() << "(" << *Current + << ") is neither an instruction nor a constant!.\n" + "The process of finding the owning instruction reached a node " + "with unknown replacement strategy"; + report_fatal_error("unable to find owning instruction"); + llvm_unreachable("should never reach here from getContainingInstruction"); + } +} + +static void RewriteGlobalArray(Module &M, const DataLayout &DL, + GlobalVariable &Array, + std::vector &ReplacedGlobals) { + static const unsigned AddrSpace = 0; + // We only want arrays. + ArrayType *ArrayTy = dyn_cast(Array.getType()->getElementType()); + if (!ArrayTy) + return; + Type *ElemTy = ArrayTy->getElementType(); + PointerType *ElemPtrTy = PointerType::get(ElemTy, AddrSpace); + + // We only wish to replace stuff with internal linkage. Otherwise, + // our type edit from [T] to T* would be illegal across modules. + // It is interesting that most arrays don't seem to be tagged with internal + // linkage? + if (GlobalValue::isWeakForLinker(Array.getLinkage()) && false) { + return; + } + + if (!Array.hasInitializer() || + !isa(Array.getInitializer())) + return; + + // At this point, we have committed to replacing this array. + ReplacedGlobals.push_back(&Array); + + std::string NewName = (Array.getName() + Twine(".toptr")).str(); + GlobalVariable *ReplacementToArr = + dyn_cast(M.getOrInsertGlobal(NewName, ElemPtrTy)); + ReplacementToArr->setInitializer(ConstantPointerNull::get(ElemPtrTy)); + + Function *PollyMallocManaged = GetOrCreatePollyMallocManaged(M); + Twine FnName = Array.getName() + ".constructor"; + PollyIRBuilder Builder(M.getContext()); + FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false); + const GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + Function *F = Function::Create(Ty, Linkage, FnName, &M); + BasicBlock *Start = BasicBlock::Create(M.getContext(), "entry", F); + Builder.SetInsertPoint(Start); + + int ArraySizeInt = DL.getTypeAllocSizeInBits(ArrayTy) * 100; + Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), ArraySizeInt); + ArraySize->setName("array.size"); + + Value *AllocatedMemRaw = + Builder.CreateCall(PollyMallocManaged, {ArraySize}, "mem.raw"); + Value *AllocatedMemTyped = + Builder.CreatePointerCast(AllocatedMemRaw, ElemPtrTy, "mem.typed"); + Builder.CreateStore(AllocatedMemTyped, ReplacementToArr); + Builder.CreateRetVoid(); + + // HACK: refactor the priority stuff. + static int priority = 0; + appendToGlobalCtors(M, F, priority++, ReplacementToArr); + + errs() << "Done appending to global ctors\n"; + std::vector ArrayUserInstructions; + // Get all instructions that use array. We need to do this weird thing + // because `Constant`s that contain + for (Use &ArrayUse : Array.uses()) { + getContainingInstructions(ArrayUse.getUser(), ArrayUserInstructions); + } + + for (Instruction *UserOfArrayInst : ArrayUserInstructions) { + Builder.SetInsertPoint(UserOfArrayInst); + // Value *ArrPtrBitcast = Builder.CreateBitCast(ReplacementToArr, + // PointerType::get(ArrayTy, AddrSpace), "arrptr.bitcast"); + Value *ArrPtrLoaded = Builder.CreateLoad(ReplacementToArr, "arrptr.load"); + + std::set SeenSet; + editAllUses(UserOfArrayInst, &Array, ArrPtrLoaded, Builder); + } +} + class ManagedMemoryRewritePass : public ModulePass { public: static char ID; GPUArch Architecture; GPURuntime Runtime; + const DataLayout *DL; + ManagedMemoryRewritePass() : ModulePass(ID) {} + virtual bool runOnModule(Module &M) { + DL = &M.getDataLayout(); + Function *Malloc = M.getFunction("malloc"); if (Malloc) { @@ -114,6 +340,16 @@ Free->eraseFromParent(); } + std::vector GlobalsToErase; + for (GlobalVariable &Global : M.globals()) { + RewriteGlobalArray(M, *DL, Global, GlobalsToErase); + } + + // Erase all globals from the parent + for (GlobalVariable *GV : GlobalsToErase) { + GV->eraseFromParent(); + } + return true; } }; Index: test/GPGPU/simple-managed-memory-rewrite.ll =================================================================== --- /dev/null +++ test/GPGPU/simple-managed-memory-rewrite.ll @@ -0,0 +1,70 @@ +; RUN: opt %loadPolly -analyze -polly-process-unprofitable \ +; RUN: -polly-scops -polly-use-llvm-names < %s | FileCheck %s --check-prefix=SCOP + +; RUN: opt %loadPolly -S -polly-process-unprofitable -polly-acc-mincompute=0 \ +; RUN: -polly-target=gpu -polly-codegen-ppcg -polly-acc-codegen-managed-memory \ +; RUN: -polly-acc-rewrite-managed-memory < %s | FileCheck %s --check-prefix=HOST-IR + +; SCOP: Function: f +; SCOP-NEXT: Region: %for.body---%for.end +; SCOP-NEXT: Max Loop Depth: 1 +; SCOP: i32 MemRef_A[*]; + +; Check that we generate a constructor call for @A.toptr +; HOST-IR: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @A.constructor, i8* bitcast (i32** @A.toptr to i8*) }] + +; Check that we generate a constructor +; HOST-IR: define void @A.constructor() { +; HOST-IR-NEXT: entry: +; HOST-IR-NEXT: %mem.raw = call i8* @polly_mallocManaged(i64 320000) +; HOST-IR-NEXT: %mem.typed = bitcast i8* %mem.raw to i32* +; HOST-IR-NEXT: store i32* %mem.typed, i32** @A.toptr +; HOST-IR-NEXT: ret void +; HOST-IR-NEXT: } + +; HOST-IR-NOT: @A + +source_filename = "test.c" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.12.0" + +@A = common global [100 x i32] zeroinitializer, align 16 + +define void @f() { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %for.body + %indvars.iv1 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @A, i64 0, i64 %indvars.iv1 + store i32 42, i32* %arrayidx, align 4, !tbaa !3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 100 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0 + + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0 + +attributes #0 = { argmemonly nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 6.0.0 (http://llvm.org/git/clang.git 6660f0d30ef23b3142a6b08f9f41aad3d47c084f) (http://llvm.org/git/llvm.git 052dd78cb30f77a05dc8bb06b851402c4b6c6587)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} Index: tools/GPURuntime/GPUJIT.c =================================================================== --- tools/GPURuntime/GPUJIT.c +++ tools/GPURuntime/GPUJIT.c @@ -33,8 +33,8 @@ #include #include -static int DebugMode; -static int CacheMode; +static int DebugMode = 1; +static int CacheMode = 1; static PollyGPURuntime Runtime = RUNTIME_NONE; @@ -1455,6 +1455,7 @@ } void *polly_mallocManaged(size_t size) { + polly_initContextCUDA(); dump_function(); void *a; if (cudaMallocManaged(&a, size, cudaMemAttachGlobal) != cudaSuccess) { @@ -1462,6 +1463,8 @@ exit(-1); } addManagedPtr(a); + + fprintf(stderr, "\n *** cudaMallocManaged return value: %p\n", a); return a; }