diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -5090,7 +5090,10 @@ const Instruction &I) const = 0; virtual ExecutionDomainTy getExecutionDomain(const BasicBlock &) const = 0; - virtual ExecutionDomainTy getExecutionDomain(const CallBase &) const = 0; + /// Return the execution domain with which the call \p CB is entered and the + /// one with which it is left. + virtual std::pair + getExecutionDomain(const CallBase &CB) const = 0; virtual ExecutionDomainTy getFunctionExecutionDomain() const = 0; /// This function should return true if the type of the \p AA is diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -332,6 +332,9 @@ HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value()); } + if (!HWLoopInfo.CountType) + HWLoopInfo.CountType = IntegerType::get(Ctx, 32); + if (Opts.Decrement.has_value()) HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value()); @@ -389,8 +392,11 @@ Value *EltsRem = InsertPHICounter(Setup, LoopDec); LoopDec->setOperand(0, EltsRem); UpdateBranch(LoopDec); - } else + } else { + if (!LoopDecrement) + LoopDecrement = ConstantInt::get(CountType, 1); InsertLoopDec(); + } // Run through the basic blocks of the loop and see if any of them have dead // PHIs that can be removed. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1590,7 +1590,7 @@ // If we already have an SDValue for this value, use it. 
SDValue &N = NodeMap[V]; if (N.getNode()) { - if (isa(N) || isa(N)) { + if (isIntOrFPConstant(N)) { // Remove the debug location from the node as the node is about to be used // in a location which may differ from the original debug location. This // is relevant to Constant and ConstantFP nodes because they can appear diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -258,8 +258,7 @@ if (Incoming.getValueType().getSizeInBits() > 64) return false; - return (isa(Incoming) || isa(Incoming) || - Incoming.isUndef()); + return isIntOrFPConstant(Incoming) || Incoming.isUndef(); } /// Try to find existing copies of the incoming values in stack slots used for diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -11249,7 +11249,7 @@ continue; } Values.insert(Elt); - if (!isa(Elt) && !isa(Elt)) { + if (!isIntOrFPConstant(Elt)) { IsAllConstants = false; NumConstants--; } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1700,6 +1700,8 @@ return false; } else { auto PredIt = pred_begin(IntrBB); + if (PredIt == pred_end(IntrBB)) + return false; if ((*PredIt) != BB) return false; if (++PredIt != pred_end(IntrBB)) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include 
"llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -1706,37 +1707,27 @@ }; if (!ReplVal) { - for (Use *U : *UV) + auto *DT = + OMPInfoCache.getAnalysisResultForFunction(F); + if (!DT) + return false; + Instruction *IP = nullptr; + for (Use *U : *UV) { if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { + if (IP) + IP = DT->findNearestCommonDominator(IP, CI); + else + IP = CI; if (!CanBeMoved(*CI)) continue; - - // If the function is a kernel, dedup will move - // the runtime call right after the kernel init callsite. Otherwise, - // it will move it to the beginning of the caller function. - if (isKernel(F)) { - auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; - auto *KernelInitUV = KernelInitRFI.getUseVector(F); - - if (KernelInitUV->empty()) - continue; - - assert(KernelInitUV->size() == 1 && - "Expected a single __kmpc_target_init in kernel\n"); - - CallInst *KernelInitCI = - getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI); - assert(KernelInitCI && - "Expected a call to __kmpc_target_init in kernel\n"); - - CI->moveAfter(KernelInitCI); - } else - CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); - ReplVal = CI; - break; + if (!ReplVal) + ReplVal = CI; } + } if (!ReplVal) return false; + assert(IP && "Expected insertion point!"); + cast(ReplVal)->moveBefore(IP); } // If we use a call as a replacement value we need to make sure the ident is @@ -2566,13 +2557,19 @@ } const std::string getAsStr() const override { - unsigned TotalBlocks = 0, InitialThreadBlocks = 0; + unsigned TotalBlocks = 0, InitialThreadBlocks = 0, AlignedBlocks = 0; for (auto &It : BEDMap) { + if (!It.getFirst()) + continue; TotalBlocks++; InitialThreadBlocks += It.getSecond().IsExecutedByInitialThreadOnly; + AlignedBlocks += It.getSecond().IsReachedFromAlignedBarrierOnly && + It.getSecond().IsReachingAlignedBarrierOnly; } return "[AAExecutionDomain] " + std::to_string(InitialThreadBlocks) + "/" + - std::to_string(TotalBlocks) + " executed by 
initial thread only"; + std::to_string(AlignedBlocks) + " of " + + std::to_string(TotalBlocks) + + " executed by initial thread / aligned"; } /// See AbstractAttribute::trackStatistics(). @@ -2595,7 +2592,7 @@ SmallPtrSet DeletedBarriers; auto HandleAlignedBarrier = [&](CallBase *CB) { - const ExecutionDomainTy &ED = CEDMap[CB]; + const ExecutionDomainTy &ED = CEDMap[{CB, PRE}]; if (!ED.IsReachedFromAlignedBarrierOnly || ED.EncounteredNonLocalSideEffect) return; @@ -2628,7 +2625,7 @@ // The final aligned barrier (LastCB) reaching the kernel end was // removed already. This means we can go one step further and remove // the barriers encoutered last before (LastCB). - const ExecutionDomainTy &LastED = CEDMap[LastCB]; + const ExecutionDomainTy &LastED = CEDMap[{LastCB, PRE}]; Worklist.append(LastED.AlignedBarriers.begin(), LastED.AlignedBarriers.end()); } @@ -2661,12 +2658,12 @@ /// Merge all information from \p PredED into the successor \p ED. If /// \p InitialEdgeOnly is set, only the initial edge will enter the block /// represented by \p ED from this predecessor. - void mergeInPredecessor(Attributor &A, ExecutionDomainTy &ED, + bool mergeInPredecessor(Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED, bool InitialEdgeOnly = false); /// Accumulate information for the entry block in \p EntryBBED. - void handleCallees(Attributor &A, ExecutionDomainTy &EntryBBED); + bool handleCallees(Attributor &A, ExecutionDomainTy &EntryBBED); /// See AbstractAttribute::updateImpl. 
ChangeStatus updateImpl(Attributor &A) override; @@ -2676,6 +2673,7 @@ bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override { if (!isValidState()) return false; + assert(BB.getParent() == getAnchorScope() && "Block is out of scope!"); return BEDMap.lookup(&BB).IsExecutedByInitialThreadOnly; } @@ -2697,7 +2695,7 @@ if (CB != &I && AlignedBarriers.contains(const_cast(CB))) { break; } - const auto &It = CEDMap.find(CB); + const auto &It = CEDMap.find({CB, PRE}); if (It == CEDMap.end()) continue; if (!It->getSecond().IsReachingAlignedBarrierOnly) @@ -2717,26 +2715,12 @@ if (CB != &I && AlignedBarriers.contains(const_cast(CB))) { break; } - const auto &It = CEDMap.find(CB); + const auto &It = CEDMap.find({CB, POST}); if (It == CEDMap.end()) continue; - if (!AA::isNoSyncInst(A, *CB, *this)) { - if (It->getSecond().IsReachedFromAlignedBarrierOnly) { - break; - } - return false; - } - - Function *Callee = CB->getCalledFunction(); - if (!Callee || Callee->isDeclaration()) - return false; - const auto &EDAA = A.getAAFor( - *this, IRPosition::function(*Callee), DepClassTy::OPTIONAL); - if (!EDAA.getState().isValidState()) - return false; - if (!EDAA.getFunctionExecutionDomain().IsReachedFromAlignedBarrierOnly) - return false; - break; + if (It->getSecond().IsReachedFromAlignedBarrierOnly) + break; + return false; } while ((CurI = CurI->getPrevNonDebugInstruction())); if (!CurI) { @@ -2759,10 +2743,11 @@ "No request should be made against an invalid state!"); return BEDMap.lookup(&BB); } - ExecutionDomainTy getExecutionDomain(const CallBase &CB) const override { + std::pair + getExecutionDomain(const CallBase &CB) const override { assert(isValidState() && "No request should be made against an invalid state!"); - return CEDMap.lookup(&CB); + return {CEDMap.lookup({&CB, PRE}), CEDMap.lookup({&CB, POST})}; } ExecutionDomainTy getFunctionExecutionDomain() const override { assert(isValidState() && @@ -2819,12 +2804,21 @@ /// Mapping containing information about 
the function for other AAs. ExecutionDomainTy InterProceduralED; + enum Direction { PRE = 0, POST = 1 }; /// Mapping containing information per block. DenseMap BEDMap; - DenseMap CEDMap; + DenseMap, ExecutionDomainTy> + CEDMap; SmallSetVector AlignedBarriers; ReversePostOrderTraversal *RPOT = nullptr; + + /// Set \p R to \V and report true if that changed \p R. + static bool setAndRecord(bool &R, bool V) { + bool Eq = (R == V); + R = V; + return !Eq; + } }; void AAExecutionDomainFunction::mergeInPredecessorBarriersAndAssumptions( @@ -2836,26 +2830,33 @@ ED.addAlignedBarrier(A, *AB); } -void AAExecutionDomainFunction::mergeInPredecessor( +bool AAExecutionDomainFunction::mergeInPredecessor( Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED, bool InitialEdgeOnly) { - ED.IsExecutedByInitialThreadOnly = - InitialEdgeOnly || (PredED.IsExecutedByInitialThreadOnly && - ED.IsExecutedByInitialThreadOnly); - - ED.IsReachedFromAlignedBarrierOnly = ED.IsReachedFromAlignedBarrierOnly && - PredED.IsReachedFromAlignedBarrierOnly; - ED.EncounteredNonLocalSideEffect = - ED.EncounteredNonLocalSideEffect | PredED.EncounteredNonLocalSideEffect; + + bool Changed = false; + Changed |= + setAndRecord(ED.IsExecutedByInitialThreadOnly, + InitialEdgeOnly || (PredED.IsExecutedByInitialThreadOnly && + ED.IsExecutedByInitialThreadOnly)); + + Changed |= setAndRecord(ED.IsReachedFromAlignedBarrierOnly, + ED.IsReachedFromAlignedBarrierOnly && + PredED.IsReachedFromAlignedBarrierOnly); + Changed |= setAndRecord(ED.EncounteredNonLocalSideEffect, + ED.EncounteredNonLocalSideEffect | + PredED.EncounteredNonLocalSideEffect); + // Do not track assumptions and barriers as part of Changed. 
if (ED.IsReachedFromAlignedBarrierOnly) mergeInPredecessorBarriersAndAssumptions(A, ED, PredED); else ED.clearAssumeInstAndAlignedBarriers(); + return Changed; } -void AAExecutionDomainFunction::handleCallees(Attributor &A, +bool AAExecutionDomainFunction::handleCallees(Attributor &A, ExecutionDomainTy &EntryBBED) { - SmallVector CallSiteEDs; + SmallVector, 4> CallSiteEDs; auto PredForCallSite = [&](AbstractCallSite ACS) { const auto &EDAA = A.getAAFor( *this, IRPosition::function(*ACS.getInstruction()->getFunction()), @@ -2872,9 +2873,10 @@ if (A.checkForAllCallSites(PredForCallSite, *this, /* RequiresAllCallSites */ true, AllCallSitesKnown)) { - for (const auto &CSED : CallSiteEDs) { - mergeInPredecessor(A, EntryBBED, CSED); - ExitED.IsReachingAlignedBarrierOnly &= CSED.IsReachingAlignedBarrierOnly; + for (const auto &[CSInED, CSOutED] : CallSiteEDs) { + mergeInPredecessor(A, EntryBBED, CSInED); + ExitED.IsReachingAlignedBarrierOnly &= + CSOutED.IsReachingAlignedBarrierOnly; } } else { @@ -2894,10 +2896,17 @@ } } + bool Changed = false; auto &FnED = BEDMap[nullptr]; - FnED.IsReachedFromAlignedBarrierOnly &= - EntryBBED.IsReachedFromAlignedBarrierOnly; - FnED.IsReachingAlignedBarrierOnly &= ExitED.IsReachingAlignedBarrierOnly; + Changed |= setAndRecord(FnED.IsReachedFromAlignedBarrierOnly, + FnED.IsReachedFromAlignedBarrierOnly & + EntryBBED.IsReachedFromAlignedBarrierOnly); + Changed |= setAndRecord(FnED.IsReachingAlignedBarrierOnly, + FnED.IsReachingAlignedBarrierOnly & + ExitED.IsReachingAlignedBarrierOnly); + Changed |= setAndRecord(FnED.IsExecutedByInitialThreadOnly, + EntryBBED.IsExecutedByInitialThreadOnly); + return Changed; } ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { @@ -2911,8 +2920,9 @@ if (CB) Changed |= AlignedBarriers.insert(CB); // First, update the barrier ED kept in the separate CEDMap. 
- auto &CallED = CEDMap[CB]; - mergeInPredecessor(A, CallED, ED); + auto &CallInED = CEDMap[{CB, PRE}]; + Changed |= mergeInPredecessor(A, CallInED, ED); + CallInED.IsReachingAlignedBarrierOnly = true; // Next adjust the ED we use for the traversal. ED.EncounteredNonLocalSideEffect = false; ED.IsReachedFromAlignedBarrierOnly = true; @@ -2920,18 +2930,13 @@ ED.clearAssumeInstAndAlignedBarriers(); if (CB) ED.addAlignedBarrier(A, *CB); + auto &CallOutED = CEDMap[{CB, POST}]; + Changed |= mergeInPredecessor(A, CallOutED, ED); }; auto &LivenessAA = A.getAAFor(*this, getIRPosition(), DepClassTy::OPTIONAL); - // Set \p R to \V and report true if that changed \p R. - auto SetAndRecord = [&](bool &R, bool V) { - bool Eq = (R == V); - R = V; - return !Eq; - }; - auto &OMPInfoCache = static_cast(A.getInfoCache()); Function *F = getAnchorScope(); @@ -2949,7 +2954,7 @@ ExecutionDomainTy ED; // Propagate "incoming edges" into information about this block. if (IsEntryBB) { - handleCallees(A, ED); + Changed |= handleCallees(A, ED); } else { // For live non-entry blocks we only propagate // information via live edges. @@ -3018,8 +3023,8 @@ // Record how we entered the call, then accumulate the effect of the // call in ED for potential use by the callee. - auto &CallED = CEDMap[CB]; - mergeInPredecessor(A, CallED, ED); + auto &CallInED = CEDMap[{CB, PRE}]; + Changed |= mergeInPredecessor(A, CallInED, ED); // If we have a sync-definition we can check if it starts/ends in an // aligned barrier. 
If we are unsure we assume any sync breaks @@ -3031,7 +3036,6 @@ if (EDAA.getState().isValidState()) { const auto &CalleeED = EDAA.getFunctionExecutionDomain(); ED.IsReachedFromAlignedBarrierOnly = - CallED.IsReachedFromAlignedBarrierOnly = CalleeED.IsReachedFromAlignedBarrierOnly; AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly; if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly) @@ -3040,20 +3044,27 @@ else ED.EncounteredNonLocalSideEffect = CalleeED.EncounteredNonLocalSideEffect; - if (!CalleeED.IsReachingAlignedBarrierOnly) + if (!CalleeED.IsReachingAlignedBarrierOnly) { + Changed |= + setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false); SyncInstWorklist.push_back(&I); + } if (CalleeED.IsReachedFromAlignedBarrierOnly) mergeInPredecessorBarriersAndAssumptions(A, ED, CalleeED); + auto &CallOutED = CEDMap[{CB, POST}]; + Changed |= mergeInPredecessor(A, CallOutED, ED); continue; } } - if (!IsNoSync) - ED.IsReachedFromAlignedBarrierOnly = - CallED.IsReachedFromAlignedBarrierOnly = false; + if (!IsNoSync) { + ED.IsReachedFromAlignedBarrierOnly = false; + Changed |= setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false); + SyncInstWorklist.push_back(&I); + } AlignedBarrierLastInBlock &= ED.IsReachedFromAlignedBarrierOnly; ED.EncounteredNonLocalSideEffect |= !CB->doesNotAccessMemory(); - if (!IsNoSync) - SyncInstWorklist.push_back(&I); + auto &CallOutED = CEDMap[{CB, POST}]; + Changed |= mergeInPredecessor(A, CallOutED, ED); } if (!I.mayHaveSideEffects() && !I.mayReadFromMemory()) @@ -3092,12 +3103,14 @@ if (!isa(BB.getTerminator()) && !BB.getTerminator()->getNumSuccessors()) { - mergeInPredecessor(A, InterProceduralED, ED); + Changed |= mergeInPredecessor(A, InterProceduralED, ED); auto &FnED = BEDMap[nullptr]; if (!FnED.IsReachingAlignedBarrierOnly) { IsEndAndNotReachingAlignedBarriersOnly = true; SyncInstWorklist.push_back(BB.getTerminator()); + auto &BBED = BEDMap[&BB]; + Changed |= setAndRecord(BBED.IsReachingAlignedBarrierOnly, 
false); } if (IsKernel) HandleAlignedBarrier(nullptr, ED); @@ -3129,19 +3142,21 @@ while (!SyncInstWorklist.empty()) { Instruction *SyncInst = SyncInstWorklist.pop_back_val(); Instruction *CurInst = SyncInst; - bool HitAlignedBarrier = false; + bool HitAlignedBarrierOrKnownEnd = false; while ((CurInst = CurInst->getPrevNode())) { auto *CB = dyn_cast(CurInst); if (!CB) continue; - auto &CallED = CEDMap[CB]; - if (SetAndRecord(CallED.IsReachingAlignedBarrierOnly, false)) + auto &CallOutED = CEDMap[{CB, POST}]; + if (setAndRecord(CallOutED.IsReachingAlignedBarrierOnly, false)) Changed = true; - HitAlignedBarrier = AlignedBarriers.count(CB); - if (HitAlignedBarrier) + auto &CallInED = CEDMap[{CB, PRE}]; + HitAlignedBarrierOrKnownEnd = + AlignedBarriers.count(CB) || !CallInED.IsReachingAlignedBarrierOnly; + if (HitAlignedBarrierOrKnownEnd) break; } - if (HitAlignedBarrier) + if (HitAlignedBarrierOrKnownEnd) continue; BasicBlock *SyncBB = SyncInst->getParent(); for (auto *PredBB : predecessors(SyncBB)) { @@ -3149,14 +3164,15 @@ continue; if (!Visited.insert(PredBB)) continue; - SyncInstWorklist.push_back(PredBB->getTerminator()); auto &PredED = BEDMap[PredBB]; - if (SetAndRecord(PredED.IsReachingAlignedBarrierOnly, false)) + if (setAndRecord(PredED.IsReachingAlignedBarrierOnly, false)) { Changed = true; + SyncInstWorklist.push_back(PredBB->getTerminator()); + } } if (SyncBB != &EntryBB) continue; - if (SetAndRecord(InterProceduralED.IsReachingAlignedBarrierOnly, false)) + if (setAndRecord(InterProceduralED.IsReachingAlignedBarrierOnly, false)) Changed = true; } @@ -3510,22 +3526,7 @@ Attributor::SimplifictionCallbackTy StateMachineSimplifyCB = [&](const IRPosition &IRP, const AbstractAttribute *AA, bool &UsedAssumedInformation) -> std::optional { - // IRP represents the "use generic state machine" argument of an - // __kmpc_target_init call. We will answer this one with the internal - // state. 
As long as we are not in an invalid state, we will create a - // custom state machine so the value should be a `i1 false`. If we are - // in an invalid state, we won't change the value that is in the IR. - if (!ReachedKnownParallelRegions.isValidState()) - return nullptr; - // If we have disabled state machine rewrites, don't make a custom one. - if (DisableOpenMPOptStateMachineRewrite) return nullptr; - if (AA) - A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); - UsedAssumedInformation = !isAtFixpoint(); - auto *FalseVal = - ConstantInt::getBool(IRP.getAnchorValue().getContext(), false); - return FalseVal; }; Attributor::SimplifictionCallbackTy ModeSimplifyCB = diff --git a/llvm/test/Transforms/Attributor/reduced/pred_iterator_crash.ll b/llvm/test/Transforms/Attributor/reduced/pred_iterator_crash.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Attributor/reduced/pred_iterator_crash.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes --check-globals +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC + +%"struct.ompx::state::TeamStateTy" = type { %"struct.ompx::state::ICVStateTy", i32, i32, ptr } +%"struct.ompx::state::ICVStateTy" = type { i32, i32, i32, i32, i32, i32 } + +@_ZN4ompx5state9TeamStateE = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.assume(i1 noundef) #0 + +;. 
+; CHECK: @[[_ZN4OMPX5STATE9TEAMSTATEE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global %"struct.ompx::state::TeamStateTy" undef +;. +define weak_odr amdgpu_kernel void @__omp_offloading_16_19bc70bc_main_l44() { +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_19bc70bc_main_l44() { +; CHECK-NEXT: ret void +; + call fastcc void @__omp_outlined__() + ret void +} + +define internal fastcc void @__omp_outlined__() { +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CGSCC-LABEL: define {{[^@]+}}@__omp_outlined__ +; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { +; CGSCC-NEXT: br label [[TMP2:%.*]] +; CGSCC: 1: +; CGSCC-NEXT: unreachable +; CGSCC: 2: +; CGSCC-NEXT: ret void +; + %1 = load i32, ptr getelementptr inbounds (i8, ptr addrspacecast (ptr addrspace(3) @_ZN4ompx5state9TeamStateE to ptr), i64 4), align 4 + br label %4 + +2: ; No predecessors! + %3 = icmp eq i32 %1, 0 + call void @llvm.assume(i1 %3) + br label %4 + +4: ; preds = %2, %0 + ret void +} +;. +; TUNIT: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +;. +; CGSCC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +;. +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; TUNIT: {{.*}} diff --git a/llvm/test/Transforms/HardwareLoops/unexpected-inputs.ll b/llvm/test/Transforms/HardwareLoops/unexpected-inputs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/HardwareLoops/unexpected-inputs.ll @@ -0,0 +1,37 @@ +; RUN: opt -passes='hardware-loops' -S %s -o - | FileCheck %s --check-prefix=CHECK +; RUN: opt -passes='hardware-loops' -S %s -o - | FileCheck %s --check-prefix=CHECK +; RUN: opt -passes='hardware-loops' -S %s -o - | FileCheck %s --check-prefix=CHECK + +define void @while_lt(i32 %i, i32 %N, ptr nocapture %A) { +; CHECK-LABEL: @while_lt( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP4:%.*]] = icmp ult i32 [[I:%.*]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP4]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] +; CHECK: while.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[N]], [[I]] +; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[TMP0]]) +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[I_ADDR_05:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ [[I]], [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_ADDR_05]] +; CHECK-NEXT: store i32 [[I_ADDR_05]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_ADDR_05]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1) +; CHECK-NEXT: br i1 [[TMP1]], label [[WHILE_BODY]], label [[WHILE_END]] +; CHECK: while.end: +; CHECK-NEXT: ret void +entry: + %cmp4 = icmp ult i32 %i, %N + br i1 %cmp4, label %while.body, label %while.end + +while.body: + %i.addr.05 = phi i32 [ %inc, %while.body ], [ %i, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.addr.05 + store i32 %i.addr.05, ptr %arrayidx, align 4 + %inc = add nuw i32 %i.addr.05, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %while.end, label %while.body + +while.end: + ret void +} diff --git 
a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll --- a/llvm/test/Transforms/OpenMP/deduplication.ll +++ b/llvm/test/Transforms/OpenMP/deduplication.ll @@ -102,8 +102,8 @@ define void @local_and_global_gtid_calls() { ; CHECK-LABEL: define {{[^@]+}}@local_and_global_gtid_calls() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) ; CHECK-NEXT: [[DOTKMPC_LOC_ADDR:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8 +; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) @@ -132,10 +132,10 @@ define void @local_gtid_calls_only() { ; CHECK-LABEL: define {{[^@]+}}@local_gtid_calls_only() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) ; CHECK-NEXT: [[DOTKMPC_LOC_ADDR1:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8 ; CHECK-NEXT: [[DOTKMPC_LOC_ADDR2:%.*]] = alloca [[STRUCT_IDENT_T]], align 8 ; CHECK-NEXT: [[DOTKMPC_LOC_ADDR3:%.*]] = alloca [[STRUCT_IDENT_T]], align 8 +; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) ; CHECK-NEXT: call void @useI32(i32 [[TID5]]) diff --git a/llvm/test/Transforms/OpenMP/deduplication_target.ll b/llvm/test/Transforms/OpenMP/deduplication_target.ll --- a/llvm/test/Transforms/OpenMP/deduplication_target.ll +++ b/llvm/test/Transforms/OpenMP/deduplication_target.ll @@ -19,10 +19,10 @@ ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 
[[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) ; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll --- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll +++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll @@ -37,7 +37,7 @@ ; CHECK: if.else: ; CHECK-NEXT: call void @barrier() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: call void @use1(i32 undef) #[[ATTR6]] -; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: call void @llvm.assume(i1 undef) ; CHECK-NEXT: call void @barrier() #[[ATTR6]] ; CHECK-NEXT: br label [[IF_MERGE]] ; CHECK: if.merge: diff --git a/llvm/tools/dsymutil/MachODebugMapParser.cpp b/llvm/tools/dsymutil/MachODebugMapParser.cpp --- a/llvm/tools/dsymutil/MachODebugMapParser.cpp +++ b/llvm/tools/dsymutil/MachODebugMapParser.cpp @@ -113,6 +113,8 @@ StringRef BinaryPath); void Warning(const Twine &Msg, StringRef File = StringRef()) { + assert(Result && + "The debug map must be initialized before calling this function"); WithColor::warning() << "(" << MachOUtils::getArchName( Result->getTriple().getArchName()) @@ -200,10 +202,9 @@ std::unique_ptr MachODebugMapParser::parseOneBinary(const MachOObjectFile &MainBinary, StringRef BinaryPath) { + Result = std::make_unique(MainBinary.getArchTriple(), BinaryPath, + MainBinary.getUuid()); loadMainBinarySymbols(MainBinary); - ArrayRef UUID = MainBinary.getUuid(); - Result = - std::make_unique(MainBinary.getArchTriple(), BinaryPath, UUID); MainBinaryStrings = MainBinary.getStringTableData(); for (const SymbolRef &Symbol : MainBinary.symbols()) { const DataRefImpl &DRI = Symbol.getRawDataRefImpl(); diff --git a/openmp/libomptarget/include/device.h 
b/openmp/libomptarget/include/device.h --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -251,7 +251,7 @@ /// OFFLOAD_SUCCESS. The entry is locked for this operation. template int foreachShadowPointerInfo(CBTy CB) const { for (auto &It : States->ShadowPtrInfos) - if (CB(It) == OFFLOAD_FAIL) + if (CB(const_cast(It)) == OFFLOAD_FAIL) return OFFLOAD_FAIL; return OFFLOAD_SUCCESS; } diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -751,16 +751,13 @@ /// The mapping type (bitfield). int64_t ArgType; - /// Index of the argument in the data mapping scheme. - int32_t ArgIndex; - /// The target pointer information. TargetPointerResultTy TPR; PostProcessingInfo(void *HstPtr, int64_t Size, int64_t ArgType, - int32_t ArgIndex, TargetPointerResultTy &&TPR) + TargetPointerResultTy &&TPR) : HstPtrBegin(HstPtr), DataSize(Size), ArgType(ArgType), - ArgIndex(ArgIndex), TPR(std::move(TPR)) {} + TPR(std::move(TPR)) {} }; } // namespace @@ -772,12 +769,10 @@ /// according to the successfulness of the operations. [[nodiscard]] static int postProcessingTargetDataEnd(DeviceTy *Device, - SmallVector &EntriesInfo, - bool FromMapper) { + SmallVector &EntriesInfo) { int Ret = OFFLOAD_SUCCESS; - void *FromMapperBase = nullptr; - for (auto &[HstPtrBegin, DataSize, ArgType, ArgIndex, TPR] : EntriesInfo) { + for (auto &[HstPtrBegin, DataSize, ArgType, TPR] : EntriesInfo) { bool DelEntry = !TPR.isHostPointer(); // If the last element from the mapper (for end transfer args comes in @@ -788,11 +783,6 @@ DelEntry = false; // protect parent struct from being deallocated } - if (DelEntry && FromMapper && ArgIndex == 0) { - DelEntry = false; - FromMapperBase = HstPtrBegin; - } - // If we marked the entry to be deleted we need to verify no other // thread reused it by now. 
If deletion is still supposed to happen by // this thread LR will be set and exclusive access to the HDTT map @@ -836,7 +826,7 @@ // TPR), or erase TPR. TPR.setEntry(nullptr); - if (!DelEntry || (FromMapperBase && FromMapperBase == HstPtrBegin)) + if (!DelEntry) continue; Ret = Device->eraseMapEntry(HDTTMap, Entry, DataSize); @@ -860,7 +850,6 @@ void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) { int Ret = OFFLOAD_SUCCESS; auto *PostProcessingPtrs = new SmallVector(); - void *FromMapperBase = nullptr; // process each input. for (int32_t I = ArgNum - 1; I >= 0; --I) { // Ignore private variables and arrays - there is no mapping for them. @@ -998,7 +987,7 @@ } // Add pointer to the buffer for post-synchronize processing. - PostProcessingPtrs->emplace_back(HstPtrBegin, DataSize, ArgTypes[I], I, + PostProcessingPtrs->emplace_back(HstPtrBegin, DataSize, ArgTypes[I], std::move(TPR)); PostProcessingPtrs->back().TPR.getEntry()->unlock(); } @@ -1007,8 +996,7 @@ // TODO: We might want to remove `mutable` in the future by not changing the // captured variables somehow. 
AsyncInfo.addPostProcessingFunction([=, Device = &Device]() mutable -> int { - return postProcessingTargetDataEnd(Device, *PostProcessingPtrs, - FromMapperBase); + return postProcessingTargetDataEnd(Device, *PostProcessingPtrs); }); return Ret; @@ -1050,7 +1038,7 @@ } if (TPR.getEntry()) { int Ret = TPR.getEntry()->foreachShadowPointerInfo( - [&](const ShadowPtrInfoTy &ShadowPtr) { + [&](ShadowPtrInfoTy &ShadowPtr) { DP("Restoring original target pointer value " DPxMOD " for target " "pointer " DPxMOD "\n", DPxPTR(ShadowPtr.TgtPtrVal), DPxPTR(ShadowPtr.TgtPtrAddr)); diff --git a/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp b/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp --- a/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp +++ b/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp @@ -1,8 +1,5 @@ // RUN: %libomptarget-compilexx-run-and-check-generic -// Wrong results on amdgpu -// XFAIL: amdgcn-amd-amdhsa - #include #include @@ -45,17 +42,23 @@ spp[0][0].f.b[1], spp[0][0].f.b == &x[0] ? 1 : 0); // CHECK: 111 222 777 20.00000 1 + int spp00fa = -1, spp00fca = -1, spp00fb_r = -1; __intptr_t p = reinterpret_cast<__intptr_t>(&x[0]); -#pragma omp target map(tofrom : spp[0][0]) firstprivate(p) +#pragma omp target map(tofrom: spp[0][0]) firstprivate(p) \ + map(from: spp00fa, spp00fca, spp00fb_r) { - printf("%d %d %d\n", spp[0][0].f.a, spp[0][0].f.c.a, - spp[0][0].f.b == reinterpret_cast(p) ? 1 : 0); - // CHECK: 222 777 0 + spp00fa = spp[0][0].f.a; + spp00fca = spp[0][0].f.c.a; + spp00fb_r = spp[0][0].f.b == reinterpret_cast(p) ? 
1 : 0; + printf("%d %d %d\n", spp00fa, spp00fca, spp00fb_r); + // XCHECK: 222 777 0 spp[0][0].e = 333; spp[0][0].f.a = 444; spp[0][0].f.c.a = 555; spp[0][0].f.b[1] = 40; } + printf("%d %d %d\n", spp00fa, spp00fca, spp00fb_r); + // CHECK: 222 777 0 printf("%d %d %d %4.5f %d\n", spp[0][0].e, spp[0][0].f.a, spp[0][0].f.c.a, spp[0][0].f.b[1], spp[0][0].f.b == &x[0] ? 1 : 0); // CHECK: 333 222 777 40.00000 1 diff --git a/openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp b/openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp --- a/openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp +++ b/openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp @@ -1,8 +1,5 @@ // RUN: %libomptarget-compilexx-run-and-check-generic -// Wrong results on amdgpu -// XFAIL: amdgcn-amd-amdhsa - #include #include @@ -42,19 +39,25 @@ spp[0][0].g == &y[0] ? 1 : 0); // CHECK: 111 222 20.00000 1 30 1 + int spp00fa = -1, spp00fb_r = -1, spp00fg1 = -1, spp00fg_r = -1; __intptr_t p = reinterpret_cast<__intptr_t>(&x[0]), p1 = reinterpret_cast<__intptr_t>(&y[0]); -#pragma omp target map(tofrom : spp[0][0]) firstprivate(p, p1) +#pragma omp target map(tofrom : spp[0][0]) firstprivate(p, p1) \ + map(from: spp00fa, spp00fb_r, spp00fg1, spp00fg_r) { - printf("%d %d %d %d\n", spp[0][0].f.a, - spp[0][0].f.b == reinterpret_cast(p) ? 1 : 0, spp[0][0].g[1], - spp[0][0].g == reinterpret_cast(p1) ? 1 : 0); - // CHECK: 222 0 30 0 + spp00fa = spp[0][0].f.a; + spp00fb_r = spp[0][0].f.b == reinterpret_cast(p) ? 1 : 0; + spp00fg1 = spp[0][0].g[1]; + spp00fg_r = spp[0][0].g == reinterpret_cast(p1) ? 
1 : 0; + printf("%d %d %d %d\n", spp00fa, spp00fb_r, spp00fg1, spp00fg_r); + // XCHECK: 222 0 30 0 spp[0][0].e = 333; spp[0][0].f.a = 444; spp[0][0].f.b[1] = 40; spp[0][0].g[1] = 50; } + printf("%d %d %d %d\n", spp00fa, spp00fb_r, spp00fg1, spp00fg_r); + // CHECK: 222 0 30 0 printf("%d %d %4.5f %d %d %d\n", spp[0][0].e, spp[0][0].f.a, spp[0][0].f.b[1], spp[0][0].f.b == &x[0] ? 1 : 0, spp[0][0].g[1], spp[0][0].g == &y[0] ? 1 : 0); diff --git a/openmp/libomptarget/test/mapping/lambda_by_value.cpp b/openmp/libomptarget/test/mapping/lambda_by_value.cpp --- a/openmp/libomptarget/test/mapping/lambda_by_value.cpp +++ b/openmp/libomptarget/test/mapping/lambda_by_value.cpp @@ -1,8 +1,5 @@ // RUN: %libomptarget-compilexx-run-and-check-generic -// Wrong results on amdgpu -// XFAIL: amdgcn-amd-amdhsa - #include #include @@ -11,6 +8,13 @@ // CHECK: tgt : [[V2]] [[PX_TGT]] 1 // CHECK: out : [[V2]] [[V2]] [[PX]] [[PY]] +#pragma omp begin declare target +int a = -1, *c; +long b = -1; +const long *d; +int e = -1, *f, g = -1; +#pragma omp end declare target + int main() { int x[10]; long y[8]; @@ -18,18 +22,27 @@ y[1] = 222; auto lambda = [&x, y]() { + a = x[1]; + b = y[1]; + c = &x[0]; + d = &y[0]; printf("lambda: %d %ld %p %p\n", x[1], y[1], &x[0], &y[0]); x[1] = y[1]; }; - printf("before: %d %ld %p %p\n", x[1], y[1], &x[0], &y[0]); intptr_t xp = (intptr_t)&x[0]; #pragma omp target firstprivate(xp) { lambda(); + e = x[1]; + f = &x[0]; + g = (&x[0] != (int *)xp); printf("tgt : %d %p %d\n", x[1], &x[0], (&x[0] != (int *)xp)); } +#pragma omp target update from(a, b, c, d, e, f, g) + printf("lambda: %d %ld %p %p\n", a, b, c, d); + printf("tgt : %d %p %d\n", e, f, g); printf("out : %d %ld %p %p\n", x[1], y[1], &x[0], &y[0]); return 0; diff --git a/openmp/libomptarget/test/mapping/ompx_hold/struct.c b/openmp/libomptarget/test/mapping/ompx_hold/struct.c --- a/openmp/libomptarget/test/mapping/ompx_hold/struct.c +++ b/openmp/libomptarget/test/mapping/ompx_hold/struct.c @@ -1,20 +1,35 @@ // 
RUN: %libomptarget-compile-generic -fopenmp-extensions // RUN: %libomptarget-run-generic | %fcheck-generic -strict-whitespace -// Wrong results on amdgpu -// XFAIL: amdgcn-amd-amdhsa - #include <omp.h> #include <stdio.h> +#pragma omp begin declare target +char *N1, *N2; +int V1, V2; +#pragma omp end declare target + #define CHECK_PRESENCE(Var1, Var2, Var3) \ printf(" presence of %s, %s, %s: %d, %d, %d\n", #Var1, #Var2, #Var3, \ omp_target_is_present(&(Var1), omp_get_default_device()), \ omp_target_is_present(&(Var2), omp_get_default_device()), \ omp_target_is_present(&(Var3), omp_get_default_device())) +#define CHECK_VALUES_HELPER(N1, N2, Var1, Var2) \ + printf(" values of %s, %s: %d, %d\n", N1, N2, (Var1), (Var2)) + +#define CHECK_VALUES_DELAYED(Var1, Var2) \ + N1 = #Var1; \ + N2 = #Var2; \ + V1 = (Var1); \ + V2 = (Var2); + +#define CHECK_DELAYED_VALUS() \ + _Pragma("omp target update from(N1, N2, V1, V2)") \ + CHECK_VALUES_HELPER(N1, N2, V1, V2) + #define CHECK_VALUES(Var1, Var2) \ - printf(" values of %s, %s: %d, %d\n", #Var1, #Var2, (Var1), (Var2)) + CHECK_VALUES_HELPER(#Var1, #Var2, (Var1), (Var2)) int main() { struct S { @@ -132,8 +147,9 @@ #pragma omp target map(to : s.i, s.j) { // No transfer here even though parent's DynRefCount=1. // CHECK-NEXT: values of s.i, s.j: 21, 31 - CHECK_VALUES(s.i, s.j); + CHECK_VALUES_DELAYED(s.i, s.j); } + CHECK_DELAYED_VALUS(); } // CHECK-NEXT: presence of s, s.i, s.j: 0, 0, 0 // CHECK-NEXT: values of s.i, s.j: 21, 31 @@ -162,8 +178,9 @@ #pragma omp target map(ompx_hold, to : s.i, s.j) { // No transfer here even though parent's HoldRefCount=1. 
// CHECK-NEXT: values of s.i, s.j: 21, 31 - CHECK_VALUES(s.i, s.j); + CHECK_VALUES_DELAYED(s.i, s.j); } + CHECK_DELAYED_VALUS(); } // CHECK-NEXT: presence of s, s.i, s.j: 0, 0, 0 // CHECK-NEXT: values of s.i, s.j: 21, 31 diff --git a/openmp/libomptarget/test/offloading/atomic-compare-signedness.c b/openmp/libomptarget/test/offloading/atomic-compare-signedness.c --- a/openmp/libomptarget/test/offloading/atomic-compare-signedness.c +++ b/openmp/libomptarget/test/offloading/atomic-compare-signedness.c @@ -5,6 +5,8 @@ // RUN: %libomptarget-compile-generic -fopenmp-version=51 // RUN: %libomptarget-run-generic | %fcheck-generic +// RUN: %libomptarget-compileopt-generic -fopenmp-version=51 +// RUN: %libomptarget-run-generic | %fcheck-generic // High parallelism increases our chances of detecting a lack of atomicity. #define NUM_THREADS_TRY 256 diff --git a/openmp/libomptarget/test/offloading/bug51781.c b/openmp/libomptarget/test/offloading/bug51781.c --- a/openmp/libomptarget/test/offloading/bug51781.c +++ b/openmp/libomptarget/test/offloading/bug51781.c @@ -32,9 +32,6 @@ // // CUSTOM: Rewriting generic-mode kernel with a customized state machine. -// Hangs -// UNSUPPORTED: amdgcn-amd-amdhsa - #if ADD_REDUCTION #define REDUCTION(...) reduction(__VA_ARGS__) #else diff --git a/openmp/libomptarget/test/offloading/host_as_target.c b/openmp/libomptarget/test/offloading/host_as_target.c --- a/openmp/libomptarget/test/offloading/host_as_target.c +++ b/openmp/libomptarget/test/offloading/host_as_target.c @@ -7,16 +7,15 @@ // RUN: %libomptarget-compile-run-and-check-generic -// amdgpu does not have a working printf definition -// XFAIL: amdgcn-amd-amdhsa - #include <omp.h> #include <stdio.h> static void check(char *X, int Dev) { printf(" host X = %c\n", *X); -#pragma omp target device(Dev) - printf("device X = %c\n", *X); + char DV = -1; +#pragma omp target device(Dev) map(from : DV) + DV = *X; + printf("device X = %c\n", DV); } #define CHECK_DATA() check(&X, DevDefault)