diff --git a/openmp/libomptarget/DeviceRTL/src/Debug.cpp b/openmp/libomptarget/DeviceRTL/src/Debug.cpp
--- a/openmp/libomptarget/DeviceRTL/src/Debug.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Debug.cpp
@@ -38,7 +38,7 @@ device = {arch(nvptx, nvptx64)},
     implementation = {extension(match_any)})
 int32_t vprintf(const char *, void *);
 namespace impl {
-static int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
   return vprintf(Format, Arguments);
 }
 } // namespace impl
@@ -47,7 +47,7 @@
 // We do not have a vprintf implementation for AMD GPU yet so we use a stub.
 #pragma omp begin declare variant match(device = {arch(amdgcn)})
 namespace impl {
-static int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
   return -1;
 }
 } // namespace impl
diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -46,7 +46,7 @@
 ///{
 
 #pragma omp begin declare variant match(device = {arch(amdgcn)})
-static const llvm::omp::GV &getGridValue() {
+const llvm::omp::GV &getGridValue() {
   return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
 }
 
@@ -121,9 +121,7 @@
   return __nvvm_read_ptx_sreg_ntid_x();
 }
 
-static const llvm::omp::GV &getGridValue() {
-  return llvm::omp::NVPTXGridValues;
-}
+const llvm::omp::GV &getGridValue() { return llvm::omp::NVPTXGridValues; }
 
 LaneMaskTy activemask() {
   unsigned int Mask;
diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
--- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
@@ -158,52 +158,52 @@
     break;
   case 16:
     GlobalArgs[15] = args[15];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 15:
     GlobalArgs[14] = args[14];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 14:
     GlobalArgs[13] = args[13];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 13:
     GlobalArgs[12] = args[12];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 12:
     GlobalArgs[11] = args[11];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 11:
     GlobalArgs[10] = args[10];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 10:
     GlobalArgs[9] = args[9];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 9:
     GlobalArgs[8] = args[8];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 8:
     GlobalArgs[7] = args[7];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 7:
     GlobalArgs[6] = args[6];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 6:
     GlobalArgs[5] = args[5];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 5:
     GlobalArgs[4] = args[4];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 4:
     GlobalArgs[3] = args[3];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 3:
     GlobalArgs[2] = args[2];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 2:
     GlobalArgs[1] = args[1];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 1:
     GlobalArgs[0] = args[0];
-    // FALLTHROUGH
+    [[fallthrough]];
   case 0:
     break;
   }
diff --git a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
--- a/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Reduction.cpp
@@ -167,8 +167,8 @@
 
 uint32_t kmpcMin(uint32_t x, uint32_t y) { return x < y ? x : y; }
 
-static volatile uint32_t IterCnt = 0;
-static volatile uint32_t Cnt = 0;
+static uint32_t IterCnt = 0;
+static uint32_t Cnt = 0;
 } // namespace
 
@@ -211,7 +211,7 @@
   // to the number of slots in the buffer.
   bool IsMaster = (ThreadId == 0);
   while (IsMaster) {
-    Bound = atomic::load((uint32_t *)&IterCnt, __ATOMIC_SEQ_CST);
+    Bound = atomic::load(&IterCnt, __ATOMIC_SEQ_CST);
     if (TeamId < Bound + num_of_records)
       break;
   }
@@ -228,8 +228,7 @@
     // Increment team counter.
     // This counter is incremented by all teams in the current
     // BUFFER_SIZE chunk.
-    ChunkTeamCount =
-        atomic::inc((uint32_t *)&Cnt, num_of_records - 1u, __ATOMIC_SEQ_CST);
+    ChunkTeamCount = atomic::inc(&Cnt, num_of_records - 1u, __ATOMIC_SEQ_CST);
   }
   // Synchronize
   if (mapping::isSPMDMode())
@@ -305,8 +304,7 @@
   if (IsMaster && ChunkTeamCount == num_of_records - 1) {
     // Allow SIZE number of teams to proceed writing their
     // intermediate results to the global buffer.
-    atomic::add((uint32_t *)&IterCnt, uint32_t(num_of_records),
-                __ATOMIC_SEQ_CST);
+    atomic::add(&IterCnt, uint32_t(num_of_records), __ATOMIC_SEQ_CST);
   }
 
   return 0;
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -298,14 +298,8 @@
     return ThreadStates[TId]->ICVState.*Var;
 }
 
-uint32_t &lookup32Impl(uint32_t ICVStateTy::*Var) {
-  uint32_t TId = mapping::getThreadIdInBlock();
-  if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
-    return ThreadStates[TId]->ICVState.*Var;
-  return TeamState.ICVState.*Var;
-}
-uint64_t &lookup64Impl(uint64_t ICVStateTy::*Var) {
-  uint64_t TId = mapping::getThreadIdInBlock();
+template <typename IntTy> IntTy &lookupImpl(IntTy ICVStateTy::*Var) {
+  IntTy TId = mapping::getThreadIdInBlock();
   if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
     return ThreadStates[TId]->ICVState.*Var;
   return TeamState.ICVState.*Var;
@@ -330,27 +324,27 @@
   switch (Kind) {
   case state::VK_NThreads:
     if (IsReadonly)
-      return lookup32Impl(&ICVStateTy::NThreadsVar);
+      return lookupImpl(&ICVStateTy::NThreadsVar);
     return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident);
   case state::VK_Level:
     if (IsReadonly)
-      return lookup32Impl(&ICVStateTy::LevelVar);
+      return lookupImpl(&ICVStateTy::LevelVar);
     return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident);
   case state::VK_ActiveLevel:
     if (IsReadonly)
-      return lookup32Impl(&ICVStateTy::ActiveLevelVar);
+      return lookupImpl(&ICVStateTy::ActiveLevelVar);
     return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident);
   case state::VK_MaxActiveLevels:
     if (IsReadonly)
-      return lookup32Impl(&ICVStateTy::MaxActiveLevelsVar);
+      return lookupImpl(&ICVStateTy::MaxActiveLevelsVar);
     return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident);
   case state::VK_RunSched:
     if (IsReadonly)
-      return lookup32Impl(&ICVStateTy::RunSchedVar);
+      return lookupImpl(&ICVStateTy::RunSchedVar);
     return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident);
   case state::VK_RunSchedChunk:
     if (IsReadonly)
-      return lookup32Impl(&ICVStateTy::RunSchedChunkVar);
+      return lookupImpl(&ICVStateTy::RunSchedChunkVar);
     return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident);
   case state::VK_ParallelTeamSize:
     return TeamState.ParallelTeamSize;
diff --git a/openmp/libomptarget/DeviceRTL/src/Workshare.cpp b/openmp/libomptarget/DeviceRTL/src/Workshare.cpp
--- a/openmp/libomptarget/DeviceRTL/src/Workshare.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Workshare.cpp
@@ -139,6 +139,7 @@
                        numberOfActiveOMPThreads);
         break;
       }
+      [[fallthrough]];
     } // note: if chunk <=0, use nochunk
     case kmp_sched_static_balanced_chunk: {
       if (chunk > 0) {
@@ -157,6 +158,7 @@
         ub = oldUb;
         break;
       }
+      [[fallthrough]];
     } // note: if chunk <=0, use nochunk
     case kmp_sched_static_nochunk: {
       ForStaticNoChunk(lastiter, lb, ub, stride, chunk, gtid,
@@ -168,8 +170,9 @@
         ForStaticChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
                        omp_get_num_teams());
         break;
-      } // note: if chunk <=0, use nochunk
-    }
+      }
+      [[fallthrough]];
+    } // note: if chunk <=0, use nochunk
     case kmp_sched_distr_static_nochunk: {
       ForStaticNoChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
                        omp_get_num_teams());
@@ -341,7 +344,7 @@
   uint32_t change = utils::popc(active);
   __kmpc_impl_lanemask_t lane_mask_lt = mapping::lanemaskLT();
   unsigned int rank = utils::popc(active & lane_mask_lt);
-  uint64_t warp_res;
+  uint64_t warp_res = 0;
   if (rank == 0) {
     warp_res = atomic::add(&Cnt, change, __ATOMIC_SEQ_CST);
   }