Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -187,6 +187,11 @@ __OMP_RTL(__kmpc_cancel_barrier, false, Int32, IdentPtr, Int32) __OMP_RTL(__kmpc_flush, false, Void, IdentPtr) __OMP_RTL(__kmpc_global_thread_num, false, Int32, IdentPtr) +__OMP_RTL(__kmpc_for_static_init_4, false, Void, IdentPtr, + Int32 /*gtid*/, Int32 /*sched_type*/, + Int32Ptr /*plastiter*/, + Int32Ptr /*plower*/, Int32Ptr /*pupper*/, Int32Ptr /*pstride*/, + Int32 /*incr*/, Int32 /*chunk*/) __OMP_RTL(__kmpc_fork_call, true, Void, IdentPtr, Int32, ParallelTaskPtr) __OMP_RTL(__kmpc_omp_taskwait, false, Int32, IdentPtr, Int32) __OMP_RTL(__kmpc_omp_taskyield, false, Int32, IdentPtr, Int32, Int32) Index: llvm/lib/Transforms/IPO/OpenMPOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -114,6 +114,7 @@ Changed |= deduplicateRuntimeCalls(); Changed |= deleteParallelRegions(); + Changed |= addRangeMetadataToOMPLoopBounds(); return Changed; } @@ -201,6 +202,88 @@ return Changed; } + /// Adds range metadata to loads and stores of the omp.(ub/lb). + /// A conservative range is set by setting the range equal to iteration + /// of the loop. 
+ bool addRangeMetadataToOMPLoopBounds() { + bool Changed = false; + RuntimeFunctionInfo &RFI = RFIs[OMPRTL___kmpc_for_static_init_4]; + + if (!RFI.Declaration) + return false; + + auto setRangeCB = [&](Use &U, Function &F) { + CallInst *staticForCall = getCallIfRegularCall(U); + if (!staticForCall) + return false; + + Value *OMPLBVal = staticForCall->getArgOperand(4); + Value *OMPUBVal = staticForCall->getArgOperand(5); + + StoreInst *storeToLB = NULL, *storeToUB = NULL; + LoadInst *loadFromLB = NULL, *loadFromUB = NULL; + + /// getLoadStoreGuardingStaticFor: sets nearest store to 'boundVal' + /// preceding 'staticForCall' & nearest / load to 'boundVal' succeeding + /// staticForCall' + auto getLoadStoreGuardingStaticFor = + [staticForCall](Value *boundVal, LoadInst *&load, StoreInst *&store) { + bool foundCall = false; + Value::use_iterator use_iter = boundVal->use_begin(), + use_end = boundVal->use_end(); + + for (; use_iter != use_end; ++use_iter) { + Value *v = use_iter->getUser(); + if (!foundCall) { + if (v == staticForCall) + foundCall = true; + else if (isa(v)) + load = dyn_cast(v); + } else { + if (isa(v)) { + store = dyn_cast(v); + break; + } + } + } + }; + + getLoadStoreGuardingStaticFor(OMPLBVal, loadFromLB, storeToLB); + getLoadStoreGuardingStaticFor(OMPUBVal, loadFromUB, storeToUB); + + if (isa(storeToLB->getValueOperand()) && + isa(storeToUB->getValueOperand())) { + LLVMContext &Context = staticForCall->getParent()->getContext(); + + ConstantInt *low = dyn_cast(storeToLB->getValueOperand()); + ConstantInt *highMinusOne = + dyn_cast(storeToUB->getValueOperand()); + ConstantInt *high = ConstantInt::get( + highMinusOne->getType(), (highMinusOne->getSExtValue()) + 1); + + Metadata *lowAndHigh[] = { + ConstantAsMetadata::get(low), + ConstantAsMetadata::get(high), + }; + loadFromLB->setMetadata(LLVMContext::MD_range, + MDNode::get(Context, lowAndHigh)); + loadFromUB->setMetadata(LLVMContext::MD_range, + MDNode::get(Context, lowAndHigh)); + Changed = true; 
+ } else + LLVM_DEBUG( + dbgs() + << "[addRangeMetadataToOMPLoopBounds]: Unable to set the ranges" + << " as the iteration zone of '" << *staticForCall + << "' isn't compile time constant.\n"); + + return true; + }; + + RFI.foreachUse(setRangeCB); + return Changed; + } + static Value *combinedIdentStruct(Value *Ident0, Value *Ident1, bool GlobalOnly) { // TODO: Figure out how to actually combine multiple debug locations. For Index: llvm/test/Transforms/OpenMP/set_bound_ranges.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/OpenMP/set_bound_ranges.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -openmpopt -S < %s | FileCheck %s +; RUN: opt -passes=openmpopt -S < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +%struct.ident_t = type { i32, i32, i32, i32, i8* } + +@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 514, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@2 = private unnamed_addr global %struct.ident_t { i32 0, i32 66, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 + +; Function Attrs: noinline nounwind uwtable +define dso_local void @foo(i32 %N) #0 { +entry: + %N.addr = alloca i32, align 4 + %.omp.iv = alloca i32, align 4 + %tmp = alloca i32, align 4 + %.omp.lb = alloca i32, align 4 + %.omp.ub = alloca i32, align 4 + %.omp.stride = alloca i32, align 4 + %.omp.is_last = alloca i32, align 4 + %i = alloca i32, align 4 + %0 = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @1)
+  store i32 %N, i32* %N.addr, align 4
+  store i32 0, i32* %.omp.lb, align 4 ; constant initial lower bound
+  store i32 195, i32* %.omp.ub, align 4 ; constant initial upper bound
+  store i32 1, i32* %.omp.stride, align 4
+  store i32 0, i32* %.omp.is_last, align 4
+  call void @__kmpc_for_static_init_4(%struct.ident_t* @0, i32 %0, i32 34, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1) ; operands 4 and 5 are %.omp.lb and %.omp.ub
+  %1 = load i32, i32* %.omp.ub, align 4
+; CHECK: %1 = load i32, i32* %.omp.ub, align 4, !range ![[RANGE0:[0-9]+]]
+  %cmp = icmp sgt i32 %1, 195
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+  br label %cond.end
+
+cond.false: ; preds = %entry
+  %2 = load i32, i32* %.omp.ub, align 4 ; no !range is checked for this load
+  br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ 195, %cond.true ], [ %2, %cond.false ]
+  store i32 %cond, i32* %.omp.ub, align 4 ; non-constant store to the upper bound
+  %3 = load i32, i32* %.omp.lb, align 4
+; CHECK: %3 = load i32, i32* %.omp.lb, align 4, !range ![[RANGE0]]
+  store i32 %3, i32* %.omp.iv, align 4
+  br label %omp.inner.for.cond
+
+omp.inner.for.cond: ; preds = %omp.inner.for.inc, %cond.end
+  %4 = load i32, i32* %.omp.iv, align 4
+  %5 = load i32, i32* %.omp.ub, align 4
+  %cmp1 = icmp sle i32 %4, %5 ; loop runs while iv <= ub
+  br i1 %cmp1, label %omp.inner.for.body, label %omp.inner.for.end
+
+omp.inner.for.body: ; preds = %omp.inner.for.cond
+  %6 = load i32, i32* %.omp.iv, align 4
+  %mul = mul nsw i32 %6, 1
+  %add = add nsw i32 0, %mul
+  store i32 %add, i32* %i, align 4
+  %call = call i32 (...) @bar()
+  br label %omp.body.continue
+
+omp.body.continue: ; preds = %omp.inner.for.body
+  br label %omp.inner.for.inc
+
+omp.inner.for.inc: ; preds = %omp.body.continue
+  %7 = load i32, i32* %.omp.iv, align 4
+  %add2 = add nsw i32 %7, 1
+  store i32 %add2, i32* %.omp.iv, align 4
+  br label %omp.inner.for.cond
+
+omp.inner.for.end: ; preds = %omp.inner.for.cond
+  br label %omp.loop.exit
+
+omp.loop.exit: ; preds = %omp.inner.for.end
+  call void @__kmpc_for_static_fini(%struct.ident_t* @0, i32 %0)
+  call void @__kmpc_barrier(%struct.ident_t* @2, i32 %0)
+  ret void
+}
+
+declare dso_local i32 @__kmpc_global_thread_num(%struct.ident_t*)
+
+declare dso_local void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32)
+
+declare dso_local i32 @bar(...) #1
+
+declare dso_local void @__kmpc_for_static_fini(%struct.ident_t*, i32)
+
+declare dso_local void @__kmpc_barrier(%struct.ident_t*, i32)
+
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project 7264cf4e457e759a84bcac45882cad50628dbc15)"}
+; CHECK: ![[RANGE0]] = !{i32 0, i32 196}