Index: llvm/lib/Transforms/IPO/OpenMPOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -43,6 +43,8 @@ "Number of OpenMP runtime functions identified"); STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, "Number of OpenMP runtime function uses identified"); +STATISTIC(NumOpenMPLoopsRangeAnnotated, + "Number of OpenMP for directives annotated with !range"); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) static constexpr auto TAG = "[" DEBUG_TYPE "]"; @@ -88,7 +90,7 @@ /// functions). size_t getNumArgs() const { return ArgumentTypes.size(); } - /// Run the callback \p CB on each use and forget the use if the result is + /// Run the callback \p CB on each use and forget the use if CB returns /// true. The callback will be fed the function in which the use was /// encountered as second argument. void foreachUse(function_ref CB) { @@ -204,7 +206,7 @@ /// Adds range metadata to loads and stores of the omp.(ub/lb). /// A conservative range is set by setting the range equal to iteration - /// of the loop. + /// space of the underlying loop. bool addRangeMetadataToOMPLoopBounds() { bool Changed = false; RuntimeFunctionInfo &RFI = RFIs[OMPRTL___kmpc_for_static_init_4]; @@ -212,75 +214,82 @@ if (!RFI.Declaration) return false; - auto setRangeCB = [&](Use &U, Function &F) { - CallInst *staticForCall = getCallIfRegularCall(U); - if (!staticForCall) + auto SetRangeCB = [&](Use &U, Function &F) { + CallInst *StaticForCall = getCallIfRegularCall(U); + if (!StaticForCall) return false; - Value *OMPLBVal = staticForCall->getArgOperand(4); - Value *OMPUBVal = staticForCall->getArgOperand(5); - - StoreInst *storeToLB = NULL, *storeToUB = NULL; - LoadInst *loadFromLB = NULL, *loadFromUB = NULL; - - /// getLoadStoreGuardingStaticFor: sets nearest store to 'boundVal' - /// preceding 'staticForCall' & nearest / load to 'boundVal' succeeding - /// staticForCall' - auto getLoadStoreGuardingStaticFor = - [staticForCall](Value *boundVal, LoadInst *&load, StoreInst *&store) { - bool foundCall = false; - Value::use_iterator use_iter = boundVal->use_begin(), - use_end = boundVal->use_end(); - - for (; use_iter != use_end; ++use_iter) { - Value *v = use_iter->getUser(); - if (!foundCall) { - if (v == staticForCall) - foundCall = true; - else if (isa(v)) - load = dyn_cast(v); - } else { - if (isa(v)) { - store = dyn_cast(v); - break; - } - } + static constexpr unsigned KMPC_FOR_STATIC_INIT_LB = 4; + static constexpr unsigned KMPC_FOR_STATIC_INIT_UB = 5; + + Value *OMPLBVal = StaticForCall->getArgOperand(KMPC_FOR_STATIC_INIT_LB); + Value *OMPUBVal = StaticForCall->getArgOperand(KMPC_FOR_STATIC_INIT_UB); + + StoreInst *StoreToLB = nullptr, *StoreToUB = nullptr; + LoadInst *LoadFromLB = nullptr, *LoadFromUB = nullptr; + + /// GetLoadStoreGuardingStaticFor: sets nearest store to 'BoundVal' + /// preceding 'StaticForCall' & nearest load to 'BoundVal' succeeding + /// StaticForCall' + auto GetLoadStoreGuardingStaticFor = [StaticForCall](Value *BoundVal, + LoadInst *&Load, + StoreInst *&Store) { + bool FoundCall = false; + Value::use_iterator I = BoundVal->use_begin(), E = BoundVal->use_end(); + + for (; I != E; ++I) { + Value *V = I->getUser(); + if (!FoundCall) { + if (V == StaticForCall) + FoundCall = true; + else if (isa(V)) + Load = dyn_cast(V); + } else { + if (isa(V)) { + Store = dyn_cast(V); + break; } - }; - - getLoadStoreGuardingStaticFor(OMPLBVal, loadFromLB, storeToLB); - getLoadStoreGuardingStaticFor(OMPUBVal, loadFromUB, storeToUB); - - if (isa(storeToLB->getValueOperand()) && - isa(storeToUB->getValueOperand())) { - LLVMContext &Context = staticForCall->getParent()->getContext(); + } + } + }; - ConstantInt *low = dyn_cast(storeToLB->getValueOperand()); - ConstantInt *highMinusOne = - dyn_cast(storeToUB->getValueOperand()); - ConstantInt *high = ConstantInt::get( - highMinusOne->getType(), (highMinusOne->getSExtValue()) + 1); + GetLoadStoreGuardingStaticFor(OMPLBVal, LoadFromLB, StoreToLB); + GetLoadStoreGuardingStaticFor(OMPUBVal, LoadFromUB, StoreToUB); - Metadata *lowAndHigh[] = { - ConstantAsMetadata::get(low), - ConstantAsMetadata::get(high), - }; - loadFromLB->setMetadata(LLVMContext::MD_range, - MDNode::get(Context, lowAndHigh)); - loadFromUB->setMetadata(LLVMContext::MD_range, - MDNode::get(Context, lowAndHigh)); - Changed = true; - } else + if (!isa(StoreToLB->getValueOperand()) || + !isa(StoreToUB->getValueOperand())) { LLVM_DEBUG( dbgs() << "[addRangeMetadataToOMPLoopBounds]: Unable to set the ranges" - << " as the iteration zone of '" << *staticForCall + << " as the iteration zone of '" << *StaticForCall << "' isn't compile time constant.\n"); + return false; + } else { + LLVMContext &Context = StaticForCall->getParent()->getContext(); + + ConstantInt *Low = dyn_cast(StoreToLB->getValueOperand()); + ConstantInt *HighMinusOne = + dyn_cast(StoreToUB->getValueOperand()); + ConstantInt *High = ConstantInt::get( + HighMinusOne->getType(), (HighMinusOne->getSExtValue()) + 1); + + Metadata *LowAndHigh[] = { + ConstantAsMetadata::get(Low), + ConstantAsMetadata::get(High), + }; + LoadFromLB->setMetadata(LLVMContext::MD_range, + MDNode::get(Context, LowAndHigh)); + LoadFromUB->setMetadata(LLVMContext::MD_range, + MDNode::get(Context, LowAndHigh)); + Changed = true; + ++NumOpenMPLoopsRangeAnnotated; + } - return true; + // Don't clear 'Use U' in 'UsesMap' + return false; }; - RFI.foreachUse(setRangeCB); + RFI.foreachUse(SetRangeCB); return Changed; } Index: llvm/test/Transforms/OpenMP/set_bound_ranges.ll =================================================================== --- llvm/test/Transforms/OpenMP/set_bound_ranges.ll +++ llvm/test/Transforms/OpenMP/set_bound_ranges.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes ; RUN: opt -openmpopt -S < %s | FileCheck %s ; RUN: opt -passes=openmpopt -S < %s | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -10,8 +9,8 @@ @1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 @2 = private unnamed_addr global %struct.ident_t { i32 0, i32 66, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 -; Function Attrs: noinline nounwind uwtable -define dso_local void @foo(i32 %N) #0 { +define dso_local void @foo(i32 %N) { +;CHECK: void @foo( entry: %N.addr = alloca i32, align 4 %.omp.iv = alloca i32, align 4 @@ -84,18 +83,110 @@ declare dso_local void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) -declare dso_local i32 @bar(...) #1 +declare dso_local i32 @bar(...) declare dso_local void @__kmpc_for_static_fini(%struct.ident_t*, i32) declare dso_local void @__kmpc_barrier(%struct.ident_t*, i32) -attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +define dso_local void @test_runtime_it_space(i32 %N) { +;CHECK: void @test_runtime_it_space( +entry: + %N.addr = alloca i32, align 4 + %.omp.iv = alloca i32, align 4 + %tmp = alloca i32, align 4 + %.capture_expr. = alloca i32, align 4 + %.capture_expr.1 = alloca i32, align 4 + %i = alloca i32, align 4 + %.omp.lb = alloca i32, align 4 + %.omp.ub = alloca i32, align 4 + %.omp.stride = alloca i32, align 4 + %.omp.is_last = alloca i32, align 4 + %i4 = alloca i32, align 4 + %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) + store i32 %N, i32* %N.addr, align 4 + %1 = load i32, i32* %N.addr, align 4 + store i32 %1, i32* %.capture_expr., align 4 + %2 = load i32, i32* %.capture_expr., align 4 + %sub = sub nsw i32 %2, 0 + %sub2 = sub nsw i32 %sub, 1 + %add = add nsw i32 %sub2, 1 + %div = sdiv i32 %add, 1 + %sub3 = sub nsw i32 %div, 1 + store i32 %sub3, i32* %.capture_expr.1, align 4 + store i32 0, i32* %i, align 4 + %3 = load i32, i32* %.capture_expr., align 4 + %cmp = icmp slt i32 0, %3 + br i1 %cmp, label %omp.precond.then, label %omp.precond.end + +omp.precond.then: ; preds = %entry + store i32 0, i32* %.omp.lb, align 4 + %4 = load i32, i32* %.capture_expr.1, align 4 + store i32 %4, i32* %.omp.ub, align 4 + store i32 1, i32* %.omp.stride, align 4 + store i32 0, i32* %.omp.is_last, align 4 + call void @__kmpc_for_static_init_4(%struct.ident_t* @0, i32 %0, i32 34, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1) + %5 = load i32, i32* %.omp.ub, align 4 +;CHECK: %5 = load i32, i32* %.omp.ub, align 4 +;CHECK-NOT: !range + %6 = load i32, i32* %.capture_expr.1, align 4 + %cmp5 = icmp sgt i32 %5, %6 + br i1 %cmp5, label %cond.true, label %cond.false + +cond.true: ; preds = %omp.precond.then + %7 = load i32, i32* %.capture_expr.1, align 4 + br label %cond.end + +cond.false: ; preds = %omp.precond.then + %8 = load i32, i32* %.omp.ub, align 4 + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32 [ %7, %cond.true ], [ %8, %cond.false ] + store i32 %cond, i32* %.omp.ub, align 4 + %9 = load i32, i32* %.omp.lb, align 4 +;CHECK: %9 = load i32, i32* %.omp.lb, align 4 +;CHECK-NOT: !range + store i32 %9, i32* %.omp.iv, align 4 + br label %omp.inner.for.cond + +omp.inner.for.cond: ; preds = %omp.inner.for.inc, %cond.end + %10 = load i32, i32* %.omp.iv, align 4 + %11 = load i32, i32* %.omp.ub, align 4 + %cmp6 = icmp sle i32 %10, %11 + br i1 %cmp6, label %omp.inner.for.body, label %omp.inner.for.end + +omp.inner.for.body: ; preds = %omp.inner.for.cond + %12 = load i32, i32* %.omp.iv, align 4 + %mul = mul nsw i32 %12, 1 + %add7 = add nsw i32 0, %mul + store i32 %add7, i32* %i4, align 4 + %call = call i32 (...) @baz() + br label %omp.body.continue + +omp.body.continue: ; preds = %omp.inner.for.body + br label %omp.inner.for.inc + +omp.inner.for.inc: ; preds = %omp.body.continue + %13 = load i32, i32* %.omp.iv, align 4 + %add8 = add nsw i32 %13, 1 + store i32 %add8, i32* %.omp.iv, align 4 + br label %omp.inner.for.cond + +omp.inner.for.end: ; preds = %omp.inner.for.cond + br label %omp.loop.exit + +omp.loop.exit: ; preds = %omp.inner.for.end + call void @__kmpc_for_static_fini(%struct.ident_t* @0, i32 %0) + br label %omp.precond.end + +omp.precond.end: ; preds = %omp.loop.exit, %entry + call void @__kmpc_barrier(%struct.ident_t* @2, i32 %0) + ret void +;CHECK: ret void +} + +declare dso_local i32 @baz(...) -!llvm.module.flags = !{!0} -!llvm.ident = !{!1} -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project 7264cf4e457e759a84bcac45882cad50628dbc15)"} ; CHECK: ![[RANGE0]] = !{i32 0, i32 196}