Index: llvm/trunk/lib/Transforms/IPO/IPConstantPropagation.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/IPConstantPropagation.cpp +++ llvm/trunk/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -62,32 +62,43 @@ // Ignore blockaddress uses. if (isa(UR)) continue; - // Used by a non-instruction, or not the callee of a function, do not - // transform. - if (!isa(UR) && !isa(UR)) - return false; - - CallSite CS(cast(UR)); - if (!CS.isCallee(&U)) + // If no abstract call site was created we did not understand the use, bail. + AbstractCallSite ACS(&U); + if (!ACS) return false; // Check out all of the potentially constant arguments. Note that we don't // inspect varargs here. - CallSite::arg_iterator AI = CS.arg_begin(); Function::arg_iterator Arg = F.arg_begin(); - for (unsigned i = 0, e = ArgumentConstants.size(); i != e; - ++i, ++AI, ++Arg) { + for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++Arg) { // If this argument is known non-constant, ignore it. if (ArgumentConstants[i].second) continue; - Constant *C = dyn_cast(*AI); + Value *V = ACS.getCallArgOperand(i); + Constant *C = dyn_cast_or_null(V); + + // We can only propagate thread independent values through callbacks. + // This is different to direct/indirect call sites because for them we + // know the thread executing the caller and callee is the same. For + // callbacks this is not guaranteed, thus a thread dependent value could + // be different for the caller and callee, making it invalid to propagate. + if (C && ACS.isCallbackCall() && C->isThreadDependent()) { + // Argument became non-constant. If all arguments are non-constant now, + // give up on this function. + if (++NumNonconstant == ArgumentConstants.size()) + return false; + + ArgumentConstants[i].second = true; + continue; + } + if (C && ArgumentConstants[i].first == nullptr) { ArgumentConstants[i].first = C; // First constant seen. } else if (C && ArgumentConstants[i].first == C) { // Still the constant value we think it is. - } else if (*AI == &*Arg) { + } else if (V == &*Arg) { // Ignore recursive calls passing argument down. } else { // Argument became non-constant. If all arguments are non-constant now, Index: llvm/trunk/test/Transforms/IPConstantProp/multiple_callbacks.ll =================================================================== --- llvm/trunk/test/Transforms/IPConstantProp/multiple_callbacks.ll +++ llvm/trunk/test/Transforms/IPConstantProp/multiple_callbacks.ll @@ -0,0 +1,87 @@ +; RUN: opt -ipconstprop -S < %s | FileCheck %s +; +; +; /---------------------------------------| +; | /----------------------|----| +; | | /-----| | +; V V V | | +; void broker(int (*cb0)(int), int (*cb1)(int), int (*cb2)(int), int, int); +; +; static int cb0(int zero) { +; return zero; +; } +; static int cb1(int unknown) { +; return unknown; +; } +; static int cb2(int unknown) { +; cb0(0); +; return unknown; +; } +; static int cb3(int unknown) { +; return unknown; +; } +; static int cb4(int unknown) { +; return unknown; +; } +; +; void foo() { +; cb0(0); +; cb3(1); +; broker(cb0, cb1, cb0, 0, 1); +; broker(cb1, cb2, cb2, 0, 1); +; broker(cb3, cb2, cb3, 0, 1); +; broker(cb4, cb4, cb4, 0, 1); +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define internal i32 @cb0(i32 %zero) { +entry: +; CHECK: @cb0 +; CHECK-NEXT: entry +; CHECK-NEXT: ret i32 0 + ret i32 %zero +} + +define internal i32 @cb1(i32 %unknown) { +entry: +; CHECK: ret i32 %unknown + ret i32 %unknown +} + +define internal i32 @cb2(i32 %unknown) { +entry: + %call = call i32 @cb0(i32 0) +; CHECK: ret i32 %unknown + ret i32 %unknown +} + +define internal i32 @cb3(i32 %unknown) { +entry: +; CHECK: ret i32 %unknown + ret i32 %unknown +} + +define internal i32 @cb4(i32 %unknown) { +entry: +; CHECK: ret i32 %unknown + ret i32 %unknown +} + +define void @foo() { +entry: + %call = call i32 @cb0(i32 0) + %call1 = call i32 @cb3(i32 1) + call void @broker(i32 (i32)* nonnull @cb0, i32 (i32)* nonnull @cb1, i32 (i32)* nonnull @cb0, i32 0, i32 1) + call void @broker(i32 (i32)* nonnull @cb1, i32 (i32)* nonnull @cb2, i32 (i32)* nonnull @cb2, i32 0, i32 1) + call void @broker(i32 (i32)* nonnull @cb3, i32 (i32)* nonnull @cb2, i32 (i32)* nonnull @cb3, i32 0, i32 1) + call void @broker(i32 (i32)* nonnull @cb4, i32 (i32)* nonnull @cb4, i32 (i32)* nonnull @cb4, i32 0, i32 1) + ret void +} + +declare !callback !3 void @broker(i32 (i32)*, i32 (i32)*, i32 (i32)*, i32, i32) + +!0 = !{i64 0, i64 3, i1 false} +!1 = !{i64 1, i64 4, i1 false} +!2 = !{i64 2, i64 3, i1 false} +!3 = !{!0, !2, !1} Index: llvm/trunk/test/Transforms/IPConstantProp/openmp_parallel_for.ll =================================================================== --- llvm/trunk/test/Transforms/IPConstantProp/openmp_parallel_for.ll +++ llvm/trunk/test/Transforms/IPConstantProp/openmp_parallel_for.ll @@ -0,0 +1,120 @@ +; RUN: opt -S -ipconstprop < %s | FileCheck %s +; +; void bar(int, float, double); +; +; void foo(int N) { +; float p = 3; +; double q = 5; +; N = 7; +; +; #pragma omp parallel for firstprivate(q) +; for (int i = 2; i < N; i++) { +; bar(i, p, q); +; } +; } +; +; Verify the constant value of q is propagated into the outlined function. +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%struct.ident_t = type { i32, i32, i32, i32, i8* } + +@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 514, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 + +define dso_local void @foo(i32 %N) { +entry: + %N.addr = alloca i32, align 4 + %p = alloca float, align 4 + store i32 %N, i32* %N.addr, align 4 + store float 3.000000e+00, float* %p, align 4 + store i32 7, i32* %N.addr, align 4 + call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nonnull %N.addr, float* nonnull %p, i64 4617315517961601024) + ret void +} + +define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %N, float* dereferenceable(4) %p, i64 %q) { +entry: + %q.addr = alloca i64, align 8 + %.omp.lb = alloca i32, align 4 + %.omp.ub = alloca i32, align 4 + %.omp.stride = alloca i32, align 4 + %.omp.is_last = alloca i32, align 4 +; CHECK: store i64 4617315517961601024, i64* %q.addr, align 8 + store i64 %q, i64* %q.addr, align 8 + %conv = bitcast i64* %q.addr to double* + %tmp = load i32, i32* %N, align 4 + %sub3 = add nsw i32 %tmp, -3 + %cmp = icmp sgt i32 %tmp, 2 + br i1 %cmp, label %omp.precond.then, label %omp.precond.end + +omp.precond.then: ; preds = %entry + store i32 0, i32* %.omp.lb, align 4 + store i32 %sub3, i32* %.omp.ub, align 4 + store i32 1, i32* %.omp.stride, align 4 + store i32 0, i32* %.omp.is_last, align 4 + %tmp5 = load i32, i32* %.global_tid., align 4 + call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull @0, i32 %tmp5, i32 34, i32* nonnull %.omp.is_last, i32* nonnull %.omp.lb, i32* nonnull %.omp.ub, i32* nonnull %.omp.stride, i32 1, i32 1) + %tmp6 = load i32, i32* %.omp.ub, align 4 + %cmp6 = icmp sgt i32 %tmp6, %sub3 + br i1 %cmp6, label %cond.true, label %cond.false + +cond.true: ; preds = %omp.precond.then + br label %cond.end + +cond.false: ; preds = %omp.precond.then + %tmp7 = load i32, i32* %.omp.ub, align 4 + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32 [ %sub3, %cond.true ], [ %tmp7, %cond.false ] + store i32 %cond, i32* %.omp.ub, align 4 + %tmp8 = load i32, i32* %.omp.lb, align 4 + br label %omp.inner.for.cond + +omp.inner.for.cond: ; preds = %omp.inner.for.inc, %cond.end + %.omp.iv.0 = phi i32 [ %tmp8, %cond.end ], [ %add11, %omp.inner.for.inc ] + %tmp9 = load i32, i32* %.omp.ub, align 4 + %cmp8 = icmp sgt i32 %.omp.iv.0, %tmp9 + br i1 %cmp8, label %omp.inner.for.cond.cleanup, label %omp.inner.for.body + +omp.inner.for.cond.cleanup: ; preds = %omp.inner.for.cond + br label %omp.inner.for.end + +omp.inner.for.body: ; preds = %omp.inner.for.cond + %add10 = add nsw i32 %.omp.iv.0, 2 + %tmp10 = load float, float* %p, align 4 + %tmp11 = load double, double* %conv, align 8 + call void @bar(i32 %add10, float %tmp10, double %tmp11) + br label %omp.body.continue + +omp.body.continue: ; preds = %omp.inner.for.body + br label %omp.inner.for.inc + +omp.inner.for.inc: ; preds = %omp.body.continue + %add11 = add nsw i32 %.omp.iv.0, 1 + br label %omp.inner.for.cond + +omp.inner.for.end: ; preds = %omp.inner.for.cond.cleanup + br label %omp.loop.exit + +omp.loop.exit: ; preds = %omp.inner.for.end + %tmp12 = load i32, i32* %.global_tid., align 4 + call void @__kmpc_for_static_fini(%struct.ident_t* nonnull @0, i32 %tmp12) + br label %omp.precond.end + +omp.precond.end: ; preds = %omp.loop.exit, %entry + ret void +} + +declare dso_local void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) + +declare dso_local void @bar(i32, float, double) + +declare dso_local void @__kmpc_for_static_fini(%struct.ident_t*, i32) + +declare !callback !0 dso_local void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) + +!1 = !{i64 2, i64 -1, i64 -1, i1 true} +!0 = !{!1} Index: llvm/trunk/test/Transforms/IPConstantProp/pthreads.ll =================================================================== --- llvm/trunk/test/Transforms/IPConstantProp/pthreads.ll +++ llvm/trunk/test/Transforms/IPConstantProp/pthreads.ll @@ -0,0 +1,49 @@ +; RUN: opt -ipconstprop -S < %s | FileCheck %s +; +; #include +; +; void *GlobalVPtr; +; +; static void *foo(void *arg) { return arg; } +; static void *bar(void *arg) { return arg; } +; +; int main() { +; pthread_t thread; +; pthread_create(&thread, NULL, foo, NULL); +; pthread_create(&thread, NULL, bar, &GlobalVPtr); +; return 0; +; } +; +; Verify the constant values NULL and &GlobalVPtr are propagated into foo and +; bar, respectively. +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%union.pthread_attr_t = type { i64, [48 x i8] } + +@GlobalVPtr = common dso_local global i8* null, align 8 + +define dso_local i32 @main() { +entry: + %thread = alloca i64, align 8 + %call = call i32 @pthread_create(i64* nonnull %thread, %union.pthread_attr_t* null, i8* (i8*)* nonnull @foo, i8* null) + %call1 = call i32 @pthread_create(i64* nonnull %thread, %union.pthread_attr_t* null, i8* (i8*)* nonnull @bar, i8* bitcast (i8** @GlobalVPtr to i8*)) + ret i32 0 +} + +declare !callback !0 dso_local i32 @pthread_create(i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*) + +define internal i8* @foo(i8* %arg) { +entry: +; CHECK: ret i8* null + ret i8* %arg +} + +define internal i8* @bar(i8* %arg) { +entry: +; CHECK: ret i8* bitcast (i8** @GlobalVPtr to i8*) + ret i8* %arg +} + +!1 = !{i64 2, i64 3, i1 false} +!0 = !{!1} Index: llvm/trunk/test/Transforms/IPConstantProp/thread_local_acs.ll =================================================================== --- llvm/trunk/test/Transforms/IPConstantProp/thread_local_acs.ll +++ llvm/trunk/test/Transforms/IPConstantProp/thread_local_acs.ll @@ -0,0 +1,42 @@ +; RUN: opt -ipconstprop -S < %s | FileCheck %s +; +; #include +; thread_local int gtl = 0; +; int gsh = 0; +; +; static int callee(int *thread_local_ptr, int *shared_ptr) { +; return *thread_local_ptr + *shared_ptr; +; } +; +; void broker(int *, int (*callee)(int *, int *), int *); +; +; void caller() { +; broker(>l, callee, &gsh); +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@gtl = dso_local thread_local global i32 0, align 4 +@gsh = dso_local global i32 0, align 4 + +define internal i32 @callee(i32* %thread_local_ptr, i32* %shared_ptr) { +entry: +; CHECK: %tmp = load i32, i32* %thread_local_ptr, align 4 +; CHECK: %tmp1 = load i32, i32* @gsh, align 4 +; CHECK: %add = add nsw i32 %tmp, %tmp1 + %tmp = load i32, i32* %thread_local_ptr, align 4 + %tmp1 = load i32, i32* %shared_ptr, align 4 + %add = add nsw i32 %tmp, %tmp1 + ret i32 %add +} + +define dso_local void @caller() { +entry: + call void @broker(i32* nonnull @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nonnull @gsh) + ret void +} + +declare !callback !0 dso_local void @broker(i32*, i32 (i32*, i32*)*, i32*) + +!1 = !{i64 1, i64 0, i64 2, i1 false} +!0 = !{!1}