diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -219,7 +219,20 @@
     if (!ReplVal) {
       for (Use *U : Uses)
         if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
-          CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
+          // Hoisting must not place the call ahead of the definition of its
+          // first operand, so pick the insertion point accordingly.
+          Value *Op0 = CI->getOperand(0);
+          Instruction *MovePt = &*F.getEntryBlock().getFirstInsertionPt();
+          if (Instruction *DefInst = dyn_cast<Instruction>(Op0)) {
+            // The operand is defined outside the entry block; stop without
+            // hoisting this call.
+            if (DefInst->getParent() != &F.getEntryBlock())
+              break;
+            if (!DefInst->comesBefore(MovePt)) {
+              MovePt = DefInst->getNextNode();
+            }
+          }
+          CI->moveBefore(MovePt);
           ReplVal = CI;
           break;
         }
diff --git a/llvm/test/Transforms/OpenMP/gtid.ll b/llvm/test/Transforms/OpenMP/gtid.ll
--- a/llvm/test/Transforms/OpenMP/gtid.ll
+++ b/llvm/test/Transforms/OpenMP/gtid.ll
@@ -15,7 +15,8 @@
 ; CHECK-LABEL: define {{[^@]+}}@external
 ; CHECK-SAME: (i1 [[C:%.*]])
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[C2:%.*]] = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+; CHECK-NEXT: %tmp1 = alloca %struct.ident_t
+; CHECK-NEXT: [[C2:%.*]] = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull %tmp1)
 ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[E:%.*]]
 ; CHECK: t:
 ; CHECK-NEXT: call void @internal(i32 [[C2]], i32 [[C2]])
@@ -31,19 +32,20 @@
 ; CHECK-NEXT: ret void
 ;
 entry:
+  %tmp1 = alloca %struct.ident_t, align 8
   br i1 %c, label %t, label %e
 t:
-  %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull %tmp1)
   call void @internal(i32 %c0, i32 %c0)
   call void @useI32(i32 %c0)
   br label %m
 e:
-  %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull %tmp1)
   call void @internal(i32 %c1, i32 %c1)
   call void @useI32(i32 %c1)
   br label %m
 m:
-  %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull %tmp1)
   call void @internal(i32 0, i32 %c2)
   call void @useI32(i32 %c2)
   ret void