diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "CGDebugInfo.h" +#include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" @@ -1106,8 +1107,13 @@ // FIXME: Clean this up by using an LValue for ReturnTemp, // EmitStoreThroughLValue, and EmitAnyExpr. - if (getLangOpts().ElideConstructors && - S.getNRVOCandidate() && S.getNRVOCandidate()->isNRVOVariable()) { + // Check if the NRVO candidate was not globalized in OpenMP mode. + if (getLangOpts().ElideConstructors && S.getNRVOCandidate() && + S.getNRVOCandidate()->isNRVOVariable() && + (!getLangOpts().OpenMP || + !CGM.getOpenMPRuntime() + .getAddressOfLocalVariable(*this, S.getNRVOCandidate()) + .isValid())) { // Apply the named return value optimization for this return statement, // which means doing nothing: the appropriate result has already been // constructed into the NRVO variable. diff --git a/clang/test/OpenMP/nvptx_NRVO_variable.cpp b/clang/test/OpenMP/nvptx_NRVO_variable.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/nvptx_NRVO_variable.cpp @@ -0,0 +1,30 @@ +// Test target codegen - host bc file has to be created first. 
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct S { // Class type that foo() below returns by value.
+  int a;
+  S() : a(1) {} // User-provided default constructor; initializes a to 1.
+};
+
+#pragma omp declare target
+void bar(S &); // Declaration only; foo() passes its local to it by reference.
+// CHECK-LABEL: foo
+S foo() {
+  // CHECK: [[RETVAL:%.+]] = alloca %struct.S,
+  S s; // Named local that is both handed to bar() and returned by name.
+  // CHECK: call void @{{.+}}bar{{.+}}(%struct.S* {{.*}}[[S_REF:%.+]])
+  bar(s);
+  // CHECK: [[DEST:%.+]] = bitcast %struct.S* [[RETVAL]] to i8*
+  // CHECK: [[SOURCE:%.+]] = bitcast %struct.S* [[S_REF]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}[[DEST]], i8* {{.*}}[[SOURCE]], i64 4, i1 false)
+  // CHECK: [[VAL:%.+]] = load %struct.S, %struct.S* [[RETVAL]],
+  // CHECK: ret %struct.S [[VAL]]
+  return s; // The patterns above require a 4-byte memcpy from the object passed to bar() into the RETVAL slot before it is loaded and returned.
+}
+#pragma omp end declare target
+
+#endif // HEADER