Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -319,7 +319,6 @@ // Emit target region as a standalone region. auto &&CodeGen = [&EST, &WST, &CS, &D, this](CodeGenFunction &CGF) { CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - (void)CGF.EmitOMPFirstprivateClause(D, PrivateScope); CGF.EmitOMPPrivateClause(D, PrivateScope); (void)PrivateScope.Privatize(); Index: test/OpenMP/nvptx_target_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/nvptx_target_firstprivate_codegen.cpp +++ test/OpenMP/nvptx_target_firstprivate_codegen.cpp @@ -30,12 +30,8 @@ // TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, - // TCHECK: [[A1:%.+]] = alloca i{{[0-9]+}}, // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], // TCHECK-64: [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}* - // TCHECK-64: [[A_ADDR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]], - // TCHECK-32: [[A_ADDR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_ADDR]], - // TCHECK: store i{{[0-9]+}} [[A_ADDR_VAL]], i{{[0-9]+}}* [[A1]], // TCHECK: ret void #pragma omp target firstprivate(aa,b,c,d) @@ -47,17 +43,12 @@ d.Y = 1; } - // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the - // target region + // the input parameters to the offloading function are already private variables to the target region // TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A2_IN:%.+]], [10 x float]* {{.+}} [[B_IN:%.+]], [5 x [10 x double]]* {{.+}} [[C_IN:%.+]], [[TT]]* {{.+}} [[D_IN:%.+]]) // TCHECK: [[A2_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x float]*, // TCHECK: [[C_ADDR:%.+]] = alloca [5 x [10 x double]]*, // TCHECK: [[D_ADDR:%.+]] = alloca [[TT]]*, - // TCHECK: [[A2_PRIV:%.+]] = alloca i{{[0-9]+}}, - // TCHECK: [[B_PRIV:%.+]] = alloca [10 x float], - // TCHECK: [[C_PRIV:%.+]] = alloca [5 x [10 x double]], - // TCHECK: [[D_PRIV:%.+]] = alloca [[TT]], // TCHECK: store i{{[0-9]+}} [[A2_IN]], i{{[0-9]+}}* [[A2_ADDR]], // TCHECK: store [10 x float]* [[B_IN]], [10 x float]** [[B_ADDR]], // TCHECK: store [5 x [10 x double]]* [[C_IN]], [5 x [10 x double]]** [[C_ADDR]], @@ -67,37 +58,36 @@ // TCHECK: [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], // TCHECK: [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** [[D_ADDR]], - // firstprivate(aa): a_priv = a_in - // TCHECK: [[A2_CONV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV_A2ADDR]], - // TCHECK: store i{{[0-9]+}} [[A2_CONV_VAL]], i{{[0-9]+}}* [[A2_PRIV]], - - // firstprivate(b): memcpy(b_priv,b_in) - // TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x float]* [[B_PRIV]] to i8* - // TCHECK: [[B_ADDR_REF_BCAST:%.+]] = bitcast [10 x float]* [[B_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_ADDR_REF_BCAST]], {{.+}}) - - // firstprivate(c) - // TCHECK: [[C_PRIV_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_PRIV]] to i8* - // TCHECK: [[C_IN_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[C_PRIV_BCAST]], i8* [[C_IN_BCAST]],{{.+}}) + // aa += 1 (only check load and store are from and to right variable) + // TCHECK: {{.+}} = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV_A2ADDR]], + // TCHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[CONV_A2ADDR]], - // firstprivate(d) - // TCHECK: [[D_PRIV_BCAST:%.+]] = bitcast [[TT]]* [[D_PRIV]] to i8* - // TCHECK: [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[D_PRIV_BCAST]], i8* [[D_IN_BCAST]],{{.+}}) - + // b[2] = 1.0 + // TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x float], [10 x float]* [[B_ADDR_REF]], + // TCHECK: store float {{.+}}, float* [[B_GEP]], + + // c[1][2] = 1.0 + // TCHECK: [[B_GEP1:%.+]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C_ADDR_REF]], + // TCHECK: [[B_GEP2:%.+]] = getelementptr inbounds [10 x double], [10 x double]* [[B_GEP1]], + // TCHECK: store double {{.+}}, double* [[B_GEP2]], - #pragma omp target firstprivate(ptr) + // d.X = 1; d.Y = 1 + // TCHECK: [[X:%.+]] = getelementptr inbounds [[TT]], [[TT]]* [[D_ADDR_REF]], + // TCHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[X]] + // TCHECK: [[Y:%.+]] = getelementptr inbounds [[TT]], [[TT]]* [[D_ADDR_REF]], + // TCHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[Y]] + +#pragma omp target firstprivate(ptr) { ptr[0]++; } // TCHECK: define void @__omp_offloading_{{.+}}(double* [[PTR_IN:%.+]]) // TCHECK: [[PTR_ADDR:%.+]] = alloca double*, - // TCHECK: [[PTR_PRIV:%.+]] = alloca double*, // TCHECK: store double* [[PTR_IN]], double** [[PTR_ADDR]], // TCHECK: [[PTR_IN_REF:%.+]] = load double*, double** [[PTR_ADDR]], - // TCHECK: store double* [[PTR_IN_REF]], double** [[PTR_PRIV]], + // TCHECK: [[PTR_GEP:%.+]] = getelementptr inbounds double, double* [[PTR_IN_REF]], + // TCHECK: store double {{.+}}, double* [[PTR_GEP]], return a; } @@ -137,9 +127,6 @@ // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[A3_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, -// TCHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, -// TCHECK: [[A3_PRIV:%.+]] = alloca i{{[0-9]+}}, -// TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], // TCHECK: store i{{[0-9]+}} [[A3_IN]], i{{[0-9]+}}* [[A3_ADDR]], // TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], @@ -147,19 +134,19 @@ // TCHECK: [[A3_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A3_ADDR]] to i8* // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], -// firstprivate(a): a_priv = a_in -// TCHECK-64: [[A_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_CONV]], -// TCHECK-32: [[A_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_ADDR]], -// TCHECK: store i{{[0-9]+}} [[A_IN_VAL]], i{{[0-9]+}}* [[A_PRIV]], +// a += 1 +// TCHECK-64: {{.+}} = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_CONV]], +// TCHECK-32: {{.+}} = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_ADDR]], +// TCHECK-64: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[A_CONV]], +// TCHECK-32: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[A_ADDR]], -// firstprivate(aaa) -// TCHECK: [[A3_IN_VAL:%.+]] = load i8, i8* [[A3_CONV]], -// TCHECK: store i{{[0-9]+}} [[A3_IN_VAL]], i{{[0-9]+}}* [[A3_PRIV]], +// aaa += 1 +// TCHECK: {{.+}} = load i{{[0-9]+}}, i{{[0-9]+}}* [[A3_CONV]], +// TCHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[A3_CONV]], -// firstprivate(b) -// TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8* -// TCHECK: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8* -// TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}}) +// b[2] += 1 +// TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B_ADDR_REF]], +// TCHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[B_GEP]], // TCHECK: ret void @@ -180,17 +167,14 @@ // TCHECK: define void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]]) // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*, // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}, - // TCHECK: [[B_PRIV:%.+]] = alloca i{{[0-9]+}}, // TCHECK: store [[S1]]* [[TH]], [[S1]]** [[TH_ADDR]], // TCHECK: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[B_ADDR]], // TCHECK: [[TH_ADDR_REF:%.+]] = load [[S1]]*, [[S1]]** [[TH_ADDR]], // TCHECK-64: [[B_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[B_ADDR]] to i{{[0-9]+}}* - // firstprivate(b) + // this->a = (double)b + 1.5; (only check access to b) // TCHECK-64: [[B_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_ADDR_CONV]], - // TCHECK-32: [[B_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_ADDR]], - // TCHECK: store i{{[0-9]+}} [[B_IN_VAL]], i{{[0-9]+}}* [[B_PRIV]], // TCHECK: ret void }; @@ -213,22 +197,20 @@ // TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, -// TCHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}}, -// TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], // TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], // TCHECK-64: [[A_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}* // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], -// firstprivate(a) +// a += 1 // TCHECK-64: [[A_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_ADDR_CONV]] // TCHECK-32: [[A_IN_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_ADDR]] -// TCHECK: store i{{[0-9]+}} [[A_IN_VAL]], i{{[0-9]+}}* [[A_PRIV]], +// TCHECK-64: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[A_ADDR_CONV]], +// TCHECK-32: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[A_ADDR]], -// firstprivate(b) -// TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8* -// TCHECK: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8* -// TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[B_PRIV_BCAST]], i8* [[B_IN_BCAST]],{{.+}}) +// b[2] += 1 +// TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B_ADDR_REF]], +// TCHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[B_GEP]], // TCHECK: ret void