Move the OpenMP long-double varargs test from the Driver test suite into the
CodeGen test. The single driver invocation (prefix OMP) is replaced by explicit
cc1 host and device invocations, checked separately under the OMP-HOST
(fp128) and OMP-TARGET (ppc_fp128) prefixes.

diff --git a/clang/test/CodeGen/ppc64le-varargs-f128.c b/clang/test/CodeGen/ppc64le-varargs-f128.c
--- a/clang/test/CodeGen/ppc64le-varargs-f128.c
+++ b/clang/test/CodeGen/ppc64le-varargs-f128.c
@@ -5,11 +5,51 @@
 // RUN:   -target-cpu pwr9 -target-feature +float128 \
 // RUN:   -o - %s | FileCheck %s -check-prefix=IBM
 
+// RUN: %clang_cc1 -triple ppc64le -emit-llvm-bc %s -target-cpu pwr9 \
+// RUN:   -target-feature +float128 -mabi=ieeelongdouble -fopenmp \
+// RUN:   -fopenmp-targets=ppc64le -o %t-ppc-host.bc
+// RUN: %clang_cc1 -triple ppc64le -aux-triple ppc64le %s -target-cpu pwr9 \
+// RUN:   -target-feature +float128 -fopenmp -fopenmp-is-device -emit-llvm \
+// RUN:   -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s \
+// RUN:   -check-prefix=OMP-TARGET
+// RUN: %clang_cc1 %t-ppc-host.bc -emit-llvm -o - | FileCheck %s \
+// RUN:   -check-prefix=OMP-HOST
+
 #include <stdarg.h>
 
 void foo_ld(long double);
 void foo_fq(__float128);
 
+// Verify cases when OpenMP target's and host's long-double semantics differ.
+
+// OMP-TARGET-LABEL: define internal void @.omp_outlined.(
+// OMP-TARGET: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8**
+// OMP-TARGET: %[[V2:[0-9a-zA-Z_.]+]] = bitcast i8* %[[CUR]] to ppc_fp128*
+// OMP-TARGET: %[[V3:[0-9a-zA-Z_.]+]] = load ppc_fp128, ppc_fp128* %[[V2]], align 8
+// OMP-TARGET: call void @foo_ld(ppc_fp128 %[[V3]])
+
+// OMP-HOST-LABEL: define void @omp(
+// OMP-HOST: %[[AP1:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP:[0-9a-zA-Z_.]+]] to i8*
+// OMP-HOST: call void @llvm.va_start(i8* %[[AP1]])
+// OMP-HOST: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** %[[AP]], align 8
+// OMP-HOST: %[[V0:[0-9a-zA-Z_.]+]] = ptrtoint i8* %[[CUR]] to i64
+// OMP-HOST: %[[V1:[0-9a-zA-Z_.]+]] = add i64 %[[V0]], 15
+// OMP-HOST: %[[V2:[0-9a-zA-Z_.]+]] = and i64 %[[V1]], -16
+// OMP-HOST: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[V2]] to i8*
+// OMP-HOST: %[[V3:[0-9a-zA-Z_.]+]] = bitcast i8* %[[ALIGN]] to fp128*
+// OMP-HOST: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, fp128* %[[V3]], align 16
+// OMP-HOST: call void @foo_ld(fp128 %[[V4]])
+void omp(int n, ...) {
+  va_list ap;
+  va_start(ap, n);
+  foo_ld(va_arg(ap, long double));
+  #pragma omp target parallel
+  for (int i = 1; i < n; ++i) {
+    foo_ld(va_arg(ap, long double));
+  }
+  va_end(ap);
+}
+
 // IEEE-LABEL: define void @f128
 // IEEE: %[[AP1:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP:[0-9a-zA-Z_.]+]] to i8*
 // IEEE: call void @llvm.va_start(i8* %[[AP1]])
diff --git a/clang/test/Driver/ppc-openmp-f128.c b/clang/test/Driver/ppc-openmp-f128.c
deleted file mode 100644
--- a/clang/test/Driver/ppc-openmp-f128.c
+++ /dev/null
@@ -1,39 +0,0 @@
-// RUN: %clang -target powerpc64le-unknown-linux-gnu -S -emit-llvm \
-// RUN:   -fopenmp-targets=ppc64le -mfloat128 -mabi=ieeelongdouble -mcpu=pwr9 \
-// RUN:   -Xopenmp-target=ppc64le -mcpu=pwr9 -Xopenmp-target=ppc64le \
-// RUN:   -mfloat128 -fopenmp=libomp -o - %s | FileCheck %s -check-prefix=OMP
-
-#include <stdarg.h>
-
-void foo_ld(long double);
-void foo_fq(__float128);
-
-// Verify cases when OpenMP target's and host's long-double semantics differ.
-
-// OMP-LABEL: define internal void @.omp_outlined.
-// OMP: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8**
-// OMP: %[[V2:[0-9a-zA-Z_.]+]] = bitcast i8* %[[CUR]] to ppc_fp128*
-// OMP: %[[V3:[0-9a-zA-Z_.]+]] = load ppc_fp128, ppc_fp128* %[[V2]], align 8
-// OMP: call void @foo_ld(ppc_fp128 %[[V3]])
-
-// OMP-LABEL: define dso_local void @omp
-// OMP: %[[AP1:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP:[0-9a-zA-Z_.]+]] to i8*
-// OMP: call void @llvm.va_start(i8* %[[AP1]])
-// OMP: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** %[[AP]], align 8
-// OMP: %[[V0:[0-9a-zA-Z_.]+]] = ptrtoint i8* %[[CUR]] to i64
-// OMP: %[[V1:[0-9a-zA-Z_.]+]] = add i64 %[[V0]], 15
-// OMP: %[[V2:[0-9a-zA-Z_.]+]] = and i64 %[[V1]], -16
-// OMP: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[V2]] to i8*
-// OMP: %[[V3:[0-9a-zA-Z_.]+]] = bitcast i8* %[[ALIGN]] to fp128*
-// OMP: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, fp128* %[[V3]], align 16
-// OMP: call void @foo_ld(fp128 %[[V4]])
-void omp(int n, ...) {
-  va_list ap;
-  va_start(ap, n);
-  foo_ld(va_arg(ap, long double));
-  #pragma omp target parallel
-  for (int i = 1; i < n; ++i) {
-    foo_ld(va_arg(ap, long double));
-  }
-  va_end(ap);
-}