Index: llvm/test/Transforms/OpenMP/mem_transfer_hiding.ll
===================================================================
--- llvm/test/Transforms/OpenMP/mem_transfer_hiding.ll
+++ llvm/test/Transforms/OpenMP/mem_transfer_hiding.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -openmpopt < %s 2>&1
-; REQUIRES: asserts
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -openmpopt < %s | FileCheck %s
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 
@@ -45,6 +45,58 @@
 ; return random + a;
 ;}
 define dso_local double @heavyComputation1() {
+; CHECK-LABEL: @heavyComputation1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RANDOM:%.*]] = alloca double, align 8
+; CHECK-NEXT:    [[A:%.*]] = alloca double, align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
+; CHECK-NEXT:    [[A_CASTED:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [1 x i8*], align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x i8*], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @rand()
+; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[CALL]] to double
+; CHECK-NEXT:    store double [[CONV]], double* [[RANDOM]], align 8
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @rand()
+; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[CALL1]], 777
+; CHECK-NEXT:    [[CONV2:%.*]] = sitofp i32 [[REM]] to double
+; CHECK-NEXT:    store double [[CONV2]], double* [[A]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[TMP0]] to double**
+; CHECK-NEXT:    store double* [[A]], double** [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8** [[TMP2]] to double**
+; CHECK-NEXT:    store double* [[A]], double** [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+; CHECK-NEXT:    call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[TMP4]], i8** [[TMP5]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0))
+; CHECK-NEXT:    [[TMP6:%.*]] = load double, double* [[A]], align 8
+; CHECK-NEXT:    [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to double*
+; CHECK-NEXT:    store double [[TMP6]], double* [[CONV3]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = load i64, i64* [[A_CASTED]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i64*
+; CHECK-NEXT:    store i64 [[TMP7]], i64* [[TMP9]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to i64*
+; CHECK-NEXT:    store i64 [[TMP7]], i64* [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @__tgt_target_teams(i64 -1, i8* @.__omp_offloading_heavyComputation1.region_id, i32 1, i8** [[TMP12]], i8** [[TMP13]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.2, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i32 0, i32 0), i32 0, i32 0)
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
+; CHECK-NEXT:    br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+; CHECK:       omp_offload.failed:
+; CHECK-NEXT:    call void @heavyComputation1FallBack(i64 [[TMP7]])
+; CHECK-NEXT:    br label [[OMP_OFFLOAD_CONT]]
+; CHECK:       omp_offload.cont:
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+; CHECK-NEXT:    call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[TMP16]], i8** [[TMP17]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i32 0, i32 0))
+; CHECK-NEXT:    [[TMP18:%.*]] = load double, double* [[RANDOM]], align 8
+; CHECK-NEXT:    [[TMP19:%.*]] = load double, double* [[A]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP18]], [[TMP19]]
+; CHECK-NEXT:    ret double [[ADD]]
+;
 entry:
   %random = alloca double, align 8
   %a = alloca double, align 8
@@ -163,6 +215,81 @@
 ; return random;
 ;}
 define dso_local i32 @heavyComputation2(double* %a, i32 %size) {
+; CHECK-LABEL: @heavyComputation2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca double*, align 8
+; CHECK-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RANDOM:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8
+; CHECK-NEXT:    [[SIZE_CASTED:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x i8*], align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x i8*], align 8
+; CHECK-NEXT:    store double* [[A:%.*]], double** [[A_ADDR]], align 8
+; CHECK-NEXT:    store i32 [[SIZE:%.*]], i32* [[SIZE_ADDR]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @rand()
+; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[CALL]], 7
+; CHECK-NEXT:    store i32 [[REM]], i32* [[RANDOM]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load double*, double** [[A_ADDR]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[CONV]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to double**
+; CHECK-NEXT:    store double* [[TMP0]], double** [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to double**
+; CHECK-NEXT:    store double* [[ARRAYIDX]], double** [[TMP7]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+; CHECK-NEXT:    store i64 [[TMP3]], i64* [[TMP8]], align 8
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32**
+; CHECK-NEXT:    store i32* [[SIZE_ADDR]], i32** [[TMP10]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32**
+; CHECK-NEXT:    store i32* [[SIZE_ADDR]], i32** [[TMP12]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
+; CHECK-NEXT:    store i64 4, i64* [[TMP13]], align 8
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+; CHECK-NEXT:    call void @__tgt_target_data_begin(i64 -1, i32 2, i8** [[TMP14]], i8** [[TMP15]], i64* [[TMP16]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i32 0, i32 0))
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+; CHECK-NEXT:    [[CONV1:%.*]] = bitcast i64* [[SIZE_CASTED]] to i32*
+; CHECK-NEXT:    store i32 [[TMP17]], i32* [[CONV1]], align 4
+; CHECK-NEXT:    [[TMP18:%.*]] = load i64, i64* [[SIZE_CASTED]], align 8
+; CHECK-NEXT:    [[TMP19:%.*]] = load double*, double** [[A_ADDR]], align 8
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
+; CHECK-NEXT:    store i64 [[TMP18]], i64* [[TMP21]], align 8
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
+; CHECK-NEXT:    store i64 [[TMP18]], i64* [[TMP23]], align 8
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to double**
+; CHECK-NEXT:    store double* [[TMP19]], double** [[TMP25]], align 8
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to double**
+; CHECK-NEXT:    store double* [[TMP19]], double** [[TMP27]], align 8
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams(i64 -1, i8* @.__omp_offloading_heavyComputation2.region_id, i32 2, i8** [[TMP28]], i8** [[TMP29]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i32 0, i32 0)
+; CHECK-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+; CHECK-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+; CHECK:       omp_offload.failed:
+; CHECK-NEXT:    call void @heavyComputation2FallBack(i64 [[TMP18]], double* [[TMP19]])
+; CHECK-NEXT:    br label [[OMP_OFFLOAD_CONT]]
+; CHECK:       omp_offload.cont:
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+; CHECK-NEXT:    call void @__tgt_target_data_end(i64 -1, i32 2, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i32 0, i32 0))
+; CHECK-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RANDOM]], align 4
+; CHECK-NEXT:    ret i32 [[TMP35]]
+;
 entry:
   %a.addr = alloca double*, align 8
   %size.addr = alloca i32, align 4
@@ -284,6 +411,81 @@
 ; return random;
 ;}
 define dso_local i32 @heavyComputation3(double* noalias %a, i32 %size) {
+; CHECK-LABEL: @heavyComputation3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca double*, align 8
+; CHECK-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RANDOM:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8
+; CHECK-NEXT:    [[SIZE_CASTED:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x i8*], align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x i8*], align 8
+; CHECK-NEXT:    store double* [[A:%.*]], double** [[A_ADDR]], align 8
+; CHECK-NEXT:    store i32 [[SIZE:%.*]], i32* [[SIZE_ADDR]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @rand()
+; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[CALL]], 7
+; CHECK-NEXT:    store i32 [[REM]], i32* [[RANDOM]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load double*, double** [[A_ADDR]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[CONV]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to double**
+; CHECK-NEXT:    store double* [[TMP0]], double** [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to double**
+; CHECK-NEXT:    store double* [[ARRAYIDX]], double** [[TMP7]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+; CHECK-NEXT:    store i64 [[TMP3]], i64* [[TMP8]], align 8
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32**
+; CHECK-NEXT:    store i32* [[SIZE_ADDR]], i32** [[TMP10]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32**
+; CHECK-NEXT:    store i32* [[SIZE_ADDR]], i32** [[TMP12]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 1
+; CHECK-NEXT:    store i64 4, i64* [[TMP13]], align 8
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+; CHECK-NEXT:    call void @__tgt_target_data_begin(i64 -1, i32 2, i8** [[TMP14]], i8** [[TMP15]], i64* [[TMP16]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.5, i32 0, i32 0))
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+; CHECK-NEXT:    [[CONV1:%.*]] = bitcast i64* [[SIZE_CASTED]] to i32*
+; CHECK-NEXT:    store i32 [[TMP17]], i32* [[CONV1]], align 4
+; CHECK-NEXT:    [[TMP18:%.*]] = load i64, i64* [[SIZE_CASTED]], align 8
+; CHECK-NEXT:    [[TMP19:%.*]] = load double*, double** [[A_ADDR]], align 8
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
+; CHECK-NEXT:    store i64 [[TMP18]], i64* [[TMP21]], align 8
+; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
+; CHECK-NEXT:    store i64 [[TMP18]], i64* [[TMP23]], align 8
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP25:%.*]] = bitcast i8** [[TMP24]] to double**
+; CHECK-NEXT:    store double* [[TMP19]], double** [[TMP25]], align 8
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP27:%.*]] = bitcast i8** [[TMP26]] to double**
+; CHECK-NEXT:    store double* [[TMP19]], double** [[TMP27]], align 8
+; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP30:%.*]] = call i32 @__tgt_target_teams(i64 -1, i8* @.__omp_offloading_heavyComputation3.region_id, i32 2, i8** [[TMP28]], i8** [[TMP29]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.6, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.6, i32 0, i32 0), i32 0, i32 0)
+; CHECK-NEXT:    [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0
+; CHECK-NEXT:    br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+; CHECK:       omp_offload.failed:
+; CHECK-NEXT:    call void @heavyComputation3FallBack(i64 [[TMP18]], double* [[TMP19]])
+; CHECK-NEXT:    br label [[OMP_OFFLOAD_CONT]]
+; CHECK:       omp_offload.cont:
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+; CHECK-NEXT:    call void @__tgt_target_data_end(i64 -1, i32 2, i8** [[TMP32]], i8** [[TMP33]], i64* [[TMP34]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.5, i32 0, i32 0))
+; CHECK-NEXT:    [[TMP35:%.*]] = load i32, i32* [[RANDOM]], align 4
+; CHECK-NEXT:    ret i32 [[TMP35]]
+;
 entry:
   %a.addr = alloca double*, align 8
   %size.addr = alloca i32, align 4
@@ -438,6 +640,47 @@
 ; return random;
 ;}
 define dso_local i32 @dataTransferOnly1(double* noalias %a, i32 %size) {
+; CHECK-LABEL: @dataTransferOnly1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca double*, align 8
+; CHECK-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RANDOM:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
+; CHECK-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [1 x i64], align 8
+; CHECK-NEXT:    store double* [[A:%.*]], double** [[A_ADDR]], align 8
+; CHECK-NEXT:    store i32 [[SIZE:%.*]], i32* [[SIZE_ADDR]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @rand()
+; CHECK-NEXT:    store i32 [[CALL]], i32* [[RANDOM]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load double*, double** [[A_ADDR]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 8
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[CONV]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to double**
+; CHECK-NEXT:    store double* [[TMP0]], double** [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to double**
+; CHECK-NEXT:    store double* [[ARRAYIDX]], double** [[TMP7]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+; CHECK-NEXT:    store i64 [[TMP3]], i64* [[TMP8]], align 8
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+; CHECK-NEXT:    call void @__tgt_target_data_begin(i64 -1, i32 1, i8** [[TMP9]], i8** [[TMP10]], i64* [[TMP11]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.7, i32 0, i32 0))
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* [[RANDOM]], align 4
+; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[TMP13]], [[TMP12]]
+; CHECK-NEXT:    store i32 [[REM]], i32* [[RANDOM]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+; CHECK-NEXT:    call void @__tgt_target_data_end(i64 -1, i32 1, i8** [[TMP14]], i8** [[TMP15]], i64* [[TMP16]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.7, i32 0, i32 0))
+; CHECK-NEXT:    [[TMP17:%.*]] = load i32, i32* [[RANDOM]], align 4
+; CHECK-NEXT:    ret i32 [[TMP17]]
+;
 entry:
   %a.addr = alloca double*, align 8
   %size.addr = alloca i32, align 4
@@ -535,4 +778,4 @@
 ; FIXME: These two function declarations must be generated after splitting the runtime function
 ; __tgt_target_data_begin.
 ; declare dso_local i8* @__tgt_target_data_begin_issue(i64* dereferenceable(8), i32, i8**, i8**, i64*, i64*)
-; declare dso_local void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info* dereferenceable(8))
\ No newline at end of file
+; declare dso_local void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info* dereferenceable(8))
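
A minimal sketch of the shape the pass is driving at once the FIXME above is
resolved: issue the transfer early, run independent host code while the copy is
in flight, and wait only where the data is first needed. Only the two
declarations quoted in the FIXME are taken from the patch; the opaque
async-info type, the bitcast of the returned i8* handle, and the example
function itself are assumptions for illustration, not the committed runtime API:

%struct.__tgt_async_info = type opaque

declare dso_local i8* @__tgt_target_data_begin_issue(i64* dereferenceable(8), i32, i8**, i8**, i64*, i64*)
declare dso_local void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info* dereferenceable(8))

define void @transfer_hiding_sketch(i64* dereferenceable(8) %device_id, i8** %baseptrs, i8** %ptrs, i64* %sizes, i64* %maptypes) {
entry:
  ; Start the host-to-device copy and keep the returned handle.
  %handle = call i8* @__tgt_target_data_begin_issue(i64* %device_id, i32 1, i8** %baseptrs, i8** %ptrs, i64* %sizes, i64* %maptypes)
  ; ... host code with no dependence on the mapped buffer runs here,
  ; overlapping (hiding) the transfer latency ...
  %info = bitcast i8* %handle to %struct.__tgt_async_info*
  ; Block only at the first point the device copy is actually required.
  call void @__tgt_target_data_begin_wait(i64 -1, %struct.__tgt_async_info* %info)
  ret void
}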