Changeset View
Changeset View
Standalone View
Standalone View
test/GPGPU/libdevice-functions-copied-into-kernel.ll
Show All 14 Lines | |||||
; Check that we model the kernel as a scop. | ; Check that we model the kernel as a scop. | ||||
; SCOP: Function: f | ; SCOP: Function: f | ||||
; SCOP-NEXT: Region: %entry.split---%for.end | ; SCOP-NEXT: Region: %entry.split---%for.end | ||||
; Check that the calls to the __nv_* device functions are present in the kernel IR. | ; Check that the calls to the __nv_* device functions are present in the kernel IR. | ||||
; KERNEL-IR: %p_expf = tail call float @__nv_expf(float %A.arr.i.val_p_scalar_) | ; KERNEL-IR: %p_expf = tail call float @__nv_expf(float %A.arr.i.val_p_scalar_) | ||||
; KERNEL-IR: %p_cosf = tail call float @__nv_cosf(float %p_expf) | ; KERNEL-IR: %p_cosf = tail call float @__nv_cosf(float %p_expf) | ||||
; KERNEL-IR: %p_logf = tail call float @__nv_logf(float %p_cosf) | |||||
; Check that kernel launch is generated in host IR. | ; Check that kernel launch is generated in host IR. | ||||
; The declaration would not be generated unless a call to a kernel exists. | ; The declaration would not be generated unless a call to a kernel exists. | ||||
; HOST-IR: declare void @polly_launchKernel(i8*, i32, i32, i32, i32, i32, i8*) | ; HOST-IR: declare void @polly_launchKernel(i8*, i32, i32, i32, i32, i32, i8*) | ||||
; void f(float *A, float *B, int N) { | ; void f(float *A, float *B, int N) { | ||||
; for(int i = 0; i < N; i++) { | ; for(int i = 0; i < N; i++) { | ||||
; float tmp0 = A[i]; | ; float tmp0 = A[i]; | ||||
; float tmp1 = expf(tmp0); | ; float expf = expf(tmp0); | ||||
; tmp1 = cosf(tmp1); | ; cosf = cosf(expf); | ||||
; B[i] = tmp1; | ; logf = logf(cosf); | ||||
; B[i] = logf; | |||||
; } | ; } | ||||
; } | ; } | ||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" | target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" | ||||
; Test input function @f(A, B, N): for each i in [0, N) it loads A[i], feeds the
; value through the chained math calls below, and stores a result to B[i].
; NOTE(review): this text is a two-column diff rendering. Where the columns
; differ, the first column presumably shows the old revision and the second
; the new one; lines with a single cell presumably exist on one side only.
; Confirm against the actual patch.
define void @f(float* %A, float* %B, i32 %N) { | define void @f(float* %A, float* %B, i32 %N) { | ||||
entry: | entry: | ||||
br label %entry.split | br label %entry.split | ||||
entry.split: ; preds = %entry | entry.split: ; preds = %entry | ||||
; Guard: the loop is entered only when N > 0 (signed compare); otherwise jump
; straight to %for.end.
%cmp1 = icmp sgt i32 %N, 0 | %cmp1 = icmp sgt i32 %N, 0 | ||||
br i1 %cmp1, label %for.body.lr.ph, label %for.end | br i1 %cmp1, label %for.body.lr.ph, label %for.end | ||||
for.body.lr.ph: ; preds = %entry.split | for.body.lr.ph: ; preds = %entry.split | ||||
br label %for.body | br label %for.body | ||||
for.body: ; preds = %for.body.lr.ph, %for.body | for.body: ; preds = %for.body.lr.ph, %for.body | ||||
; 64-bit induction variable: starts at 0 and is incremented by 1 each iteration.
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] | %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] | ||||
; Load A[i].
%A.arr.i = getelementptr inbounds float, float* %A, i64 %indvars.iv | %A.arr.i = getelementptr inbounds float, float* %A, i64 %indvars.iv | ||||
%A.arr.i.val = load float, float* %A.arr.i, align 4 | %A.arr.i.val = load float, float* %A.arr.i, align 4 | ||||
; Call to intrinsics that should be part of the kernel. | ; Call to intrinsics that should be part of the kernel. | ||||
; Each call consumes the result of the previous one: expf -> cosf -> logf.
%expf = tail call float @expf(float %A.arr.i.val) | %expf = tail call float @expf(float %A.arr.i.val) | ||||
%cosf = tail call float @cosf(float %expf) | %cosf = tail call float @cosf(float %expf) | ||||
; The logf call below appears in only one cell of this rendering.
%logf = tail call float @logf(float %cosf) | |||||
; Store to B[i]. The stored value differs between the two columns: %expf in
; the first, %logf in the second.
%B.arr.i = getelementptr inbounds float, float* %B, i64 %indvars.iv | %B.arr.i = getelementptr inbounds float, float* %B, i64 %indvars.iv | ||||
store float %expf, float* %B.arr.i, align 4 | store float %logf, float* %B.arr.i, align 4 | ||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | ||||
; Widen N to i64 so it can be compared with the 64-bit induction variable;
; the loop continues while the incremented index differs from N.
%wide.trip.count = zext i32 %N to i64 | %wide.trip.count = zext i32 %N to i64 | ||||
%exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count | %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count | ||||
br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge | br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge | ||||
for.cond.for.end_crit_edge: ; preds = %for.body | for.cond.for.end_crit_edge: ; preds = %for.body | ||||
br label %for.end | br label %for.end | ||||
for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split | for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split | ||||
ret void | ret void | ||||
} | } | ||||
; External declarations for the math functions called from @f. All of them
; reference attribute group #0, defined on the last line as nounwind readnone.
; The logf declaration appears in only one cell of this two-column rendering.
; Function Attrs: nounwind readnone | ; Function Attrs: nounwind readnone | ||||
declare float @expf(float) #0 | declare float @expf(float) #0 | ||||
declare float @cosf(float) #0 | declare float @cosf(float) #0 | ||||
declare float @logf(float) #0 | |||||
attributes #0 = { nounwind readnone } | attributes #0 = { nounwind readnone } | ||||