@@ -347,7 +347,9 @@ define amdgpu_kernel void @test_fold_canonicalize_qNaN_value_f32(float addrspace
347
347
}
348
348
349
349
; GCN-LABEL: test_fold_canonicalize_minnum_value_from_load_f32:
350
- ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
350
+ ; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
351
+ ; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
352
+ ; GFX9: flat_store_dword v[{{[0-9:]+}}], [[V]]
351
353
define amdgpu_kernel void @test_fold_canonicalize_minnum_value_from_load_f32 (float addrspace (1 )* %arg ) {
352
354
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
353
355
%gep = getelementptr inbounds float , float addrspace (1 )* %arg , i32 %id
@@ -388,9 +390,11 @@ define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(float addrspace
388
390
}
389
391
390
392
; GCN-LABEL: test_fold_canonicalize_denorm_value_f32:
391
- ; GCN: v_min_f32_e32 [[V0:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
392
- ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]]
393
- ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
393
+ ; GFX9: v_min_f32_e32 [[V:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
394
+ ; VI: v_min_f32_e32 [[V0:v[0-9]+]], 0x7fffff, v{{[0-9]+}}
395
+ ; VI: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[V0]]
396
+ ; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
397
+ ; GFX9-NOT: 1.0
394
398
define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32 (float addrspace (1 )* %arg ) {
395
399
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
396
400
%gep = getelementptr inbounds float , float addrspace (1 )* %arg , i32 %id
@@ -402,9 +406,11 @@ define amdgpu_kernel void @test_fold_canonicalize_denorm_value_f32(float addrspa
402
406
}
403
407
404
408
; GCN-LABEL: test_fold_canonicalize_maxnum_value_from_load_f32:
405
- ; GCN: v_max_f32_e32 [[V0:v[0-9]+]], 0, v{{[0-9]+}}
406
- ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[V0]]
409
+ ; GFX9: v_max_f32_e32 [[V:v[0-9]+]], 0, v{{[0-9]+}}
410
+ ; VI: v_max_f32_e32 [[V0:v[0-9]+]], 0, v{{[0-9]+}}
411
+ ; VI: v_mul_f32_e32 [[V:v[0-9]+]], 1.0, [[V0]]
407
412
; GCN: flat_store_dword v[{{[0-9:]+}}], [[V]]
413
+ ; GFX9-NOT: 1.0
408
414
define amdgpu_kernel void @test_fold_canonicalize_maxnum_value_from_load_f32 (float addrspace (1 )* %arg ) {
409
415
%id = tail call i32 @llvm.amdgcn.workitem.id.x ()
410
416
%gep = getelementptr inbounds float , float addrspace (1 )* %arg , i32 %id
@@ -465,6 +471,49 @@ entry:
465
471
ret float %canonicalized
466
472
}
467
473
474
+ ; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f32:
475
+ ; GFX9-DENORM: flat_load_dword [[V:v[0-9]+]],
476
+ ; GFX9-DENORM: flat_store_dword v[{{[0-9:]+}}], [[V]]
477
+ ; GFX9-DENORM-NOT: 1.0
478
+ ; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
479
+ define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f32 (float addrspace (1 )* %arg , float addrspace (1 )* %out ) #1 { ; attribute group #1 sets "no-nans-fp-math"
480
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x () ; index shared by the load and store addresses below
481
+ %gep = getelementptr inbounds float , float addrspace (1 )* %arg , i32 %id ; address of element %id in %arg
482
+ %v = load float , float addrspace (1 )* %gep , align 4 ; loaded value is the only input to canonicalize
483
+ %canonicalized = tail call float @llvm.canonicalize.f32 (float %v ) ; the call whose lowering the check lines inspect
484
+ %gep2 = getelementptr inbounds float , float addrspace (1 )* %out , i32 %id ; address of element %id in %out
485
+ store float %canonicalized , float addrspace (1 )* %gep2 , align 4 ; write the canonicalized value to %out at index %id
486
+ ret void
487
+ }
488
+
489
+ ; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f64:
490
+ ; GCN: flat_load_dwordx2 [[V:v\[[0-9:]+\]]],
491
+ ; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], [[V]]
492
+ ; GCN-NOT: 1.0
493
+ define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f64 (double addrspace (1 )* %arg , double addrspace (1 )* %out ) #1 { ; attribute group #1 sets "no-nans-fp-math"
494
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x () ; index shared by the load and store addresses below
495
+ %gep = getelementptr inbounds double , double addrspace (1 )* %arg , i32 %id ; address of element %id in %arg
496
+ %v = load double , double addrspace (1 )* %gep , align 8 ; loaded value is the only input to canonicalize
497
+ %canonicalized = tail call double @llvm.canonicalize.f64 (double %v ) ; the call whose lowering the check lines inspect
498
+ %gep2 = getelementptr inbounds double , double addrspace (1 )* %out , i32 %id ; address of element %id in %out
499
+ store double %canonicalized , double addrspace (1 )* %gep2 , align 8 ; write the canonicalized value to %out at index %id
500
+ ret void
501
+ }
502
+
503
+ ; GCN-LABEL: {{^}}test_fold_canonicalize_load_nnan_value_f16:
504
+ ; GCN: flat_load_ushort [[V:v[0-9]+]],
505
+ ; GCN: flat_store_short v[{{[0-9:]+}}], [[V]]
506
+ ; GCN-NOT: 1.0
507
+ define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16 (half addrspace (1 )* %arg , half addrspace (1 )* %out ) #1 { ; attribute group #1 sets "no-nans-fp-math"
508
+ %id = tail call i32 @llvm.amdgcn.workitem.id.x () ; index shared by the load and store addresses below
509
+ %gep = getelementptr inbounds half , half addrspace (1 )* %arg , i32 %id ; address of element %id in %arg
510
+ %v = load half , half addrspace (1 )* %gep , align 2 ; loaded value is the only input to canonicalize
511
+ %canonicalized = tail call half @llvm.canonicalize.f16 (half %v ) ; the call whose lowering the check lines inspect
512
+ %gep2 = getelementptr inbounds half , half addrspace (1 )* %out , i32 %id ; address of element %id in %out
513
+ store half %canonicalized , half addrspace (1 )* %gep2 , align 2 ; write the canonicalized value to %out at index %id
514
+ ret void
515
+ }
516
+
468
517
declare float @llvm.canonicalize.f32 (float ) #0
469
518
declare double @llvm.canonicalize.f64 (double ) #0
470
519
declare half @llvm.canonicalize.f16 (half ) #0
@@ -485,3 +534,4 @@ declare float @llvm.maxnum.f32(float, float) #0
485
534
declare double @llvm.maxnum.f64 (double , double ) #0
486
535
487
536
attributes #0 = { nounwind readnone }
537
+ attributes #1 = { "no-nans-fp-math" ="true" }
0 commit comments