Page MenuHomePhabricator

No OneTemporary

File Metadata

Created
Jan 24 2020, 4:12 PM
Index: llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll (revision 301936)
+++ llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll (revision 301937)
@@ -1,1540 +1,1540 @@
; RUN: opt -instcombine -S < %s | FileCheck %s
; --------------------------------------------------------------------
; llvm.amdgcn.rcp
; --------------------------------------------------------------------

; Constant-folding expectations for the reciprocal intrinsic:
;   * an undef operand is expected to give undef,
;   * the operands 1.0 and 0.5 are expected to fold to 1.0 and 2.0,
;   * the operand 43.0 is expected to leave the call in place
;     (presumably because 1/43 is not exactly representable -- confirm
;     against the fold's implementation).
; Every label pattern ends in "(" so it can only match the complete function
; name, which is the convention the other sections of this file follow.

declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone

; undef operand -> undef result.
; CHECK-LABEL: @test_constant_fold_rcp_f32_undef(
; CHECK-NEXT: ret float undef
define float @test_constant_fold_rcp_f32_undef() nounwind {
  %val = call float @llvm.amdgcn.rcp.f32(float undef) nounwind readnone
  ret float %val
}

; rcp(1.0) -> 1.0 (f32).
; CHECK-LABEL: @test_constant_fold_rcp_f32_1(
; CHECK-NEXT: ret float 1.000000e+00
define float @test_constant_fold_rcp_f32_1() nounwind {
  %val = call float @llvm.amdgcn.rcp.f32(float 1.0) nounwind readnone
  ret float %val
}

; rcp(1.0) -> 1.0 (f64).
; CHECK-LABEL: @test_constant_fold_rcp_f64_1(
; CHECK-NEXT: ret double 1.000000e+00
define double @test_constant_fold_rcp_f64_1() nounwind {
  %val = call double @llvm.amdgcn.rcp.f64(double 1.0) nounwind readnone
  ret double %val
}

; rcp(0.5) -> 2.0 (f32).
; CHECK-LABEL: @test_constant_fold_rcp_f32_half(
; CHECK-NEXT: ret float 2.000000e+00
define float @test_constant_fold_rcp_f32_half() nounwind {
  %val = call float @llvm.amdgcn.rcp.f32(float 0.5) nounwind readnone
  ret float %val
}

; rcp(0.5) -> 2.0 (f64).
; CHECK-LABEL: @test_constant_fold_rcp_f64_half(
; CHECK-NEXT: ret double 2.000000e+00
define double @test_constant_fold_rcp_f64_half() nounwind {
  %val = call double @llvm.amdgcn.rcp.f64(double 0.5) nounwind readnone
  ret double %val
}

; rcp(43.0) must not be folded: the call is expected to survive (f32).
; CHECK-LABEL: @test_constant_fold_rcp_f32_43(
; CHECK-NEXT: call float @llvm.amdgcn.rcp.f32(float 4.300000e+01)
define float @test_constant_fold_rcp_f32_43() nounwind {
  %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone
  ret float %val
}

; rcp(43.0) must not be folded: the call is expected to survive (f64).
; CHECK-LABEL: @test_constant_fold_rcp_f64_43(
; CHECK-NEXT: call double @llvm.amdgcn.rcp.f64(double 4.300000e+01)
define double @test_constant_fold_rcp_f64_43() nounwind {
  %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone
  ret double %val
}
; --------------------------------------------------------------------
; llvm.amdgcn.rsq
; --------------------------------------------------------------------

; Only the undef case is covered for the reciprocal square root intrinsic:
; an undef operand is expected to give an undef result.
; The label pattern ends in "(" so it can only match the complete function
; name, which is the convention the other sections of this file follow.

declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone

; CHECK-LABEL: @test_constant_fold_rsq_f32_undef(
; CHECK-NEXT: ret float undef
define float @test_constant_fold_rsq_f32_undef() nounwind {
  %val = call float @llvm.amdgcn.rsq.f32(float undef) nounwind readnone
  ret float %val
}
; --------------------------------------------------------------------
; llvm.amdgcn.frexp.mant
; --------------------------------------------------------------------
; Constant-folding expectations for the mantissa part of frexp. Every case
; is exercised once for f32 and once for f64; each function calls the
; intrinsic on a single constant and the expected output is a bare `ret` of
; the folded constant.
; Note: the float operands below that are written in hexadecimal use the
; same 64-bit spelling as the double operands.
declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind readnone
declare double @llvm.amdgcn.frexp.mant.f64(double) nounwind readnone
; undef operand -> undef result.
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_undef(
; CHECK-NEXT: ret float undef
define float @test_constant_fold_frexp_mant_f32_undef() nounwind {
%val = call float @llvm.amdgcn.frexp.mant.f32(float undef)
ret float %val
}
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_undef(
; CHECK-NEXT: ret double undef
define double @test_constant_fold_frexp_mant_f64_undef() nounwind {
%val = call double @llvm.amdgcn.frexp.mant.f64(double undef)
ret double %val
}
; +0.0 operand -> +0.0 result.
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_0(
; CHECK-NEXT: ret float 0.000000e+00
define float @test_constant_fold_frexp_mant_f32_0() nounwind {
%val = call float @llvm.amdgcn.frexp.mant.f32(float 0.0)
ret float %val
}
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_0(
; CHECK-NEXT: ret double 0.000000e+00
define double @test_constant_fold_frexp_mant_f64_0() nounwind {
%val = call double @llvm.amdgcn.frexp.mant.f64(double 0.0)
ret double %val
}
; -0.0 operand -> -0.0 result (the sign is expected to be kept).
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n0(
; CHECK-NEXT: ret float -0.000000e+00
define float @test_constant_fold_frexp_mant_f32_n0() nounwind {
%val = call float @llvm.amdgcn.frexp.mant.f32(float -0.0)
ret float %val
}
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n0(
; CHECK-NEXT: ret double -0.000000e+00
define double @test_constant_fold_frexp_mant_f64_n0() nounwind {
%val = call double @llvm.amdgcn.frexp.mant.f64(double -0.0)
ret double %val
}
; 1.0 operand -> 0.5 result.
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_1(
; CHECK-NEXT: ret float 5.000000e-01
define float @test_constant_fold_frexp_mant_f32_1() nounwind {
%val = call float @llvm.amdgcn.frexp.mant.f32(float 1.0)
ret float %val
}
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_1(
; CHECK-NEXT: ret double 5.000000e-01
define double @test_constant_fold_frexp_mant_f64_1() nounwind {
%val = call double @llvm.amdgcn.frexp.mant.f64(double 1.0)
ret double %val
}
; -1.0 operand -> -0.5 result.
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n1(
; CHECK-NEXT: ret float -5.000000e-01
define float @test_constant_fold_frexp_mant_f32_n1() nounwind {
%val = call float @llvm.amdgcn.frexp.mant.f32(float -1.0)
ret float %val
}
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n1(
; CHECK-NEXT: ret double -5.000000e-01
define double @test_constant_fold_frexp_mant_f64_n1() nounwind {
%val = call double @llvm.amdgcn.frexp.mant.f64(double -1.0)
ret double %val
}
; NaN operand (0x7FF8000000000000): the same bit pattern is expected back.
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_nan(
; CHECK-NEXT: ret float 0x7FF8000000000000
define float @test_constant_fold_frexp_mant_f32_nan() nounwind {
%val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF8000000000000)
ret float %val
}
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_nan(
; CHECK-NEXT: ret double 0x7FF8000000000000
define double @test_constant_fold_frexp_mant_f64_nan() nounwind {
%val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF8000000000000)
ret double %val
}
; +infinity operand (0x7FF0000000000000): expected to be returned unchanged.
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_inf(
; CHECK-NEXT: ret float 0x7FF0000000000000
define float @test_constant_fold_frexp_mant_f32_inf() nounwind {
%val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF0000000000000)
ret float %val
}
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_inf(
; CHECK-NEXT: ret double 0x7FF0000000000000
define double @test_constant_fold_frexp_mant_f64_inf() nounwind {
%val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF0000000000000)
ret double %val
}
; -infinity operand (0xFFF0000000000000): expected to be returned unchanged.
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_ninf(
; CHECK-NEXT: ret float 0xFFF0000000000000
define float @test_constant_fold_frexp_mant_f32_ninf() nounwind {
%val = call float @llvm.amdgcn.frexp.mant.f32(float 0xFFF0000000000000)
ret float %val
}
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_ninf(
; CHECK-NEXT: ret double 0xFFF0000000000000
define double @test_constant_fold_frexp_mant_f64_ninf() nounwind {
%val = call double @llvm.amdgcn.frexp.mant.f64(double 0xFFF0000000000000)
ret double %val
}
; "max_num" operands (named as the largest finite value of each type): the
; expected mantissas are 0x3FEFFFFFE0000000 (f32) and 0x3FEFFFFFFFFFFFFF (f64).
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_max_num(
; CHECK-NEXT: ret float 0x3FEFFFFFE0000000
define float @test_constant_fold_frexp_mant_f32_max_num() nounwind {
%val = call float @llvm.amdgcn.frexp.mant.f32(float 0x47EFFFFFE0000000)
ret float %val
}
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_max_num(
; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFF
define double @test_constant_fold_frexp_mant_f64_max_num() nounwind {
%val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FEFFFFFFFFFFFFF)
ret double %val
}
; "min_num" operands (named as the smallest positive value of each type):
; both are expected to fold to a mantissa of 0.5.
; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_min_num(
; CHECK-NEXT: ret float 5.000000e-01
define float @test_constant_fold_frexp_mant_f32_min_num() nounwind {
%val = call float @llvm.amdgcn.frexp.mant.f32(float 0x36A0000000000000)
ret float %val
}
; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_min_num(
; CHECK-NEXT: ret double 5.000000e-01
define double @test_constant_fold_frexp_mant_f64_min_num() nounwind {
%val = call double @llvm.amdgcn.frexp.mant.f64(double 4.940656e-324)
ret double %val
}
; --------------------------------------------------------------------
; llvm.amdgcn.frexp.exp
; --------------------------------------------------------------------
; Constant-folding expectations for the exponent part of frexp. The result
; type is always i32; every case is exercised once for an f32 operand and
; once for an f64 operand, and the expected output is a bare `ret i32`.
declare i32 @llvm.amdgcn.frexp.exp.f32(float) nounwind readnone
declare i32 @llvm.amdgcn.frexp.exp.f64(double) nounwind readnone
; undef operand -> undef result.
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_undef(
; CHECK-NEXT: ret i32 undef
define i32 @test_constant_fold_frexp_exp_f32_undef() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f32(float undef)
ret i32 %val
}
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_undef(
; CHECK-NEXT: ret i32 undef
define i32 @test_constant_fold_frexp_exp_f64_undef() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f64(double undef)
ret i32 %val
}
; +0.0 operand -> exponent 0.
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_0(
; CHECK-NEXT: ret i32 0
define i32 @test_constant_fold_frexp_exp_f32_0() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0)
ret i32 %val
}
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_0(
; CHECK-NEXT: ret i32 0
define i32 @test_constant_fold_frexp_exp_f64_0() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0)
ret i32 %val
}
; -0.0 operand -> exponent 0.
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n0(
; CHECK-NEXT: ret i32 0
define i32 @test_constant_fold_frexp_exp_f32_n0() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f32(float -0.0)
ret i32 %val
}
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n0(
; CHECK-NEXT: ret i32 0
define i32 @test_constant_fold_frexp_exp_f64_n0() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f64(double -0.0)
ret i32 %val
}
; 1024.0 operand -> exponent 11.
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1024(
; CHECK-NEXT: ret i32 11
define i32 @test_constant_fold_frexp_exp_f32_1024() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f32(float 1024.0)
ret i32 %val
}
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1024(
; CHECK-NEXT: ret i32 11
define i32 @test_constant_fold_frexp_exp_f64_1024() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f64(double 1024.0)
ret i32 %val
}
; -1024.0 operand -> exponent 11 (the sign does not change the exponent).
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n1024(
; CHECK-NEXT: ret i32 11
define i32 @test_constant_fold_frexp_exp_f32_n1024() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f32(float -1024.0)
ret i32 %val
}
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n1024(
; CHECK-NEXT: ret i32 11
define i32 @test_constant_fold_frexp_exp_f64_n1024() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f64(double -1024.0)
ret i32 %val
}
; 0.0009765625 (1/1024) operand -> exponent -9.
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1_1024(
; CHECK-NEXT: ret i32 -9
define i32 @test_constant_fold_frexp_exp_f32_1_1024() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0009765625)
ret i32 %val
}
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1_1024(
; CHECK-NEXT: ret i32 -9
define i32 @test_constant_fold_frexp_exp_f64_1_1024() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0009765625)
ret i32 %val
}
; NaN operand -> exponent 0.
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_nan(
; CHECK-NEXT: ret i32 0
define i32 @test_constant_fold_frexp_exp_f32_nan() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF8000000000000)
ret i32 %val
}
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_nan(
; CHECK-NEXT: ret i32 0
define i32 @test_constant_fold_frexp_exp_f64_nan() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF8000000000000)
ret i32 %val
}
; +infinity operand -> exponent 0.
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_inf(
; CHECK-NEXT: ret i32 0
define i32 @test_constant_fold_frexp_exp_f32_inf() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF0000000000000)
ret i32 %val
}
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_inf(
; CHECK-NEXT: ret i32 0
define i32 @test_constant_fold_frexp_exp_f64_inf() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF0000000000000)
ret i32 %val
}
; -infinity operand -> exponent 0.
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_ninf(
; CHECK-NEXT: ret i32 0
define i32 @test_constant_fold_frexp_exp_f32_ninf() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0xFFF0000000000000)
ret i32 %val
}
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_ninf(
; CHECK-NEXT: ret i32 0
define i32 @test_constant_fold_frexp_exp_f64_ninf() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0xFFF0000000000000)
ret i32 %val
}
; "max_num" operands: expected exponents are 128 (f32) and 1024 (f64).
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_max_num(
; CHECK-NEXT: ret i32 128
define i32 @test_constant_fold_frexp_exp_f32_max_num() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x47EFFFFFE0000000)
ret i32 %val
}
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_max_num(
; CHECK-NEXT: ret i32 1024
define i32 @test_constant_fold_frexp_exp_f64_max_num() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FEFFFFFFFFFFFFF)
ret i32 %val
}
; "min_num" operands: expected exponents are -148 (f32) and -1073 (f64).
; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_min_num(
; CHECK-NEXT: ret i32 -148
define i32 @test_constant_fold_frexp_exp_f32_min_num() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x36A0000000000000)
ret i32 %val
}
; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_min_num(
; CHECK-NEXT: ret i32 -1073
define i32 @test_constant_fold_frexp_exp_f64_min_num() nounwind {
%val = call i32 @llvm.amdgcn.frexp.exp.f64(double 4.940656e-324)
ret i32 %val
}
; --------------------------------------------------------------------
; llvm.amdgcn.class
; --------------------------------------------------------------------
; Expectations for the floating-point class test intrinsic, which takes a
; value and an i32 mask and returns i1.
; The first group (f32) covers mask handling with a variable or undef value;
; the second group (f64) folds constant values against single-bit masks.
; Mask bits as paired with class names by the f64 test names below:
;   1 = snan, 2 = qnan, 4 = ninf, 8 = nnormal, 16 = nsubnormal,
;   32 = nzero, 64 = pzero, 128 = psubnormal, 256 = pnormal, 512 = pinf.
declare i1 @llvm.amdgcn.class.f32(float, i32) nounwind readnone
declare i1 @llvm.amdgcn.class.f64(double, i32) nounwind readnone
; undef mask with a variable value -> false.
; CHECK-LABEL: @test_class_undef_mask_f32(
; CHECK: ret i1 false
define i1 @test_class_undef_mask_f32(float %x) nounwind {
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 undef)
ret i1 %val
}
; Mask 1025 is above the ten defined bits; the call is expected to be
; rewritten with the mask reduced to 1.
; CHECK-LABEL: @test_class_over_max_mask_f32(
; CHECK: %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1)
define i1 @test_class_over_max_mask_f32(float %x) nounwind {
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1025)
ret i1 %val
}
; Mask 0 (no class selected) -> false.
; CHECK-LABEL: @test_class_no_mask_f32(
; CHECK: ret i1 false
define i1 @test_class_no_mask_f32(float %x) nounwind {
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 0)
ret i1 %val
}
; Mask 1023 (every class selected) -> true.
; CHECK-LABEL: @test_class_full_mask_f32(
; CHECK: ret i1 true
define i1 @test_class_full_mask_f32(float %x) nounwind {
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1023)
ret i1 %val
}
; undef value with mask 0 -> false (the mask decides, not the value).
; CHECK-LABEL: @test_class_undef_no_mask_f32(
; CHECK: ret i1 false
define i1 @test_class_undef_no_mask_f32() nounwind {
%val = call i1 @llvm.amdgcn.class.f32(float undef, i32 0)
ret i1 %val
}
; undef value with mask 1023 -> true.
; CHECK-LABEL: @test_class_undef_full_mask_f32(
; CHECK: ret i1 true
define i1 @test_class_undef_full_mask_f32() nounwind {
%val = call i1 @llvm.amdgcn.class.f32(float undef, i32 1023)
ret i1 %val
}
; undef value with a partial mask (4) -> undef.
; CHECK-LABEL: @test_class_undef_val_f32(
; CHECK: ret i1 undef
define i1 @test_class_undef_val_f32() nounwind {
%val = call i1 @llvm.amdgcn.class.f32(float undef, i32 4)
ret i1 %val
}
; undef value and undef mask -> undef.
; CHECK-LABEL: @test_class_undef_undef_f32(
; CHECK: ret i1 undef
define i1 @test_class_undef_undef_f32() nounwind {
%val = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)
ret i1 %val
}
; Variable mask: the call is expected to be left unchanged.
; CHECK-LABEL: @test_class_var_mask_f32(
; CHECK: %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask)
define i1 @test_class_var_mask_f32(float %x, i32 %mask) nounwind {
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask)
ret i1 %val
}
; Mask 3 (snan | qnan) is expected to become an unordered compare with 0.0.
; CHECK-LABEL: @test_class_isnan_f32(
; CHECK: %val = fcmp uno float %x, 0.000000e+00
define i1 @test_class_isnan_f32(float %x) nounwind {
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3)
ret i1 %val
}
; ---- Constant f64 values against single-bit masks ----
; Test names read <class of the constant>_test_<class selected by the mask>;
; the expected result is true exactly when the two classes agree.
; NaN constants against the snan (1) and qnan (2) bits.
; CHECK-LABEL: @test_constant_class_snan_test_snan_f64(
; CHECK: ret i1 true
define i1 @test_constant_class_snan_test_snan_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 1)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_qnan_test_qnan_f64(
; CHECK: ret i1 true
define i1 @test_constant_class_qnan_test_qnan_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 2)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_qnan_test_snan_f64(
; CHECK: ret i1 false
define i1 @test_constant_class_qnan_test_snan_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 1)
ret i1 %val
}
; ninf bit (4): only -infinity matches; +infinity and both NaNs do not.
; CHECK-LABEL: @test_constant_class_ninf_test_ninf_f64(
; CHECK: ret i1 true
define i1 @test_constant_class_ninf_test_ninf_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 4)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_pinf_test_ninf_f64(
; CHECK: ret i1 false
define i1 @test_constant_class_pinf_test_ninf_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 4)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_qnan_test_ninf_f64(
; CHECK: ret i1 false
define i1 @test_constant_class_qnan_test_ninf_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 4)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_snan_test_ninf_f64(
; CHECK: ret i1 false
define i1 @test_constant_class_snan_test_ninf_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 4)
ret i1 %val
}
; nnormal bit (8): -1.0 matches, 1.0 does not.
; CHECK-LABEL: @test_constant_class_nnormal_test_nnormal_f64(
; CHECK: ret i1 true
define i1 @test_constant_class_nnormal_test_nnormal_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 8)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_pnormal_test_nnormal_f64(
; CHECK: ret i1 false
define i1 @test_constant_class_pnormal_test_nnormal_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 8)
ret i1 %val
}
; nsubnormal bit (16): 0x800fffffffffffff matches, 0x000fffffffffffff does not.
; CHECK-LABEL: @test_constant_class_nsubnormal_test_nsubnormal_f64(
; CHECK: ret i1 true
define i1 @test_constant_class_nsubnormal_test_nsubnormal_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 16)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_psubnormal_test_nsubnormal_f64(
; CHECK: ret i1 false
define i1 @test_constant_class_psubnormal_test_nsubnormal_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 16)
ret i1 %val
}
; nzero bit (32): -0.0 matches, 0.0 does not.
; CHECK-LABEL: @test_constant_class_nzero_test_nzero_f64(
; CHECK: ret i1 true
define i1 @test_constant_class_nzero_test_nzero_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 32)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_pzero_test_nzero_f64(
; CHECK: ret i1 false
define i1 @test_constant_class_pzero_test_nzero_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 32)
ret i1 %val
}
; pzero bit (64): 0.0 matches, -0.0 does not.
; CHECK-LABEL: @test_constant_class_pzero_test_pzero_f64(
; CHECK: ret i1 true
define i1 @test_constant_class_pzero_test_pzero_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 64)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_nzero_test_pzero_f64(
; CHECK: ret i1 false
define i1 @test_constant_class_nzero_test_pzero_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 64)
ret i1 %val
}
; psubnormal bit (128): 0x000fffffffffffff matches, 0x800fffffffffffff does not.
; CHECK-LABEL: @test_constant_class_psubnormal_test_psubnormal_f64(
; CHECK: ret i1 true
define i1 @test_constant_class_psubnormal_test_psubnormal_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 128)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_nsubnormal_test_psubnormal_f64(
; CHECK: ret i1 false
define i1 @test_constant_class_nsubnormal_test_psubnormal_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 128)
ret i1 %val
}
; pnormal bit (256): 1.0 matches, -1.0 does not.
; CHECK-LABEL: @test_constant_class_pnormal_test_pnormal_f64(
; CHECK: ret i1 true
define i1 @test_constant_class_pnormal_test_pnormal_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 256)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_nnormal_test_pnormal_f64(
; CHECK: ret i1 false
define i1 @test_constant_class_nnormal_test_pnormal_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 256)
ret i1 %val
}
; pinf bit (512): only +infinity matches; -infinity and both NaNs do not.
; CHECK-LABEL: @test_constant_class_pinf_test_pinf_f64(
; CHECK: ret i1 true
define i1 @test_constant_class_pinf_test_pinf_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 512)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_ninf_test_pinf_f64(
; CHECK: ret i1 false
define i1 @test_constant_class_ninf_test_pinf_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 512)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_qnan_test_pinf_f64(
; CHECK: ret i1 false
define i1 @test_constant_class_qnan_test_pinf_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 512)
ret i1 %val
}
; CHECK-LABEL: @test_constant_class_snan_test_pinf_f64(
; CHECK: ret i1 false
define i1 @test_constant_class_snan_test_pinf_f64() nounwind {
%val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 512)
ret i1 %val
}
; --------------------------------------------------------------------
; llvm.amdgcn.cos
; --------------------------------------------------------------------

; The cosine intrinsic's operand may be wrapped in fneg, fabs, or both; in
; every case the expected output calls the intrinsic directly on %x, with
; the wrapper instructions gone. All three tests pin the call as the first
; instruction after the label so that a leftover fneg/fabs would be caught.

declare float @llvm.amdgcn.cos.f32(float) nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone

; cos(-x) -> cos(x); the fneg is spelled as `fsub -0.0, %x`.
; CHECK-LABEL: @cos_fneg_f32(
; CHECK-NEXT: %cos = call float @llvm.amdgcn.cos.f32(float %x)
; CHECK-NEXT: ret float %cos
define float @cos_fneg_f32(float %x) {
  %x.fneg = fsub float -0.0, %x
  %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
  ret float %cos
}

; cos(|x|) -> cos(x).
; CHECK-LABEL: @cos_fabs_f32(
; CHECK-NEXT: %cos = call float @llvm.amdgcn.cos.f32(float %x)
; CHECK-NEXT: ret float %cos
define float @cos_fabs_f32(float %x) {
  %x.fabs = call float @llvm.fabs.f32(float %x)
  %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs)
  ret float %cos
}

; cos(-|x|) -> cos(x); both wrappers are expected to be stripped.
; CHECK-LABEL: @cos_fabs_fneg_f32(
; CHECK-NEXT: %cos = call float @llvm.amdgcn.cos.f32(float %x)
; CHECK-NEXT: ret float %cos
define float @cos_fabs_fneg_f32(float %x) {
  %x.fabs = call float @llvm.fabs.f32(float %x)
  %x.fabs.fneg = fsub float -0.0, %x.fabs
  %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
  ret float %cos
}
; --------------------------------------------------------------------
; llvm.amdgcn.cvt.pkrtz
; --------------------------------------------------------------------
; Expectations for the intrinsic that converts two floats into a <2 x half>.
; The call is expected to stay in place whenever at least one operand is a
; variable; it is expected to fold only when both operands are undef or both
; are constants.
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
; Two variable operands: call unchanged.
; CHECK-LABEL: @vars_lhs_cvt_pkrtz(
; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
define <2 x half> @vars_lhs_cvt_pkrtz(float %x, float %y) {
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
ret <2 x half> %cvt
}
; Constant first operand, variable second operand: call unchanged.
; CHECK-LABEL: @constant_lhs_cvt_pkrtz(
; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float %y)
define <2 x half> @constant_lhs_cvt_pkrtz(float %y) {
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float %y)
ret <2 x half> %cvt
}
; Variable first operand, constant second operand: call unchanged.
; CHECK-LABEL: @constant_rhs_cvt_pkrtz(
; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.000000e+00)
define <2 x half> @constant_rhs_cvt_pkrtz(float %x) {
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.0)
ret <2 x half> %cvt
}
; A single undef operand is not enough to fold: call unchanged.
; CHECK-LABEL: @undef_lhs_cvt_pkrtz(
; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
define <2 x half> @undef_lhs_cvt_pkrtz(float %y) {
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
ret <2 x half> %cvt
}
; CHECK-LABEL: @undef_rhs_cvt_pkrtz(
; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
define <2 x half> @undef_rhs_cvt_pkrtz(float %x) {
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
ret <2 x half> %cvt
}
; Both operands undef -> undef vector.
; CHECK-LABEL: @undef_cvt_pkrtz(
; CHECK: ret <2 x half> undef
define <2 x half> @undef_cvt_pkrtz() {
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef)
ret <2 x half> %cvt
}
; Both operands 0.0 -> zeroinitializer.
; CHECK-LABEL: @constant_splat0_cvt_pkrtz(
; CHECK: ret <2 x half> zeroinitializer
define <2 x half> @constant_splat0_cvt_pkrtz() {
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float 0.0)
ret <2 x half> %cvt
}
; Constants 2.0 and 4.0 -> <half 0xH4000, half 0xH4400>.
; CHECK-LABEL: @constant_cvt_pkrtz(
; CHECK: ret <2 x half> <half 0xH4000, half 0xH4400>
define <2 x half> @constant_cvt_pkrtz() {
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 2.0, float 4.0)
ret <2 x half> %cvt
}
; Test constant values where rtz changes result
; 65535.0 in both lanes is expected to fold to 0xH7BFF in both lanes.
; CHECK-LABEL: @constant_rtz_pkrtz(
; CHECK: ret <2 x half> <half 0xH7BFF, half 0xH7BFF>
define <2 x half> @constant_rtz_pkrtz() {
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 65535.0, float 65535.0)
ret <2 x half> %cvt
}
; --------------------------------------------------------------------
; llvm.amdgcn.ubfe
; --------------------------------------------------------------------
; Expectations for the unsigned bitfield-extract intrinsic. The operands are
; (source, offset, width); offset and width are always i32, the source and
; the result are i32 or i64.
; This first group covers how constant offsets and widths are normalised and
; when a variable-width extract is expanded into shifts.
declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) nounwind readnone
declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) nounwind readnone
; All three operands variable: call unchanged.
; CHECK-LABEL: @ubfe_var_i32(
; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
define i32 @ubfe_var_i32(i32 %src, i32 %offset, i32 %width) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
ret i32 %bfe
}
; Constant offset 133 is expected to be rewritten to 5 (high bits cleared).
; CHECK-LABEL: @ubfe_clear_high_bits_constant_offset_i32(
; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 5, i32 %width)
define i32 @ubfe_clear_high_bits_constant_offset_i32(i32 %src, i32 %width) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 133, i32 %width)
ret i32 %bfe
}
; Constant width 133 is expected to be rewritten to 5 (high bits cleared).
; CHECK-LABEL: @ubfe_clear_high_bits_constant_width_i32(
; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 5)
define i32 @ubfe_clear_high_bits_constant_width_i32(i32 %src, i32 %offset) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 133)
ret i32 %bfe
}
; Width 0 -> constant 0.
; CHECK-LABEL: @ubfe_width_0(
; CHECK-NEXT: ret i32 0
define i32 @ubfe_width_0(i32 %src, i32 %offset) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 0)
ret i32 %bfe
}
; Width 31 with a variable offset: call unchanged.
; CHECK-LABEL: @ubfe_width_31(
; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
define i32 @ubfe_width_31(i32 %src, i32 %offset) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
ret i32 %bfe
}
; Width 32 -> constant 0, the same result as width 0.
; CHECK-LABEL: @ubfe_width_32(
; CHECK-NEXT: ret i32 0
define i32 @ubfe_width_32(i32 %src, i32 %offset) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 32)
ret i32 %bfe
}
; Width 33 is expected to be rewritten to 1.
; CHECK-LABEL: @ubfe_width_33(
; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 1)
define i32 @ubfe_width_33(i32 %src, i32 %offset) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 33)
ret i32 %bfe
}
; Offset 33 is expected to be rewritten to 1.
; CHECK-LABEL: @ubfe_offset_33(
; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 1, i32 %width)
define i32 @ubfe_offset_33(i32 %src, i32 %width) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 33, i32 %width)
ret i32 %bfe
}
; Offset 0 with a variable width is expected to expand to
; shl by (32 - width) followed by lshr by the same amount.
; CHECK-LABEL: @ubfe_offset_0(
; CHECK-NEXT: %1 = sub i32 32, %width
; CHECK-NEXT: %2 = shl i32 %src, %1
; CHECK-NEXT: %bfe = lshr i32 %2, %1
; CHECK-NEXT: ret i32 %bfe
define i32 @ubfe_offset_0(i32 %src, i32 %width) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
ret i32 %bfe
}
; Offset 32 is expected to produce the same expansion as offset 0.
; CHECK-LABEL: @ubfe_offset_32(
; CHECK-NEXT: %1 = sub i32 32, %width
; CHECK-NEXT: %2 = shl i32 %src, %1
; CHECK-NEXT: %bfe = lshr i32 %2, %1
; CHECK-NEXT: ret i32 %bfe
define i32 @ubfe_offset_32(i32 %src, i32 %width) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
ret i32 %bfe
}
; Offset 31 with a variable width. 31 is non-zero and is not altered by the
; high-bit clearing that rewrites 32, 33 and 133 elsewhere in this file, so
; the call is expected to be left in place, exactly like the other non-zero
; constant-offset cases (ubfe_offset_33,
; ubfe_clear_high_bits_constant_offset_i32). Passing 32 here would merely
; repeat ubfe_offset_32 and leave offset 31 untested.
; CHECK-LABEL: @ubfe_offset_31(
; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 31, i32 %width)
; CHECK-NEXT: ret i32 %bfe
define i32 @ubfe_offset_31(i32 %src, i32 %width) {
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 31, i32 %width)
  ret i32 %bfe
}
; Second group of unsigned bitfield-extract tests: constant offset and width
; together, fully constant operands, undef operands, and the i64 variant.
; Offset 0, width 0 -> constant 0.
; CHECK-LABEL: @ubfe_offset_0_width_0(
; CHECK-NEXT: ret i32 0
define i32 @ubfe_offset_0_width_0(i32 %src) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 0)
ret i32 %bfe
}
; Offset 0, width 3 -> and with 7.
; CHECK-LABEL: @ubfe_offset_0_width_3(
; CHECK-NEXT: and i32 %src, 7
; CHECK-NEXT: ret
define i32 @ubfe_offset_0_width_3(i32 %src) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 3)
ret i32 %bfe
}
; Offset 3, width 1 -> lshr by 3, then and with 1.
; CHECK-LABEL: @ubfe_offset_3_width_1(
; CHECK-NEXT: %1 = lshr i32 %src, 3
; CHECK-NEXT: and i32 %1, 1
; CHECK-NEXT: ret i32
define i32 @ubfe_offset_3_width_1(i32 %src) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 1)
ret i32 %bfe
}
; Offset 3, width 4 -> lshr by 3, then and with 15.
; CHECK-LABEL: @ubfe_offset_3_width_4(
; CHECK-NEXT: %1 = lshr i32 %src, 3
; CHECK-NEXT: and i32 %1, 15
; CHECK-NEXT: ret i32
define i32 @ubfe_offset_3_width_4(i32 %src) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 4)
ret i32 %bfe
}
; All operands constant zero -> constant 0.
; CHECK-LABEL: @ubfe_0_0_0(
; CHECK-NEXT: ret i32 0
define i32 @ubfe_0_0_0() {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
ret i32 %bfe
}
; Source -1, offset 5, width 7 -> 127 (seven one-bits, zero-extended).
; CHECK-LABEL: @ubfe_neg1_5_7(
; CHECK-NEXT: ret i32 127
define i32 @ubfe_neg1_5_7() {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 -1, i32 5, i32 7)
ret i32 %bfe
}
; undef source -> undef result, whatever the offset and width are.
; CHECK-LABEL: @ubfe_undef_src_i32(
; CHECK-NEXT: ret i32 undef
define i32 @ubfe_undef_src_i32(i32 %offset, i32 %width) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 undef, i32 %offset, i32 %width)
ret i32 %bfe
}
; undef offset: call unchanged.
; CHECK-LABEL: @ubfe_undef_offset_i32(
; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width)
define i32 @ubfe_undef_offset_i32(i32 %src, i32 %width) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width)
ret i32 %bfe
}
; undef width: call unchanged.
; CHECK-LABEL: @ubfe_undef_width_i32(
; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef)
define i32 @ubfe_undef_width_i32(i32 %src, i32 %offset) {
%bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef)
ret i32 %bfe
}
; i64 source: offset 33 is kept as-is here (it is rewritten to 1 for i32
; in ubfe_offset_33), giving lshr by 33 and and with 15.
; CHECK-LABEL: @ubfe_offset_33_width_4_i64(
; CHECK-NEXT: %1 = lshr i64 %src, 33
; CHECK-NEXT: %bfe = and i64 %1, 15
define i64 @ubfe_offset_33_width_4_i64(i64 %src) {
%bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 33, i32 4)
ret i64 %bfe
}
; i64 source, offset 0, variable width: the shift amount (64 - width) is
; computed in i32 and zero-extended to i64 before the shl/lshr pair.
; CHECK-LABEL: @ubfe_offset_0_i64(
; CHECK-NEXT: %1 = sub i32 64, %width
; CHECK-NEXT: %2 = zext i32 %1 to i64
; CHECK-NEXT: %3 = shl i64 %src, %2
; CHECK-NEXT: %bfe = lshr i64 %3, %2
; CHECK-NEXT: ret i64 %bfe
define i64 @ubfe_offset_0_i64(i64 %src, i32 %width) {
%bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width)
ret i64 %bfe
}
; i64 source, offset 32, width 32 -> a single lshr by 32.
; CHECK-LABEL: @ubfe_offset_32_width_32_i64(
; CHECK-NEXT: %bfe = lshr i64 %src, 32
; CHECK-NEXT: ret i64 %bfe
define i64 @ubfe_offset_32_width_32_i64(i64 %src) {
%bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 32, i32 32)
ret i64 %bfe
}
; --------------------------------------------------------------------
; llvm.amdgcn.sbfe
; --------------------------------------------------------------------

; Expectations for the signed bitfield-extract intrinsic. The operands are
; (source, offset, width), as for the unsigned variant tested earlier in
; this file; the expansions below use ashr where the unsigned tests use lshr.

declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) nounwind readnone
declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) nounwind readnone

; Offset 31 with a variable width: a non-zero constant offset is expected to
; leave the call in place.
; NOTE(review): this expectation is inferred from the unsigned tests with a
; non-zero constant offset (ubfe_offset_33); confirm it against the signed
; fold before relying on it.
; CHECK-LABEL: @sbfe_offset_31(
; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 31, i32 %width)
; CHECK-NEXT: ret i32 %bfe
define i32 @sbfe_offset_31(i32 %src, i32 %width) {
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 31, i32 %width)
  ret i32 %bfe
}

; Offset 32 with a variable width is expected to expand to shl by
; (32 - width) followed by ashr by the same amount. This is the case that
; sbfe_offset_31 exercised while it still passed 32 as the offset.
; CHECK-LABEL: @sbfe_offset_32(
; CHECK-NEXT: %1 = sub i32 32, %width
; CHECK-NEXT: %2 = shl i32 %src, %1
; CHECK-NEXT: %bfe = ashr i32 %2, %1
; CHECK-NEXT: ret i32 %bfe
define i32 @sbfe_offset_32(i32 %src, i32 %width) {
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 32, i32 %width)
  ret i32 %bfe
}

; Source -1, offset 5, width 7 -> -1 (the extracted field is sign-extended).
; CHECK-LABEL: @sbfe_neg1_5_7(
; CHECK-NEXT: ret i32 -1
define i32 @sbfe_neg1_5_7() {
  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 -1, i32 5, i32 7)
  ret i32 %bfe
}

; i64 source, offset 32, width 32 -> a single ashr by 32.
; CHECK-LABEL: @sbfe_offset_32_width_32_i64(
; CHECK-NEXT: %bfe = ashr i64 %src, 32
; CHECK-NEXT: ret i64 %bfe
define i64 @sbfe_offset_32_width_32_i64(i64 %src) {
  %bfe = call i64 @llvm.amdgcn.sbfe.i64(i64 %src, i32 32, i32 32)
  ret i64 %bfe
}
; --------------------------------------------------------------------
; llvm.amdgcn.exp
; --------------------------------------------------------------------
; The intrinsic takes two i32 operands, four float operands and two i1
; operands, and returns nothing.
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind inaccessiblememonly
; Make sure there is no crash when one of the two leading i32 operands is a
; variable instead of a constant: the first call passes %en as the second
; operand, the second call passes %tgt as the first operand. Both calls are
; expected to come out unchanged.
; CHECK-LABEL: @exp_invalid_inputs(
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
define void @exp_invalid_inputs(i32 %tgt, i32 %en) {
call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
ret void
}
; The constant second operand behaves as a 4-bit enable mask in the expected
; output: float source N (0..3) is kept only when bit N of that operand is set,
; and every other source becomes undef. This holds for constant and variable
; sources alike; the two trailing i1 operands are expected to be unchanged.
; CHECK-LABEL: @exp_disabled_inputs_to_undef(
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float 4.000000e+00, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float undef, float undef, float undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float %y, float undef, float undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float %z, float undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float %w, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.000000e+00, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.000000e+00, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.000000e+00, float undef, float undef, float 4.000000e+00, i1 false, i1 false)
; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 false)
define void @exp_disabled_inputs_to_undef(float %x, float %y, float %z, float %w) {
; enable src0..src3 constants
call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
; enable src0..src3 variables
call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float %y, float %z, float %w, i1 true, i1 false)
call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %x, float %y, float %z, float %w, i1 true, i1 false)
call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float %x, float %y, float %z, float %w, i1 true, i1 false)
call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float %x, float %y, float %z, float %w, i1 true, i1 false)
; enable none
call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %x, float %y, float %z, float %w, i1 true, i1 false)
; enable different source combinations
call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
ret void
}
; --------------------------------------------------------------------
; llvm.amdgcn.exp.compr
; --------------------------------------------------------------------
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind inaccessiblememonly
; Compressed export with a variable second operand (%en) or first operand
; (%tgt): both calls are expected to keep all of their source vectors; the half
; constants are only reprinted in hexadecimal form.
; CHECK-LABEL: @exp_compr_invalid_inputs(
; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en) {
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
ret void
}
; Compressed export with a constant second operand. In the expected output the
; first <2 x half> source survives for values 1, 2, 3 and 15, the second source
; survives for values 12 and 15, and any source that does not survive becomes
; undef (so value 0 leaves both sources undef).
; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef(
; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> undef, <2 x half> %zw, i1 true, i1 false)
; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
define void @exp_compr_disabled_inputs_to_undef(<2 x half> %xy, <2 x half> %zw) {
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
ret void
}
; --------------------------------------------------------------------
; llvm.amdgcn.fmed3
; --------------------------------------------------------------------
declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone
; Baseline: three variable operands, so the call is expected to stay as written.
; CHECK-LABEL: @fmed3_f32(
; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
define float @fmed3_f32(float %x, float %y, float %z) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
ret float %med3
}
; Variable first, constants 0.0 and 1.0 after it: the input already has the
; expected operand order, so the call is expected to be unchanged.
; CHECK-LABEL: @fmed3_canonicalize_x_c0_c1_f32(
; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
define float @fmed3_canonicalize_x_c0_c1_f32(float %x) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0.0, float 1.0)
ret float %med3
}
; Variable in the middle position between constants 0.0 and 1.0: expected to be
; reordered to (%x, 0.0, 1.0).
; CHECK-LABEL: @fmed3_canonicalize_c0_x_c1_f32(
; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
define float @fmed3_canonicalize_c0_x_c1_f32(float %x) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %x, float 1.0)
ret float %med3
}
; Variable in the last position after constants 0.0 and 1.0: expected to be
; reordered to (%x, 0.0, 1.0).
; CHECK-LABEL: @fmed3_canonicalize_c0_c1_x_f32(
; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
define float @fmed3_canonicalize_c0_c1_x_f32(float %x) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %x)
ret float %med3
}
; Two variables followed by one constant: the constant is already last, so the
; call is expected to be unchanged.
; CHECK-LABEL: @fmed3_canonicalize_x_y_c_f32(
; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
define float @fmed3_canonicalize_x_y_c_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.0)
ret float %med3
}
; Single constant in the middle position: expected to be moved to the last
; operand, giving (%x, %y, 1.0).
; CHECK-LABEL: @fmed3_canonicalize_x_c_y_f32(
; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
define float @fmed3_canonicalize_x_c_y_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 1.0, float %y)
ret float %med3
}
; Single constant in the first position: expected to be moved to the last
; operand, giving (%x, %y, 1.0).
; CHECK-LABEL: @fmed3_canonicalize_c_x_y_f32(
; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %x, float %y)
ret float %med3
}
; Undef as the first operand: expected to become llvm.minnum.f32 of the two
; remaining operands.
; CHECK-LABEL: @fmed3_undef_x_y_f32(
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
define float @fmed3_undef_x_y_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
ret float %med3
}
; Undef as the first operand on a call carrying the nnan fast-math flag:
; expected to become llvm.minnum.f32 with the nnan flag preserved.
; CHECK-LABEL: @fmed3_fmf_undef_x_y_f32(
; CHECK: call nnan float @llvm.minnum.f32(float %x, float %y)
define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) {
%med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
ret float %med3
}
; Undef as the second operand: expected to become llvm.minnum.f32 of %x and %y.
; CHECK-LABEL: @fmed3_x_undef_y_f32(
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
define float @fmed3_x_undef_y_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y)
ret float %med3
}
; Undef as the third operand: expected to become llvm.minnum.f32 of %x and %y.
; CHECK-LABEL: @fmed3_x_y_undef_f32(
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
define float @fmed3_x_y_undef_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
ret float %med3
}
; NaN constant 0x7FF8000000000000 as the first operand: expected to become
; llvm.minnum.f32 of %x and %y.
; CHECK-LABEL: @fmed3_qnan0_x_y_f32(
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
define float @fmed3_qnan0_x_y_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y)
ret float %med3
}
; NaN constant 0x7FF8000000000000 as the second operand: expected to become
; llvm.minnum.f32 of %x and %y.
; CHECK-LABEL: @fmed3_x_qnan0_y_f32(
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
define float @fmed3_x_qnan0_y_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y)
ret float %med3
}
; NaN constant 0x7FF8000000000000 as the third operand: expected to become
; llvm.minnum.f32 of %x and %y.
; CHECK-LABEL: @fmed3_x_y_qnan0_f32(
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
define float @fmed3_x_y_qnan0_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000)
ret float %med3
}
; NaN constant with a non-zero payload (0x7FF8000100000000) as the first
; operand: expected to become llvm.minnum.f32 of %x and %y as well.
; CHECK-LABEL: @fmed3_qnan1_x_y_f32(
; CHECK: call float @llvm.minnum.f32(float %x, float %y)
define float @fmed3_qnan1_x_y_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y)
ret float %med3
}
; This can return any of the qnans.
; All three operands are distinct NaN constants; the expected output currently
; returns the one passed as the second operand.
; CHECK-LABEL: @fmed3_qnan0_qnan1_qnan2_f32(
; CHECK: ret float 0x7FF8002000000000
define float @fmed3_qnan0_qnan1_qnan2_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float 0x7FF8002000000000, float 0x7FF8030000000000)
ret float %med3
}
; Constant operands in the order (0.5, -1.0, 4.0): expected to fold to the
; middle value 0.5, which is the first operand here.
; CHECK-LABEL: @fmed3_constant_src0_0_f32(
; CHECK: ret float 5.000000e-01
define float @fmed3_constant_src0_0_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float -1.0, float 4.0)
ret float %med3
}
; Constant operands in the order (0.5, 4.0, -1.0): expected to fold to the
; middle value 0.5, which is the first operand here.
; CHECK-LABEL: @fmed3_constant_src0_1_f32(
; CHECK: ret float 5.000000e-01
define float @fmed3_constant_src0_1_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float 4.0, float -1.0)
ret float %med3
}
; Constant operands in the order (-1.0, 0.5, 4.0): expected to fold to the
; middle value 0.5, which is the second operand here.
; CHECK-LABEL: @fmed3_constant_src1_0_f32(
; CHECK: ret float 5.000000e-01
define float @fmed3_constant_src1_0_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0.5, float 4.0)
ret float %med3
}
; Constant operands in the order (4.0, 0.5, -1.0): expected to fold to the
; middle value 0.5, which is the second operand here.
; CHECK-LABEL: @fmed3_constant_src1_1_f32(
; CHECK: ret float 5.000000e-01
define float @fmed3_constant_src1_1_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float 0.5, float -1.0)
ret float %med3
}
; Constant operands in the order (-1.0, 4.0, 0.5): expected to fold to the
; middle value 0.5, which is the third operand here.
; CHECK-LABEL: @fmed3_constant_src2_0_f32(
; CHECK: ret float 5.000000e-01
define float @fmed3_constant_src2_0_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 4.0, float 0.5)
ret float %med3
}
; Constant operands in the order (4.0, -1.0, 0.5): expected to fold to the
; middle value 0.5, which is the third operand here.
; CHECK-LABEL: @fmed3_constant_src2_1_f32(
; CHECK: ret float 5.000000e-01
define float @fmed3_constant_src2_1_f32(float %x, float %y) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float -1.0, float 0.5)
ret float %med3
}
; Variable first, followed by two NaN constants: expected to fold to %x itself.
; CHECK-LABEL: @fmed3_x_qnan0_qnan1_f32(
; CHECK: ret float %x
define float @fmed3_x_qnan0_qnan1_f32(float %x) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000)
ret float %med3
}
; Variable in the middle between two NaN constants: expected to fold to %x.
; CHECK-LABEL: @fmed3_qnan0_x_qnan1_f32(
; CHECK: ret float %x
define float @fmed3_qnan0_x_qnan1_f32(float %x) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000)
ret float %med3
}
; Variable last, after two NaN constants: expected to fold to %x.
; CHECK-LABEL: @fmed3_qnan0_qnan1_x_f32(
; CHECK: ret float %x
define float @fmed3_qnan0_qnan1_x_f32(float %x) {
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x)
ret float %med3
}
; --------------------------------------------------------------------
; llvm.amdgcn.icmp
; --------------------------------------------------------------------
declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) nounwind readnone convergent
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent
; Make sure there's no crash for invalid input
; The comparison-code operand is the variable %c rather than a constant, so the
; call is expected to be left unchanged.
; CHECK-LABEL: @invalid_nonconstant_icmp_code(
; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) {
%result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
ret i64 %result
}
; Constant comparison codes 31 and 42: both calls are expected to be left
; untouched.
; NOTE(review): presumably these lie just below and just above the valid
; integer predicate range (the other tests here use 32..41) - confirm.
; CHECK-LABEL: @invalid_icmp_code(
; CHECK: %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31)
; CHECK: %over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42)
define i64 @invalid_icmp_code(i32 %a, i32 %b) {
%under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31)
%over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42)
%or = or i64 %under, %over
ret i64 %or
}
; Constant operands 9 and 8 with comparison code 32: expected to fold to the
; constant 0.
; CHECK-LABEL: @icmp_constant_inputs_false(
; CHECK: ret i64 0
define i64 @icmp_constant_inputs_false() {
%result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 32)
ret i64 %result
}
; Constant operands 9 and 8 with comparison code 34: expected to be replaced by
; a call to llvm.read_register.i64. The changed line in this revision only
; renumbers the attribute group on that call from #4 to #5.
; CHECK-LABEL: @icmp_constant_inputs_true(
-; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4
+; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #5
define i64 @icmp_constant_inputs_true() {
%result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34)
ret i64 %result
}
; Constant on the left-hand side with comparison code 40: expected to swap the
; operands so the constant is on the right, with the code changed to 38.
; CHECK-LABEL: @icmp_constant_to_rhs_slt(
; CHECK: %result = call i64 @llvm.amdgcn.icmp.i32(i32 %x, i32 9, i32 38)
define i64 @icmp_constant_to_rhs_slt(i32 %x) {
%result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 %x, i32 40)
ret i64 %result
}
; zext(icmp eq %a, %b) compared against 0 with code 33: expected to collapse
; into one intrinsic call on %a and %b with code 32.
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i32(
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
define i64 @fold_icmp_ne_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
%cmp = icmp eq i32 %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
ret i64 %mask
}
; zext(icmp ne %a, %b) compared against 0 with code 33: expected to collapse
; into one intrinsic call on %a and %b with code 33.
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ne_i32(
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 33)
define i64 @fold_icmp_ne_0_zext_icmp_ne_i32(i32 %a, i32 %b) {
%cmp = icmp ne i32 %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
ret i64 %mask
}
; zext(icmp sle %a, %b) compared against 0 with code 33: expected to collapse
; into one intrinsic call on %a and %b with code 41.
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_sle_i32(
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 41)
define i64 @fold_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
%cmp = icmp sle i32 %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
ret i64 %mask
}
; The inner compare is on i64 while the intrinsic call is the i32 variant:
; expected to collapse into the i64 variant of the intrinsic on %a and %b with
; code 34.
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ugt_i64(
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 34)
define i64 @fold_icmp_ne_0_zext_icmp_ugt_i64(i64 %a, i64 %b) {
%cmp = icmp ugt i64 %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
ret i64 %mask
}
; Same shape as the ugt i64 fold, but with the constant 0 as the first operand
; of the intrinsic and the zext as the second: the expected result is still the
; i64 intrinsic on (%a, %b) with code 34.
; NOTE(review): the test name says "ult" but the inner compare is ugt - confirm
; which was intended.
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_swap_i64(
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 34)
define i64 @fold_icmp_ne_0_zext_icmp_ult_swap_i64(i64 %a, i64 %b) {
%cmp = icmp ugt i64 %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 0, i32 %zext.cmp, i32 33)
ret i64 %mask
}
; The inner compare is a float fcmp oeq: expected to collapse into
; llvm.amdgcn.fcmp.f32 on %a and %b with code 1.
; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f32(
; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 1)
define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f32(float %a, float %b) {
%cmp = fcmp oeq float %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
ret i64 %mask
}
; The inner compare is a float fcmp une: expected to collapse into
; llvm.amdgcn.fcmp.f32 on %a and %b with code 14.
; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_une_f32(
; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 14)
define i64 @fold_icmp_ne_0_zext_fcmp_une_f32(float %a, float %b) {
%cmp = fcmp une float %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
ret i64 %mask
}
; The inner compare is a double fcmp olt: expected to collapse into the f64
; variant llvm.amdgcn.fcmp.f64 on %a and %b with code 4.
; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_olt_f64(
; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f64(double %a, double %b, i32 4)
define i64 @fold_icmp_ne_0_zext_fcmp_olt_f64(double %a, double %b) {
%cmp = fcmp olt double %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
ret i64 %mask
}
; Uses sext instead of zext on the i1 compare result; comparing it against 0
; with code 33 is still expected to collapse into a direct call with code 32.
; CHECK-LABEL: @fold_icmp_sext_icmp_ne_0_i32(
; CHECK: %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
define i64 @fold_icmp_sext_icmp_ne_0_i32(i32 %a, i32 %b) {
%cmp = icmp eq i32 %a, %b
%sext.cmp = sext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 0, i32 33)
ret i64 %mask
}
; zext(icmp eq %a, %b) compared against 0 with code 32: expected to collapse
; into a direct call whose code is the opposite of the inner compare (33).
; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_eq_i32(
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 33)
define i64 @fold_icmp_eq_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
%cmp = icmp eq i32 %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
ret i64 %mask
}
; zext(icmp slt %a, %b) compared against 0 with code 32: expected to collapse
; into a direct call on %a and %b with code 39.
; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_slt_i32(
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 39)
define i64 @fold_icmp_eq_0_zext_icmp_slt_i32(i32 %a, i32 %b) {
%cmp = icmp slt i32 %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
ret i64 %mask
}
; zext(fcmp oeq %a, %b) compared against 0 with code 32: expected to collapse
; into llvm.amdgcn.fcmp.f32 on %a and %b with code 14.
; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_oeq_f32(
; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 14)
define i64 @fold_icmp_eq_0_zext_fcmp_oeq_f32(float %a, float %b) {
%cmp = fcmp oeq float %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
ret i64 %mask
}
; zext(fcmp ule %a, %b) compared against 0 with code 32: expected to collapse
; into llvm.amdgcn.fcmp.f32 on %a and %b with code 2.
; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ule_f32(
; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 2)
define i64 @fold_icmp_eq_0_zext_fcmp_ule_f32(float %a, float %b) {
%cmp = fcmp ule float %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
ret i64 %mask
}
; zext(fcmp ogt %a, %b) compared against 0 with code 32: expected to collapse
; into llvm.amdgcn.fcmp.f32 on %a and %b with code 13.
; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ogt_f32(
; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 13)
define i64 @fold_icmp_eq_0_zext_fcmp_ogt_f32(float %a, float %b) {
%cmp = fcmp ogt float %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
ret i64 %mask
}
; zext(icmp eq %a, %b) compared against the constant 1 with code 32: expected to
; collapse into a direct call on %a and %b with code 32.
; CHECK-LABEL: @fold_icmp_zext_icmp_eq_1_i32(
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
define i64 @fold_icmp_zext_icmp_eq_1_i32(i32 %a, i32 %b) {
%cmp = icmp eq i32 %a, %b
%zext.cmp = zext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 1, i32 32)
ret i64 %mask
}
; The i1 comes from a function argument rather than a compare. The zext is
; expected to stay, and the comparison against 1 with code 32 is expected to be
; rewritten as a comparison against 0 with code 33.
; CHECK-LABEL: @fold_icmp_zext_argi1_eq_1_i32(
; CHECK: %zext.cond = zext i1 %cond to i32
; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 0, i32 33)
define i64 @fold_icmp_zext_argi1_eq_1_i32(i1 %cond) {
%zext.cond = zext i1 %cond to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 1, i32 32)
ret i64 %mask
}
; Zero-extended i1 argument compared against -1 with code 32: both the zext and
; the intrinsic call are expected to be left unchanged.
; CHECK-LABEL: @fold_icmp_zext_argi1_eq_neg1_i32(
; CHECK: %zext.cond = zext i1 %cond to i32
; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32)
define i64 @fold_icmp_zext_argi1_eq_neg1_i32(i1 %cond) {
%zext.cond = zext i1 %cond to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32)
ret i64 %mask
}
; Sign-extended i1 argument compared against 1 with code 32: both the sext and
; the intrinsic call are expected to be left unchanged.
; CHECK-LABEL: @fold_icmp_sext_argi1_eq_1_i32(
; CHECK: %sext.cond = sext i1 %cond to i32
; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32)
define i64 @fold_icmp_sext_argi1_eq_1_i32(i1 %cond) {
%sext.cond = sext i1 %cond to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32)
ret i64 %mask
}
; Sign-extended i1 argument compared against -1 with code 32: the sext is
; expected to stay, and the call is expected to be rewritten as a comparison
; against 0 with code 33.
; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i32(
; CHECK: %sext.cond = sext i1 %cond to i32
; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 0, i32 33)
define i64 @fold_icmp_sext_argi1_eq_neg1_i32(i1 %cond) {
%sext.cond = sext i1 %cond to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 -1, i32 32)
ret i64 %mask
}
; i64 variant: an i1 argument sign-extended to i64 and compared against -1 with
; code 32 is expected to be rewritten as a comparison against 0 with code 33.
; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i64(
; CHECK: %sext.cond = sext i1 %cond to i64
; CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 0, i32 33)
define i64 @fold_icmp_sext_argi1_eq_neg1_i64(i1 %cond) {
%sext.cond = sext i1 %cond to i64
%mask = call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 -1, i32 32)
ret i64 %mask
}
; TODO: Should be able to fold to false
; For now the icmp, the sext and the intrinsic call comparing against 1 are all
; expected to remain exactly as written.
; CHECK-LABEL: @fold_icmp_sext_icmp_eq_1_i32(
; CHECK: %cmp = icmp eq i32 %a, %b
; CHECK: %sext.cmp = sext i1 %cmp to i32
; CHECK: %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32)
define i64 @fold_icmp_sext_icmp_eq_1_i32(i32 %a, i32 %b) {
%cmp = icmp eq i32 %a, %b
%sext.cmp = sext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32)
ret i64 %mask
}
; sext(icmp eq %a, %b) compared against -1 with code 32: expected to collapse
; into a direct call on %a and %b with code 32.
; CHECK-LABEL: @fold_icmp_sext_icmp_eq_neg1_i32(
; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
define i64 @fold_icmp_sext_icmp_eq_neg1_i32(i32 %a, i32 %b) {
%cmp = icmp eq i32 %a, %b
%sext.cmp = sext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32)
ret i64 %mask
}
; sext(icmp sge %a, %b) compared against -1 with code 32: expected to collapse
; into a direct call on %a and %b with code 39.
; CHECK-LABEL: @fold_icmp_sext_icmp_sge_neg1_i32(
; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 39)
define i64 @fold_icmp_sext_icmp_sge_neg1_i32(i32 %a, i32 %b) {
%cmp = icmp sge i32 %a, %b
%sext.cmp = sext i1 %cmp to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32)
ret i64 %mask
}
; The icmp sle result is inverted with xor true before the zext; comparing that
; against 0 with code 33 is expected to collapse into a direct call on %a and
; %b with code 38.
; CHECK-LABEL: @fold_not_icmp_ne_0_zext_icmp_sle_i32(
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 38)
define i64 @fold_not_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
%cmp = icmp sle i32 %a, %b
%not = xor i1 %cmp, true
%zext.cmp = zext i1 %not to i32
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
ret i64 %mask
}
; --------------------------------------------------------------------
; llvm.amdgcn.fcmp
; --------------------------------------------------------------------
declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) nounwind readnone convergent
; Make sure there's no crash for invalid input
; The comparison-code operand is the variable %c rather than a constant, so the
; call is expected to be left unchanged.
; CHECK-LABEL: @invalid_nonconstant_fcmp_code(
; CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) {
%result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
ret i64 %result
}
; Constant comparison codes -1 and 16: both calls are expected to be left
; untouched.
; NOTE(review): presumably these lie just outside the valid floating-point
; predicate range - confirm.
; CHECK-LABEL: @invalid_fcmp_code(
; CHECK: %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
; CHECK: %over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16)
define i64 @invalid_fcmp_code(float %a, float %b) {
%under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
%over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16)
%or = or i64 %under, %over
ret i64 %or
}
; Constant operands 2.0 and 4.0 with comparison code 1: expected to fold to the
; constant 0.
; CHECK-LABEL: @fcmp_constant_inputs_false(
; CHECK: ret i64 0
define i64 @fcmp_constant_inputs_false() {
%result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 1)
ret i64 %result
}
; Constant operands 2.0 and 4.0 with comparison code 4: expected to be replaced
; by a call to llvm.read_register.i64. The changed line in this revision only
; renumbers the attribute group on that call from #4 to #5.
; CHECK-LABEL: @fcmp_constant_inputs_true(
-; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4
+; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #5
define i64 @fcmp_constant_inputs_true() {
%result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4)
ret i64 %result
}
; Constant on the left-hand side with comparison code 4: expected to swap the
; operands so the constant is on the right, with the code changed to 2.
; CHECK-LABEL: @fcmp_constant_to_rhs_olt(
; CHECK: %result = call i64 @llvm.amdgcn.fcmp.f32(float %x, float 4.000000e+00, i32 2)
define i64 @fcmp_constant_to_rhs_olt(float %x) {
%result = call i64 @llvm.amdgcn.fcmp.f32(float 4.0, float %x, i32 4)
ret i64 %result
}
-; CHECK: attributes #4 = { convergent }
+; CHECK: attributes #5 = { convergent }
Index: llvm/trunk/test/CodeGen/AMDGPU/zext-lid.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/zext-lid.ll (revision 301936)
+++ llvm/trunk/test/CodeGen/AMDGPU/zext-lid.ll (revision 301937)
@@ -1,83 +1,84 @@
; RUN: llc -march=amdgcn < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-intrinsics < %s | FileCheck -check-prefix=OPT %s
; CHECK-NOT: and_b32
; Kernel using attribute group #0: reads the x, y and z workitem ids, masks each
; with 127 and stores the results to consecutive i32 slots of %arg. The expected
; opt output attaches !range !0 to all three id calls. This revision removes the
; #2 attribute-group reference from the call sites and from the expectations.
; OPT-LABEL: @zext_grp_size_128
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.x() #2, !range !0
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.y() #2, !range !0
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.z() #2, !range !0
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !0
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !0
define amdgpu_kernel void @zext_grp_size_128(i32 addrspace(1)* nocapture %arg) #0 {
bb:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #2
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = and i32 %tmp, 127
store i32 %tmp1, i32 addrspace(1)* %arg, align 4
- %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y() #2
+ %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
%tmp3 = and i32 %tmp2, 127
%tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
- %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z() #2
+ %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
%tmp6 = and i32 %tmp5, 127
%tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
ret void
}
; Kernel carrying !reqd_work_group_size !0: masks the x, y and z workitem ids
; with 31, 3 and 1 respectively and stores them. The expected opt output gives
; each id call its own range metadata (!2, !3, !4). This revision removes the
; #2 attribute-group reference from the call sites and from the expectations.
; OPT-LABEL: @zext_grp_size_32x4x1
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.x() #2, !range !2
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.y() #2, !range !3
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.z() #2, !range !4
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !2
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !3
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !4
define amdgpu_kernel void @zext_grp_size_32x4x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !0 {
bb:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #2
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = and i32 %tmp, 31
store i32 %tmp1, i32 addrspace(1)* %arg, align 4
- %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y() #2
+ %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
%tmp3 = and i32 %tmp2, 3
%tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
- %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z() #2
+ %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
%tmp6 = and i32 %tmp5, 1
%tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
ret void
}
; Kernel using attribute group #1: masks the x, y and z workitem ids with 65535
; and stores them. The expected opt output attaches !range !5 to all three id
; calls. This revision removes the #2 attribute-group reference from the call
; sites and from the expectations.
; OPT-LABEL: @zext_grp_size_512
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.x() #2, !range !5
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.y() #2, !range !5
-; OPT: tail call i32 @llvm.amdgcn.workitem.id.z() #2, !range !5
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !5
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !5
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !5
define amdgpu_kernel void @zext_grp_size_512(i32 addrspace(1)* nocapture %arg) #1 {
bb:
- %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #2
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = and i32 %tmp, 65535
store i32 %tmp1, i32 addrspace(1)* %arg, align 4
- %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y() #2
+ %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
%tmp3 = and i32 %tmp2, 65535
%tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4
- %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z() #2
+ %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
%tmp6 = and i32 %tmp5, 65535
%tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #2
declare i32 @llvm.amdgcn.workitem.id.y() #2
declare i32 @llvm.amdgcn.workitem.id.z() #2
attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" }
attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
-attributes #2 = { nounwind readnone }
+attributes #2 = { nounwind readnone speculatable }
+attributes #3 = { nounwind readnone }
!0 = !{i32 32, i32 4, i32 1}
; OPT: !0 = !{i32 0, i32 128}
; OPT: !1 = !{i32 32, i32 4, i32 1}
; OPT: !2 = !{i32 0, i32 32}
; OPT: !3 = !{i32 0, i32 4}
; OPT: !4 = !{i32 0, i32 1}
; OPT: !5 = !{i32 0, i32 512}
Index: llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (revision 301936)
+++ llvm/trunk/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll (revision 301937)
@@ -1,238 +1,238 @@
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0
; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.amdgcn.workgroup.id.x()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.amdgcn.workgroup.id.y()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %val1, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
store volatile i32 %val1, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.amdgcn.workgroup.id.z()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %val0, i32 addrspace(1)* %ptr
store volatile i32 %val1, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.y()
%val1 = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %val0, i32 addrspace(1)* %ptr
store volatile i32 %val1, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workgroup.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
%val2 = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %val0, i32 addrspace(1)* %ptr
store volatile i32 %val1, i32 addrspace(1)* %ptr
store volatile i32 %val2, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.amdgcn.workitem.id.x()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.amdgcn.workitem.id.y()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
%val = call i32 @llvm.amdgcn.workitem.id.z()
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workgroup.id.x()
store volatile i32 %val0, i32 addrspace(1)* %ptr
store volatile i32 %val1, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workitem.id.y()
%val1 = call i32 @llvm.amdgcn.workgroup.id.y()
store volatile i32 %val0, i32 addrspace(1)* %ptr
store volatile i32 %val1, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
%val2 = call i32 @llvm.amdgcn.workitem.id.z()
store volatile i32 %val0, i32 addrspace(1)* %ptr
store volatile i32 %val1, i32 addrspace(1)* %ptr
store volatile i32 %val2, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
%val0 = call i32 @llvm.amdgcn.workitem.id.x()
%val1 = call i32 @llvm.amdgcn.workitem.id.y()
%val2 = call i32 @llvm.amdgcn.workitem.id.z()
%val3 = call i32 @llvm.amdgcn.workgroup.id.x()
%val4 = call i32 @llvm.amdgcn.workgroup.id.y()
%val5 = call i32 @llvm.amdgcn.workgroup.id.z()
store volatile i32 %val0, i32 addrspace(1)* %ptr
store volatile i32 %val1, i32 addrspace(1)* %ptr
store volatile i32 %val2, i32 addrspace(1)* %ptr
store volatile i32 %val3, i32 addrspace(1)* %ptr
store volatile i32 %val4, i32 addrspace(1)* %ptr
store volatile i32 %val5, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
%val = load i32, i32 addrspace(2)* %bc
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
%dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr()
%bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)*
%val = load i32, i32 addrspace(2)* %bc
store i32 %val, i32 addrspace(1)* %ptr
ret void
}
; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
store volatile i32 0, i32 addrspace(4)* %stof
ret void
}
; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #11 {
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #1 {
%stof = addrspacecast i32* %ptr to i32 addrspace(4)*
store volatile i32 0, i32 addrspace(4)* %stof
ret void
}
; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #1 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
store volatile i32 0, i32 addrspace(3)* %ftos
ret void
}
; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #1 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
store volatile i32 0, i32* %ftos
ret void
}
; No-op addrspacecast should not use queue ptr
; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
%stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
store volatile i32 0, i32 addrspace(4)* %stof
ret void
}
; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #1 {
%stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
%ld = load volatile i32, i32 addrspace(4)* %stof
ret void
}
; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #1 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
store volatile i32 0, i32 addrspace(1)* %ftos
ret void
}
; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #1 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
%ld = load volatile i32, i32 addrspace(2)* %ftos
ret void
}
-attributes #0 = { nounwind readnone }
+attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
-; HSA: attributes #0 = { nounwind readnone }
+; HSA: attributes #0 = { nounwind readnone speculatable }
; HSA: attributes #1 = { nounwind }
; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }
Index: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (revision 301936)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (revision 301937)
@@ -1,736 +1,780 @@
//===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the R600- and AMDGCN-specific intrinsics.
//
//===----------------------------------------------------------------------===//
class AMDGPUReadPreloadRegisterIntrinsic
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
class AMDGPUReadPreloadRegisterIntrinsicNamed<string name>
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, GCCBuiltin<name>;
+ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<name>;
let TargetPrefix = "r600" in {
multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz {
def _x : AMDGPUReadPreloadRegisterIntrinsic;
def _y : AMDGPUReadPreloadRegisterIntrinsic;
def _z : AMDGPUReadPreloadRegisterIntrinsic;
}
multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
def _x : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
def _y : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
def _z : AMDGPUReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
}
defm int_r600_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
<"__builtin_r600_read_global_size">;
defm int_r600_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
<"__builtin_r600_read_ngroups">;
defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
<"__builtin_r600_read_tgid">;
defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;
def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">,
Intrinsic<[], [], [IntrConvergent]>;
// AS 7 is PARAM_I_ADDRESS, used for kernel arguments
def int_r600_implicitarg_ptr :
GCCBuiltin<"__builtin_r600_implicitarg_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [], [IntrNoMem]>;
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [],
+ [IntrNoMem, IntrSpeculatable]>;
def int_r600_rat_store_typed :
// 1st parameter: Data
// 2nd parameter: Index
// 3rd parameter: Constant RAT ID
Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>,
GCCBuiltin<"__builtin_r600_rat_store_typed">;
def int_r600_recipsqrt_ieee : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
def int_r600_recipsqrt_clamped : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
def int_r600_cube : Intrinsic<
- [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]
+ [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable]
>;
} // End TargetPrefix = "r600"
let TargetPrefix = "amdgcn" in {
//===----------------------------------------------------------------------===//
// ABI Special Intrinsics
//===----------------------------------------------------------------------===//
defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
<"__builtin_amdgcn_workgroup_id">;
def int_amdgcn_dispatch_ptr :
GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+ [IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_queue_ptr :
GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+ [IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_kernarg_segment_ptr :
GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+ [IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_implicitarg_ptr :
GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+ [IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_groupstaticsize :
GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_dispatch_id :
GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
- Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_implicit_buffer_ptr :
GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [],
+ [IntrNoMem, IntrSpeculatable]>;
// Set EXEC to the 64-bit value given.
// This is always moved to the beginning of the basic block.
def int_amdgcn_init_exec : Intrinsic<[],
[llvm_i64_ty], // 64-bit literal constant
[IntrConvergent]>;
// Set EXEC according to a thread count packed in an SGPR input:
// thread_count = (input >> bitoffset) & 0x7f;
// This is always moved to the beginning of the basic block.
def int_amdgcn_init_exec_from_input : Intrinsic<[],
[llvm_i32_ty, // 32-bit SGPR input
llvm_i32_ty], // bit offset of the thread count
[IntrConvergent]>;
//===----------------------------------------------------------------------===//
// Instruction Intrinsics
//===----------------------------------------------------------------------===//
// The first parameter is s_sendmsg immediate (i16),
// the second one is copied to m0
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
Intrinsic<[], [], [IntrConvergent]>;
def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
Intrinsic<[], [], [IntrConvergent]>;
def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
Intrinsic<[], [llvm_i32_ty], []>;
def int_amdgcn_div_scale : Intrinsic<
// 1st parameter: Numerator
// 2nd parameter: Denominator
// 3rd parameter: Constant to select between first and
// second. (0 = first, 1 = second).
[llvm_anyfloat_ty, llvm_i1_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
- [IntrNoMem]
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
- [IntrNoMem]
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem]
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_trig_preop : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_sin : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_cos : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_log_clamp : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_rcp : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]
+ Intrinsic<[llvm_float_ty], [llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_rsq : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
Intrinsic<
- [llvm_float_ty], [llvm_float_ty], [IntrNoMem]
+ [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_rsq_clamp : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_ldexp : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_frexp_mant : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_frexp_exp : Intrinsic<
- [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]
+ [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable]
>;
// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
// and always uses rtz, so is not suitable for implementing the OpenCL
// fract function. It should be ok on VI.
def int_amdgcn_fract : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_cvt_pkrtz : Intrinsic<
- [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
+ [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_class : Intrinsic<
- [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]
+ [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">,
Intrinsic<[llvm_anyfloat_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
Intrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">,
Intrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">,
Intrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
Intrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
+ [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
// should be used.
def int_amdgcn_sffbh :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]
+>;
// Fields should mirror atomicrmw
class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
[llvm_anyptr_ty,
LLVMMatchType<0>,
llvm_i32_ty, // ordering
llvm_i32_ty, // scope
llvm_i1_ty], // isVolatile
[IntrArgMemOnly, NoCapture<0>]
>;
def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
class AMDGPUImageLoad : Intrinsic <
[llvm_anyfloat_ty], // vdata(VGPR)
[llvm_anyint_ty, // vaddr(VGPR)
llvm_anyint_ty, // rsrc(SGPR)
llvm_i32_ty, // dmask(imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty, // slc(imm)
llvm_i1_ty, // lwe(imm)
llvm_i1_ty], // da(imm)
[IntrReadMem]>;
def int_amdgcn_image_load : AMDGPUImageLoad;
def int_amdgcn_image_load_mip : AMDGPUImageLoad;
def int_amdgcn_image_getresinfo : AMDGPUImageLoad;
class AMDGPUImageStore : Intrinsic <
[],
[llvm_anyfloat_ty, // vdata(VGPR)
llvm_anyint_ty, // vaddr(VGPR)
llvm_anyint_ty, // rsrc(SGPR)
llvm_i32_ty, // dmask(imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty, // slc(imm)
llvm_i1_ty, // lwe(imm)
llvm_i1_ty], // da(imm)
[]>;
def int_amdgcn_image_store : AMDGPUImageStore;
def int_amdgcn_image_store_mip : AMDGPUImageStore;
class AMDGPUImageSample : Intrinsic <
[llvm_anyfloat_ty], // vdata(VGPR)
[llvm_anyfloat_ty, // vaddr(VGPR)
llvm_anyint_ty, // rsrc(SGPR)
llvm_v4i32_ty, // sampler(SGPR)
llvm_i32_ty, // dmask(imm)
llvm_i1_ty, // unorm(imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty, // slc(imm)
llvm_i1_ty, // lwe(imm)
llvm_i1_ty], // da(imm)
[IntrReadMem]>;
// Basic sample
def int_amdgcn_image_sample : AMDGPUImageSample;
def int_amdgcn_image_sample_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_d : AMDGPUImageSample;
def int_amdgcn_image_sample_d_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_l : AMDGPUImageSample;
def int_amdgcn_image_sample_b : AMDGPUImageSample;
def int_amdgcn_image_sample_b_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_lz : AMDGPUImageSample;
def int_amdgcn_image_sample_cd : AMDGPUImageSample;
def int_amdgcn_image_sample_cd_cl : AMDGPUImageSample;
// Sample with comparison
def int_amdgcn_image_sample_c : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_c_l : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b_cl : AMDGPUImageSample;
def int_amdgcn_image_sample_c_lz : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd_cl : AMDGPUImageSample;
// Sample with offsets
def int_amdgcn_image_sample_o : AMDGPUImageSample;
def int_amdgcn_image_sample_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_d_o : AMDGPUImageSample;
def int_amdgcn_image_sample_d_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_l_o : AMDGPUImageSample;
def int_amdgcn_image_sample_b_o : AMDGPUImageSample;
def int_amdgcn_image_sample_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_lz_o : AMDGPUImageSample;
def int_amdgcn_image_sample_cd_o : AMDGPUImageSample;
def int_amdgcn_image_sample_cd_cl_o : AMDGPUImageSample;
// Sample with comparison and offsets
def int_amdgcn_image_sample_c_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_d_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_l_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_lz_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd_o : AMDGPUImageSample;
def int_amdgcn_image_sample_c_cd_cl_o : AMDGPUImageSample;
// Basic gather4
def int_amdgcn_image_gather4 : AMDGPUImageSample;
def int_amdgcn_image_gather4_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_l : AMDGPUImageSample;
def int_amdgcn_image_gather4_b : AMDGPUImageSample;
def int_amdgcn_image_gather4_b_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_lz : AMDGPUImageSample;
// Gather4 with comparison
def int_amdgcn_image_gather4_c : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_l : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b_cl : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_lz : AMDGPUImageSample;
// Gather4 with offsets
def int_amdgcn_image_gather4_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_l_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_b_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_lz_o : AMDGPUImageSample;
// Gather4 with comparison and offsets
def int_amdgcn_image_gather4_c_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_l_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_b_cl_o : AMDGPUImageSample;
def int_amdgcn_image_gather4_c_lz_o : AMDGPUImageSample;
def int_amdgcn_image_getlod : AMDGPUImageSample;
class AMDGPUImageAtomic : Intrinsic <
[llvm_i32_ty],
[llvm_i32_ty, // vdata(VGPR)
llvm_anyint_ty, // vaddr(VGPR)
llvm_v8i32_ty, // rsrc(SGPR)
llvm_i1_ty, // r128(imm)
llvm_i1_ty, // da(imm)
llvm_i1_ty], // slc(imm)
[]>;
def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_add : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_sub : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_smin : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_umin : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_smax : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_umax : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_and : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_or : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_xor : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_inc : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_dec : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_cmpswap : Intrinsic <
[llvm_i32_ty],
[llvm_i32_ty, // src(VGPR)
llvm_i32_ty, // cmp(VGPR)
llvm_anyint_ty, // vaddr(VGPR)
llvm_v8i32_ty, // rsrc(SGPR)
llvm_i1_ty, // r128(imm)
llvm_i1_ty, // da(imm)
llvm_i1_ty], // slc(imm)
[]>;
class AMDGPUBufferLoad : Intrinsic <
[llvm_anyfloat_ty],
[llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
[IntrReadMem]>;
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;
class AMDGPUBufferStore : Intrinsic <
[],
[llvm_anyfloat_ty, // vdata(VGPR) -- can currently only select f32, v2f32, v4f32
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
[IntrWriteMem]>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
def int_amdgcn_buffer_store : AMDGPUBufferStore;
class AMDGPUBufferAtomic : Intrinsic <
[llvm_i32_ty],
[llvm_i32_ty, // vdata(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty], // slc(imm)
[]>;
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umin : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_smax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_umax : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_and : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_or : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_xor : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
[llvm_i32_ty],
[llvm_i32_ty, // src(VGPR)
llvm_i32_ty, // cmp(VGPR)
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty], // slc(imm)
[]>;
// Uses that do not set the done bit should set IntrWriteMem on the
// call site.
def int_amdgcn_exp : Intrinsic <[], [
llvm_i32_ty, // tgt,
llvm_i32_ty, // en
llvm_any_ty, // src0 (f32 or i32)
LLVMMatchType<0>, // src1
LLVMMatchType<0>, // src2
LLVMMatchType<0>, // src3
llvm_i1_ty, // done
llvm_i1_ty // vm
],
[]
>;
// exp with compr bit set.
def int_amdgcn_exp_compr : Intrinsic <[], [
llvm_i32_ty, // tgt,
llvm_i32_ty, // en
llvm_anyvector_ty, // src0 (v2f16 or v2i16)
LLVMMatchType<0>, // src1
llvm_i1_ty, // done
llvm_i1_ty], // vm
[]
>;
def int_amdgcn_buffer_wbinvl1_sc :
GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
Intrinsic<[], [], []>;
def int_amdgcn_buffer_wbinvl1 :
GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">,
Intrinsic<[], [], []>;
def int_amdgcn_s_dcache_inv :
GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">,
Intrinsic<[], [], []>;
def int_amdgcn_s_memtime :
GCCBuiltin<"__builtin_amdgcn_s_memtime">,
Intrinsic<[llvm_i64_ty], [], []>;
def int_amdgcn_s_sleep :
GCCBuiltin<"__builtin_amdgcn_s_sleep">,
Intrinsic<[], [llvm_i32_ty], []> {
}
def int_amdgcn_s_incperflevel :
GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
Intrinsic<[], [llvm_i32_ty], []> {
}
def int_amdgcn_s_decperflevel :
GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
Intrinsic<[], [llvm_i32_ty], []> {
}
def int_amdgcn_s_getreg :
GCCBuiltin<"__builtin_amdgcn_s_getreg">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
+ [IntrReadMem, IntrSpeculatable]
+>;
// __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0>
// param values: 0 = P10, 1 = P20, 2 = P0
def int_amdgcn_interp_mov :
GCCBuiltin<"__builtin_amdgcn_interp_mov">,
Intrinsic<[llvm_float_ty],
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>;
+ [IntrNoMem, IntrSpeculatable]>;
// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
+// This intrinsic reads from lds, but the memory values are constant,
+// so it behaves like IntrNoMem.
def int_amdgcn_interp_p1 :
GCCBuiltin<"__builtin_amdgcn_interp_p1">,
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>; // This intrinsic reads from lds, but the memory
- // values are constant, so it behaves like IntrNoMem.
+ [IntrNoMem, IntrSpeculatable]>;
// __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
def int_amdgcn_interp_p2 :
GCCBuiltin<"__builtin_amdgcn_interp_p2">,
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem]>; // See int_amdgcn_v_interp_p1 for why this is
- // IntrNoMem.
+ [IntrNoMem, IntrSpeculatable]>;
+ // See int_amdgcn_interp_p1 for why this is IntrNoMem.
// Pixel shaders only: whether the current pixel is live (i.e. not a helper
// invocation for derivative computation).
def int_amdgcn_ps_live : Intrinsic <
[llvm_i1_ty],
[],
[IntrNoMem]>;
def int_amdgcn_mbcnt_lo :
GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_amdgcn_mbcnt_hi :
GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// llvm.amdgcn.ds.swizzle src offset
def int_amdgcn_ds_swizzle :
GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]
+ [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]
+ [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
def int_amdgcn_lerp :
GCCBuiltin<"__builtin_amdgcn_lerp">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
def int_amdgcn_sad_u8 :
GCCBuiltin<"__builtin_amdgcn_sad_u8">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
def int_amdgcn_msad_u8 :
GCCBuiltin<"__builtin_amdgcn_msad_u8">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
def int_amdgcn_sad_hi_u8 :
GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
def int_amdgcn_sad_u16 :
GCCBuiltin<"__builtin_amdgcn_sad_u16">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
def int_amdgcn_qsad_pk_u16_u8 :
GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
def int_amdgcn_mqsad_pk_u16_u8 :
GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
def int_amdgcn_mqsad_u32_u8 :
GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
- Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
def int_amdgcn_cvt_pk_u8_f32 :
GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
def int_amdgcn_icmp :
Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrConvergent]>;
def int_amdgcn_fcmp :
Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrConvergent]>;
def int_amdgcn_readfirstlane :
GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
// The lane argument must be uniform across the currently active threads of the
// current wave. Otherwise, the result is undefined.
def int_amdgcn_readlane :
GCCBuiltin<"__builtin_amdgcn_readlane">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
//===----------------------------------------------------------------------===//
// CI+ Intrinsics
//===----------------------------------------------------------------------===//
def int_amdgcn_s_dcache_inv_vol :
GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
Intrinsic<[], [], []>;
def int_amdgcn_buffer_wbinvl1_vol :
GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">,
Intrinsic<[], [], []>;
//===----------------------------------------------------------------------===//
// VI Intrinsics
//===----------------------------------------------------------------------===//
// llvm.amdgcn.mov.dpp.i32 <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
def int_amdgcn_mov_dpp :
Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i1_ty], [IntrNoMem, IntrConvergent]>;
def int_amdgcn_s_dcache_wb :
GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">,
Intrinsic<[], [], []>;
def int_amdgcn_s_dcache_wb_vol :
GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
Intrinsic<[], [], []>;
def int_amdgcn_s_memrealtime :
GCCBuiltin<"__builtin_amdgcn_s_memrealtime">,
Intrinsic<[llvm_i64_ty], [], []>;
// llvm.amdgcn.ds.permute <index> <src>
def int_amdgcn_ds_permute :
GCCBuiltin<"__builtin_amdgcn_ds_permute">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
// llvm.amdgcn.ds.bpermute <index> <src>
def int_amdgcn_ds_bpermute :
GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
//===----------------------------------------------------------------------===//
// Special Intrinsics for backend internal use only. No frontend
// should emit calls to these.
//===----------------------------------------------------------------------===//
def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
[llvm_i1_ty], [IntrConvergent]
>;
def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
[llvm_i64_ty], [IntrConvergent]
>;
def int_amdgcn_break : Intrinsic<[llvm_i64_ty],
[llvm_i64_ty], [IntrNoMem, IntrConvergent]
>;
def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty],
[llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
>;
def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty],
[llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
>;
def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
[llvm_i64_ty], [IntrConvergent]
>;
def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;
// Represent unreachable in a divergent region.
def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;
// Emit 2.5 ulp, no denormal division. Should only be inserted by
// a pass based on !fpmath metadata.
def int_amdgcn_fdiv_fast : Intrinsic<
- [llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
+ [llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]
>;
}
Index: llvm/trunk/lib/Target/AMDGPU/R600Intrinsics.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600Intrinsics.td (revision 301936)
+++ llvm/trunk/lib/Target/AMDGPU/R600Intrinsics.td (revision 301937)
@@ -1,67 +1,67 @@
//===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Intrinsic Definitions
//
//===----------------------------------------------------------------------===//
class TextureIntrinsicFloatInput : Intrinsic<[llvm_v4f32_ty], [
llvm_v4f32_ty, // Coord
llvm_i32_ty, // offset_x
llvm_i32_ty, // offset_y,
llvm_i32_ty, // offset_z,
llvm_i32_ty, // resource_id
llvm_i32_ty, // samplerid
llvm_i32_ty, // coord_type_x
llvm_i32_ty, // coord_type_y
llvm_i32_ty, // coord_type_z
llvm_i32_ty], // coord_type_w
[IntrNoMem]
>;
class TextureIntrinsicInt32Input : Intrinsic<[llvm_v4i32_ty], [
llvm_v4i32_ty, // Coord
llvm_i32_ty, // offset_x
llvm_i32_ty, // offset_y,
llvm_i32_ty, // offset_z,
llvm_i32_ty, // resource_id
llvm_i32_ty, // samplerid
llvm_i32_ty, // coord_type_x
llvm_i32_ty, // coord_type_y
llvm_i32_ty, // coord_type_z
llvm_i32_ty], // coord_type_w
[IntrNoMem]
>;
let TargetPrefix = "r600", isTarget = 1 in {
def int_r600_store_swizzle :
Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []
>;
def int_r600_store_stream_output : Intrinsic<
[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []
>;
def int_r600_tex : TextureIntrinsicFloatInput;
def int_r600_texc : TextureIntrinsicFloatInput;
def int_r600_txl : TextureIntrinsicFloatInput;
def int_r600_txlc : TextureIntrinsicFloatInput;
def int_r600_txb : TextureIntrinsicFloatInput;
def int_r600_txbc : TextureIntrinsicFloatInput;
def int_r600_txf : TextureIntrinsicInt32Input;
def int_r600_txq : TextureIntrinsicInt32Input;
def int_r600_ddx : TextureIntrinsicFloatInput;
def int_r600_ddy : TextureIntrinsicFloatInput;
def int_r600_dot4 : Intrinsic<[llvm_float_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]
+ [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable]
>;
} // End TargetPrefix = "r600", isTarget = 1

Event Timeline