Add the llvm.amdgcn.cvt.pk.u8.f32 intrinsic (GCC builtin
__builtin_amdgcn_cvt_pk_u8_f32) and select it to V_CVT_PK_U8_F32 through a
new VOP_I32_F32_I32_I32 profile. Also drops trailing whitespace next to the
touched definitions and adds a codegen test.

NOTE(review): this copy of the patch had its line breaks collapsed and every
"<ident...>" template argument stripped. Restored here, to be confirmed
against the tree before applying:
  * vop3<0x15e, 0x1dd> on the new V_CVT_PK_U8_F32 defm. 0x15e is taken from
    the commented-out VOP3_U8 line in the same hunk; 0x1dd is presumed from
    the neighbouring V_DIV_FIXUP_F32 (0x15f, 0x1de) -- TODO confirm.
  * the vop3<...> arguments on the V_ALIGNBIT_B32 / V_SAD_U32 context lines
    and the SIInstAlias template parameter list (context only) -- TODO confirm.
  * indentation, and the exact trailing whitespace on the removed lines.

Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -407,12 +407,16 @@
   GCCBuiltin<"__builtin_amdgcn_lerp">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
 
+def int_amdgcn_cvt_pk_u8_f32 :
+  GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
+  Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
 def int_amdgcn_icmp :
-  Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty], 
+  Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
             [IntrNoMem, IntrConvergent]>;
 
 def int_amdgcn_fcmp :
-  Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty], 
+  Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
             [IntrNoMem, IntrConvergent]>;
 
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -1655,6 +1655,7 @@
 def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
 def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
 def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
+def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
 
 // This class is used only with VOPC instructions. Use $sdst for out operand
 class SIInstAlias <string asm, Instruction inst, VOPProfile p> :
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1565,7 +1565,7 @@
   VOP_I32_I32_I32_I32, int_amdgcn_lerp
 >;
 } // End isCommutable = 1
-  
+
 //def V_LERP_U8 : VOP3_U8 <0x0000014d, "v_lerp_u8", []>;
 defm V_ALIGNBIT_B32 : VOP3Inst <vop3<0x14e, 0x1ce>, "v_alignbit_b32",
   VOP_I32_I32_I32_I32
@@ -1608,6 +1608,11 @@
 defm V_SAD_U32 : VOP3Inst <vop3<0x15d, 0x1dc>, "v_sad_u32",
   VOP_I32_I32_I32_I32
 >;
+
+defm V_CVT_PK_U8_F32 : VOP3Inst<vop3<0x15e, 0x1dd>, "v_cvt_pk_u8_f32",
+  VOP_I32_F32_I32_I32, int_amdgcn_cvt_pk_u8_f32
+>;
+
 //def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
 defm V_DIV_FIXUP_F32 : VOP3Inst <
   vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
Index: test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll
@@ -0,0 +1,61 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i32 @llvm.amdgcn.cvt.pk.u8.f32(float, i32, i32) #0
+
+; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_0:
+; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 0, v{{[0-9]+}}
+define void @v_cvt_pk_u8_f32_idx_0(i32 addrspace(1)* %out, float %src, i32 %reg) #1 {
+  %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 0, i32 %reg) #0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_1:
+; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 1, v{{[0-9]+}}
+define void @v_cvt_pk_u8_f32_idx_1(i32 addrspace(1)* %out, float %src, i32 %reg) #1 {
+  %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 1, i32 %reg) #0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_2:
+; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 2, v{{[0-9]+}}
+define void @v_cvt_pk_u8_f32_idx_2(i32 addrspace(1)* %out, float %src, i32 %reg) #1 {
+  %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 2, i32 %reg) #0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_3:
+; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 3, v{{[0-9]+}}
+define void @v_cvt_pk_u8_f32_idx_3(i32 addrspace(1)* %out, float %src, i32 %reg) #1 {
+  %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 3, i32 %reg) #0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_combine:
+; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 0, v{{[0-9]+}}
+; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 1, v{{[0-9]+}}
+; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 2, v{{[0-9]+}}
+; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 3, v{{[0-9]+}}
+define void @v_cvt_pk_u8_f32_combine(i32 addrspace(1)* %out, float %src, i32 %reg) #1 {
+  %result0 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 0, i32 %reg) #0
+  %result1 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 1, i32 %result0) #0
+  %result2 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 2, i32 %result1) #0
+  %result3 = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 3, i32 %result2) #0
+  store i32 %result3, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx:
+; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_cvt_pk_u8_f32_idx(i32 addrspace(1)* %out, float %src, i32 %idx, i32 %reg) #1 {
+  %result = call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src, i32 %idx, i32 %reg) #0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }