R600/SI: Add selection patterns for i1 -> f32/f64 conversions.

The f32 patterns pick between 0 and the IEEE-754 bit pattern of -1.0
(sint_to_fp) or 1.0 (uint_to_fp) with V_CNDMASK_B32_e64. The f64 patterns
pick an integer 0 / -1 / 1 the same way and then convert it with
V_CVT_F64_I32 / V_CVT_F64_U32.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Leading whitespace on context lines is a
best-effort reconstruction; apply with `patch -l` or regenerate against
the tree.
NOTE(review): the RUN lines of sint_to_fp.ll are outside the hunk below; the
new checks there assume the file defines the SI and FUNC prefixes. Confirm.

Index: lib/Target/R600/AMDGPUInstructions.td
===================================================================
--- lib/Target/R600/AMDGPUInstructions.td
+++ lib/Target/R600/AMDGPUInstructions.td
@@ -252,6 +252,8 @@
 int PI = 0x40490fdb;
 int TWO_PI_INV = 0x3e22f983;
 int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
+int FP32_NEG_ONE = 0xbf800000;
+int FP32_ONE = 0x3f800000;
 }
 def CONST : Constants;
 
Index: lib/Target/R600/SIInstructions.td
===================================================================
--- lib/Target/R600/SIInstructions.td
+++ lib/Target/R600/SIInstructions.td
@@ -2439,6 +2439,27 @@
     (S_MOV_B32 -1), sub1)
 >;
 
+// TODO: Although these return an FP result, we could keep this on SALU.
+def : Pat <
+  (f32 (sint_to_fp i1:$src)),
+  (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src)
+>;
+
+def : Pat <
+  (f32 (uint_to_fp i1:$src)),
+  (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_ONE, $src)
+>;
+
+def : Pat <
+  (f64 (sint_to_fp i1:$src)),
+  (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
+>;
+
+def : Pat <
+  (f64 (uint_to_fp i1:$src)),
+  (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
+>;
+
 //===----------------------------------------------------------------------===//
 // Miscellaneous Patterns
 //===----------------------------------------------------------------------===//
Index: test/CodeGen/R600/sint_to_fp.ll
===================================================================
--- test/CodeGen/R600/sint_to_fp.ll
+++ test/CodeGen/R600/sint_to_fp.ll
@@ -29,3 +29,25 @@
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: @sint_to_fp_i1_f32:
+; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI-NEXT: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, -1.000000e+00, [[CMP]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %fp = sitofp i1 %cmp to float
+  store float %fp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @sint_to_fp_i1_f32_load:
+; SI: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, -1.000000e+00
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) {
+  %fp = sitofp i1 %in to float
+  store float %fp, float addrspace(1)* %out, align 4
+  ret void
+}
Index: test/CodeGen/R600/sint_to_fp64.ll
===================================================================
--- test/CodeGen/R600/sint_to_fp64.ll
+++ test/CodeGen/R600/sint_to_fp64.ll
@@ -1,9 +1,33 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; CHECK: @sint_to_fp64
-; CHECK: V_CVT_F64_I32_e32
+; SI: @sint_to_fp64
+; SI: V_CVT_F64_I32_e32
 define void @sint_to_fp64(double addrspace(1)* %out, i32 %in) {
   %result = sitofp i32 %in to double
   store double %result, double addrspace(1)* %out
   ret void
 }
+
+; SI-LABEL: @sint_to_fp_i1_f64:
+; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI-NEXT: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]+]], 0, -1, [[CMP]]
+; SI-NEXT: V_CVT_F64_I32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
+; SI: S_ENDPGM
+define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %fp = sitofp i1 %cmp to double
+  store double %fp, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: @sint_to_fp_i1_f64_load:
+; SI: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]+]], 0, -1
+; SI-NEXT: V_CVT_F64_I32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
+; SI: S_ENDPGM
+define void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
+  %fp = sitofp i1 %in to double
+  store double %fp, double addrspace(1)* %out, align 8
+  ret void
+}
Index: test/CodeGen/R600/uint_to_fp.f64.ll
===================================================================
--- test/CodeGen/R600/uint_to_fp.f64.ll
+++ test/CodeGen/R600/uint_to_fp.f64.ll
@@ -2,8 +2,33 @@
 
 ; SI-LABEL: @uint_to_fp_f64_i32
 ; SI: V_CVT_F64_U32_e32
+; SI: S_ENDPGM
 define void @uint_to_fp_f64_i32(double addrspace(1)* %out, i32 %in) {
   %cast = uitofp i32 %in to double
   store double %cast, double addrspace(1)* %out, align 8
   ret void
 }
+
+; SI-LABEL: @uint_to_fp_i1_f64:
+; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI-NEXT: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]+]], 0, 1, [[CMP]]
+; SI-NEXT: V_CVT_F64_U32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
+; SI: S_ENDPGM
+define void @uint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %fp = uitofp i1 %cmp to double
+  store double %fp, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: @uint_to_fp_i1_f64_load:
+; SI: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]+]], 0, 1
+; SI-NEXT: V_CVT_F64_U32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
+; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
+; SI: S_ENDPGM
+define void @uint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
+  %fp = uitofp i1 %in to double
+  store double %fp, double addrspace(1)* %out, align 8
+  ret void
+}
Index: test/CodeGen/R600/uint_to_fp.ll
===================================================================
--- test/CodeGen/R600/uint_to_fp.ll
+++ test/CodeGen/R600/uint_to_fp.ll
@@ -1,28 +1,30 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 
-; R600-CHECK-LABEL: @uint_to_fp_v2i32
-; R600-CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
-; R600-CHECK-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-; SI-CHECK-LABEL: @uint_to_fp_v2i32
-; SI-CHECK: V_CVT_F32_U32_e32
-; SI-CHECK: V_CVT_F32_U32_e32
+; FUNC-LABEL: @uint_to_fp_v2i32
+; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
+; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
+
+; SI: V_CVT_F32_U32_e32
+; SI: V_CVT_F32_U32_e32
+; SI: S_ENDPGM
 define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
   %result = uitofp <2 x i32> %in to <2 x float>
   store <2 x float> %result, <2 x float> addrspace(1)* %out
   ret void
 }
 
-; R600-CHECK-LABEL: @uint_to_fp_v4i32
-; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-CHECK-LABEL: @uint_to_fp_v4i32
-; SI-CHECK: V_CVT_F32_U32_e32
-; SI-CHECK: V_CVT_F32_U32_e32
-; SI-CHECK: V_CVT_F32_U32_e32
-; SI-CHECK: V_CVT_F32_U32_e32
+; FUNC-LABEL: @uint_to_fp_v4i32
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+; SI: V_CVT_F32_U32_e32
+; SI: V_CVT_F32_U32_e32
+; SI: V_CVT_F32_U32_e32
+; SI: V_CVT_F32_U32_e32
+; SI: S_ENDPGM
 define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %value = load <4 x i32> addrspace(1) * %in
   %result = uitofp <4 x i32> %value to <4 x float>
@@ -30,17 +32,39 @@
   ret void
 }
 
-; R600-CHECK-LABEL: @uint_to_fp_i64_f32
-; R600-CHECK: UINT_TO_FLT
-; R600-CHECK: UINT_TO_FLT
-; R600-CHECK: MULADD_IEEE
-; SI-CHECK-LABEL: @uint_to_fp_i64_f32
-; SI-CHECK: V_CVT_F32_U32_e32
-; SI-CHECK: V_CVT_F32_U32_e32
-; SI-CHECK: V_MAD_F32
+; FUNC-LABEL: @uint_to_fp_i64_f32
+; R600: UINT_TO_FLT
+; R600: UINT_TO_FLT
+; R600: MULADD_IEEE
+; SI: V_CVT_F32_U32_e32
+; SI: V_CVT_F32_U32_e32
+; SI: V_MAD_F32
+; SI: S_ENDPGM
 define void @uint_to_fp_i64_f32(float addrspace(1)* %out, i64 %in) {
 entry:
   %0 = uitofp i64 %in to float
   store float %0, float addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: @uint_to_fp_i1_f32:
+; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI-NEXT: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, 1.000000e+00, [[CMP]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @uint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %fp = uitofp i1 %cmp to float
+  store float %fp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @uint_to_fp_i1_f32_load:
+; SI: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, 1.000000e+00
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) {
+  %fp = uitofp i1 %in to float
+  store float %fp, float addrspace(1)* %out, align 4
+  ret void
+}