Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -908,7 +908,7 @@

 def : GCNPat <
   (fabs f32:$src),
-  (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x7fffffff)))
+  (S_AND_B32 $src, (S_MOV_B32 (i32 0x7fffffff)))
 >;

 def : GCNPat <
@@ -969,12 +969,12 @@

 def : GCNPat <
   (fneg f16:$src),
-  (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x00008000)))
+  (S_XOR_B32 $src, (S_MOV_B32 (i32 0x00008000)))
 >;

 def : GCNPat <
   (fabs f16:$src),
-  (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x00007fff)))
+  (S_AND_B32 $src, (S_MOV_B32 (i32 0x00007fff)))
 >;

 def : GCNPat <
@@ -984,12 +984,12 @@

 def : GCNPat <
   (fneg v2f16:$src),
-  (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80008000)), $src)
+  (S_XOR_B32 (S_MOV_B32 (i32 0x80008000)), $src)
 >;

 def : GCNPat <
   (fabs v2f16:$src),
-  (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fff7fff)), $src)
+  (S_AND_B32 (S_MOV_B32 (i32 0x7fff7fff)), $src)
 >;

 // This is really (fneg (fabs v2f16:$src))
Index: test/CodeGen/AMDGPU/fabs.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fabs.f16.ll
+++ test/CodeGen/AMDGPU/fabs.f16.ll
@@ -8,14 +8,9 @@

 ; GCN-LABEL: {{^}}s_fabs_free_f16:
 ; GCN: s_load_dword [[VAL:s[0-9]+]]
-
-; CI: s_and_b32 [[RESULT:s[0-9]+]], [[VAL]], 0x7fff
-; CI: v_mov_b32_e32 [[V_RESULT:v[0-9]+]], [[RESULT]]
-; CI: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V_RESULT]]
-
-; GFX89: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff
-; GFX89: v_and_b32_e32 [[V_RESULT:v[0-9]+]], [[VAL]], [[MASK]]
-; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V_RESULT]]
+; GCN: s_and_b32 [[RESULT:s[0-9]+]], [[VAL]], 0x7fff
+; GCN: v_mov_b32_e32 [[V_RESULT:v[0-9]+]], [[RESULT]]
+; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V_RESULT]]
 define amdgpu_kernel void @s_fabs_free_f16(half addrspace(1)* %out, i16 %in) {
   %bc= bitcast i16 %in to half
   %fabs = call half @llvm.fabs.f16(half %bc)
@@ -25,14 +20,9 @@

 ; GCN-LABEL: {{^}}s_fabs_f16:
 ; GCN: s_load_dword [[VAL:s[0-9]+]]
-
-; CI: s_and_b32 [[RESULT:s[0-9]+]], [[VAL]], 0x7fff
-; CI: v_mov_b32_e32 [[V_RESULT:v[0-9]+]], [[RESULT]]
-; CI: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V_RESULT]]
-
-; GFX89: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff
-; GFX89: v_and_b32_e32 [[V_RESULT:v[0-9]+]], [[VAL]], [[MASK]]
-; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V_RESULT]]
+; GCN: s_and_b32 [[RESULT:s[0-9]+]], [[VAL]], 0x7fff
+; GCN: v_mov_b32_e32 [[V_RESULT:v[0-9]+]], [[RESULT]]
+; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V_RESULT]]
 define amdgpu_kernel void @s_fabs_f16(half addrspace(1)* %out, half %in) {
   %fabs = call half @llvm.fabs.f16(half %in)
   store half %fabs, half addrspace(1)* %out
Index: test/CodeGen/AMDGPU/fabs.ll
===================================================================
--- test/CodeGen/AMDGPU/fabs.ll
+++ test/CodeGen/AMDGPU/fabs.ll
@@ -7,37 +7,35 @@

 ; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
 ; unless isFabsFree returns true

-; FUNC-LABEL: {{^}}fabs_fn_free:
+; FUNC-LABEL: {{^}}s_fabs_fn_free:
 ; R600-NOT: AND
 ; R600: |PV.{{[XYZW]}}|
-; GCN: v_and_b32
-
-define amdgpu_kernel void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
+; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff
+define amdgpu_kernel void @s_fabs_fn_free(float addrspace(1)* %out, i32 %in) {
   %bc= bitcast i32 %in to float
   %fabs = call float @fabs(float %bc)
   store float %fabs, float addrspace(1)* %out
   ret void
 }

-; FUNC-LABEL: {{^}}fabs_free:
+; FUNC-LABEL: {{^}}s_fabs_free:
 ; R600-NOT: AND
 ; R600: |PV.{{[XYZW]}}|
-; GCN: v_and_b32
-
-define amdgpu_kernel void @fabs_free(float addrspace(1)* %out, i32 %in) {
+; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff
+define amdgpu_kernel void @s_fabs_free(float addrspace(1)* %out, i32 %in) {
   %bc= bitcast i32 %in to float
   %fabs = call float @llvm.fabs.f32(float %bc)
   store float %fabs, float addrspace(1)* %out
   ret void
 }

-; FUNC-LABEL: {{^}}fabs_f32:
+; FUNC-LABEL: {{^}}s_fabs_f32:
 ; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-; GCN: v_and_b32
-define amdgpu_kernel void @fabs_f32(float addrspace(1)* %out, float %in) {
+; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff
+define amdgpu_kernel void @s_fabs_f32(float addrspace(1)* %out, float %in) {
   %fabs = call float @llvm.fabs.f32(float %in)
   store float %fabs, float addrspace(1)* %out
   ret void
Index: test/CodeGen/AMDGPU/fneg-fabs.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -145,8 +145,11 @@

 ; GCN-LABEL: {{^}}s_fneg_multi_use_fabs_v2f16:
 ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
-; GFX9: v_mov_b32_e32 [[VABS:v[0-9]+]], [[ABS]]
-; GFX9: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80008000, [[VABS]]
+; GFX9: v_mov_b32_e32 [[V_ABS:v[0-9]+]], [[ABS]]
+; GFX9: s_xor_b32 [[NEG:s[0-9]+]], 0x80008000, [[ABS]]
+; GFX9-DAG: v_mov_b32_e32 [[V_NEG:v[0-9]+]], [[NEG]]
+; GFX9-DAG: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[V_ABS]]
+; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[V_NEG]]
 define amdgpu_kernel void @s_fneg_multi_use_fabs_v2f16(<2 x half> addrspace(1)* %out0, <2 x half> addrspace(1)* %out1, <2 x half> %in) {
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
   %fneg = fsub <2 x half> <half -0.0, half -0.0>, %fabs
Index: test/CodeGen/AMDGPU/fneg.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fneg.f16.ll
+++ test/CodeGen/AMDGPU/fneg.f16.ll
@@ -30,14 +30,9 @@

 ; GCN-LABEL: {{^}}s_fneg_free_f16:
 ; GCN: s_load_dword [[NEG_VALUE:s[0-9]+]],
-
-; CI: s_xor_b32 [[XOR:s[0-9]+]], [[NEG_VALUE]], 0x8000{{$}}
-; CI: v_mov_b32_e32 [[V_XOR:v[0-9]+]], [[XOR]]
-; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V_XOR]]
-
-; GFX89: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x8000
-; GFX89: v_xor_b32_e32 [[XOR:v[0-9]+]], [[NEG_VALUE]], [[MASK]]
-; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[XOR]]
+; GCN: s_xor_b32 [[XOR:s[0-9]+]], [[NEG_VALUE]], 0x8000{{$}}
+; GCN: v_mov_b32_e32 [[V_XOR:v[0-9]+]], [[XOR]]
+; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[V_XOR]]
 define amdgpu_kernel void @s_fneg_free_f16(half addrspace(1)* %out, i16 %in) #0 {
   %bc = bitcast i16 %in to half
   %fsub = fsub half -0.0, %bc
@@ -64,20 +59,19 @@
   ret void
 }

-; FIXME: scalar for VI, vector for gfx9
+; FIXME: random commute
 ; GCN-LABEL: {{^}}s_fneg_v2f16:
 ; CIVI: s_xor_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
-; GFX9: v_xor_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
+; GFX9: s_xor_b32 s{{[0-9]+}}, 0x80008000, s{{[0-9]+}}
 define amdgpu_kernel void @s_fneg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) #0 {
   %fneg = fsub <2 x half> <half -0.0, half -0.0>, %in
   store <2 x half> %fneg, <2 x half> addrspace(1)* %out
   ret void
 }

-; FIXME: vector on gfx9
 ; GCN-LABEL: {{^}}s_fneg_v2f16_nonload:
 ; CIVI: s_xor_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
-; GFX9: v_xor_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
+; GFX9: s_xor_b32 s{{[0-9]+}}, 0x80008000, s{{[0-9]+}}
 define amdgpu_kernel void @s_fneg_v2f16_nonload(<2 x half> addrspace(1)* %out) #0 {
   %in = call i32 asm sideeffect "; def $0", "=s"()
   %in.bc = bitcast i32 %in to <2 x half>
@@ -102,9 +96,7 @@
 ; GCN-LABEL: {{^}}fneg_free_v2f16:
 ; GCN: s_load_dword [[VAL:s[0-9]+]]
 ; CIVI: s_xor_b32 s{{[0-9]+}}, [[VAL]], 0x80008000
-
-; GFX9: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GFX9: v_xor_b32_e32 v{{[0-9]+}}, 0x80008000, [[VVAL]]
+; GFX9: s_xor_b32 s{{[0-9]+}}, 0x80008000, [[VAL]]
 define amdgpu_kernel void @fneg_free_v2f16(<2 x half> addrspace(1)* %out, i32 %in) #0 {
   %bc = bitcast i32 %in to <2 x half>
   %fsub = fsub <2 x half> <half -0.0, half -0.0>, %bc
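
For context only (not part of the patch): the comment kept in fabs.ll above describes the integer form the DAG combiner produces when fabs is not considered free, and the updated CHECK lines simply expect that mask/xor to be selected as scalar ALU instructions (s_and_b32/s_xor_b32) instead of VALU ones. The standalone LLVM IR sketch below spells out that fabs-as-sign-mask equivalence; the function names are hypothetical and chosen only for illustration.

; Illustrative sketch, not taken from the patch. Both functions compute the
; same result; the second is the bit-mask form named in the fabs.ll comment:
; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF)).
declare float @llvm.fabs.f32(float)

define float @fabs_intrinsic(float %x) {
  %r = call float @llvm.fabs.f32(float %x)
  ret float %r
}

define float @fabs_as_mask(float %x) {
  %bits = bitcast float %x to i32
  %masked = and i32 %bits, 2147483647    ; 0x7FFFFFFF clears the IEEE sign bit
  %res = bitcast i32 %masked to float
  ret float %res
}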