Index: lib/Target/R600/SIInstructions.td
===================================================================
--- lib/Target/R600/SIInstructions.td
+++ lib/Target/R600/SIInstructions.td
@@ -1905,9 +1905,9 @@
     (EXTRACT_SUBREG i64:$src1, sub1)), sub1)
 >;
 
-def : BinOp64Pat <and, V_AND_B32_e32>;
-def : BinOp64Pat <or, V_OR_B32_e32>;
-def : BinOp64Pat <xor, V_XOR_B32_e32>;
+def : BinOp64Pat <and, V_AND_B32_e64>;
+def : BinOp64Pat <or, V_OR_B32_e64>;
+def : BinOp64Pat <xor, V_XOR_B32_e64>;
 
 class SextInReg <ValueType vt, int ShiftAmt> : Pat <
   (sext_inreg i32:$src0, vt),
@@ -1919,7 +1919,7 @@
 
 def : Pat <
   (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
-  (V_BCNT_U32_B32_e32 $popcnt, $val)
+  (V_BCNT_U32_B32_e64 $popcnt, $val)
 >;
 
 def : Pat <
@@ -1939,7 +1939,7 @@
 
 def : Pat <
   (addc i32:$src0, i32:$src1),
-  (V_ADD_I32_e32 $src0, $src1)
+  (V_ADD_I32_e64 $src0, $src1)
 >;
 
 /********** ======================= **********/
@@ -2961,13 +2961,13 @@
 
 def : Pat <
   (i1 (trunc i32:$a)),
-  (V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1)
+  (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
 >;
 
 //============================================================================//
 // Miscellaneous Optimization Patterns
 //============================================================================//
 
-def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e32>;
+def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
 
 } // End isSI predicate
Index: test/CodeGen/R600/and.ll
===================================================================
--- test/CodeGen/R600/and.ll
+++ test/CodeGen/R600/and.ll
@@ -129,11 +129,30 @@
 }
 
 ; FUNC-LABEL: @v_and_constant_i64
-; SI: V_AND_B32
-; SI: V_AND_B32
+; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
   %a = load i64 addrspace(1)* %aptr, align 8
   %and = and i64 %a, 1234567
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
 }
+
+; FIXME: Replace and 0 with mov 0
+; FUNC-LABEL: @v_and_inline_imm_i64
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
+define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+  %a = load i64 addrspace(1)* %aptr, align 8
+  %and = and i64 %a, 64
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @s_and_inline_imm_i64
+; SI: S_AND_B64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
+define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %and = and i64 %a, 64
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
Index: test/CodeGen/R600/bfi_int.ll
===================================================================
--- test/CodeGen/R600/bfi_int.ll
+++ test/CodeGen/R600/bfi_int.ll
@@ -38,8 +38,8 @@
 ; R600-CHECK: @bfi_sha256_ma
 ; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
 ; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
-; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{[sv][0-9]+, v[0-9]+}}
-; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{[sv][0-9]+, [sv][0-9]+}}
+; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
+; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
 
 define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
 entry:
Index: test/CodeGen/R600/ctpop.ll
===================================================================
--- test/CodeGen/R600/ctpop.ll
+++ test/CodeGen/R600/ctpop.ll
@@ -42,8 +42,7 @@
 ; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
 ; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
 ; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
-; SI-NOT: ADD
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
+; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
 ; SI: BUFFER_STORE_DWORD [[RESULT]],
 ; SI: S_ENDPGM
 
@@ -59,6 +58,20 @@
   ret void
 }
 
+; FUNC-LABEL: @v_ctpop_add_sgpr_i32
+; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
+; SI-NEXT: S_WAITCNT
+; SI-NEXT: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
+; SI-NEXT: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
+  %val0 = load i32 addrspace(1)* %in0, align 4
+  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
+  %add = add i32 %ctpop0, %sval
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
 ; FUNC-LABEL: @v_ctpop_v2i32:
 ; SI: V_BCNT_U32_B32_e32
 ; SI: V_BCNT_U32_B32_e32
Index: test/CodeGen/R600/llvm.AMDGPU.rsq.ll
===================================================================
--- test/CodeGen/R600/llvm.AMDGPU.rsq.ll
+++ test/CodeGen/R600/llvm.AMDGPU.rsq.ll
@@ -4,10 +4,29 @@
 declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone
 
 ; FUNC-LABEL: @rsq_f32
-; SI: V_RSQ_F32_e32
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 ; EG: RECIPSQRT_IEEE
 define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind {
   %rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone
   store float %rsq, float addrspace(1)* %out, align 4
   ret void
 }
+
+; TODO: Really these should be constant folded
+; FUNC-LABEL: @rsq_f32_constant_4.0
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, 4.0
+; EG: RECIPSQRT_IEEE
+define void @rsq_f32_constant_4.0(float addrspace(1)* %out) nounwind {
+  %rsq = call float @llvm.AMDGPU.rsq.f32(float 4.0) nounwind readnone
+  store float %rsq, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @rsq_f32_constant_100.0
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, 0x42c80000
+; EG: RECIPSQRT_IEEE
+define void @rsq_f32_constant_100.0(float addrspace(1)* %out) nounwind {
+  %rsq = call float @llvm.AMDGPU.rsq.f32(float 100.0) nounwind readnone
+  store float %rsq, float addrspace(1)* %out, align 4
+  ret void
+}
Index: test/CodeGen/R600/rsq.ll
===================================================================
--- test/CodeGen/R600/rsq.ll
+++ test/CodeGen/R600/rsq.ll
@@ -26,3 +26,13 @@
   store double %div, double addrspace(1)* %out, align 4
   ret void
 }
+
+; SI-LABEL: @rsq_f32_sgpr
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+; SI: S_ENDPGM
+define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
+  %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
+  %div = fdiv float 1.0, %sqrt
+  store float %div, float addrspace(1)* %out, align 4
+  ret void
+}
Index: test/CodeGen/R600/trunc.ll
===================================================================
--- test/CodeGen/R600/trunc.ll
+++ test/CodeGen/R600/trunc.ll
@@ -46,9 +46,20 @@
 }
 
 ; SI-LABEL: @trunc_i32_to_i1:
-; SI: V_AND_B32
+; SI: V_AND_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
 ; SI: V_CMP_EQ_I32
-define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
+define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
+  %a = load i32 addrspace(1)* %ptr, align 4
+  %trunc = trunc i32 %a to i1
+  %result = select i1 %trunc, i32 1, i32 0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @sgpr_trunc_i32_to_i1:
+; SI: V_AND_B32_e64 v{{[0-9]+}}, 1, s{{[0-9]+}}
+; SI: V_CMP_EQ_I32
+define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
   %trunc = trunc i32 %a to i1
   %result = select i1 %trunc, i32 1, i32 0
   store i32 %result, i32 addrspace(1)* %out, align 4