Index: lib/Target/AMDGPU/VIInstructions.td =================================================================== --- lib/Target/AMDGPU/VIInstructions.td +++ lib/Target/AMDGPU/VIInstructions.td @@ -167,8 +167,7 @@ >; def : ZExt_i16_i1_Pat; - - +//def : ZExt_i16_i1_Pat; def : Pat < (i16 (select i1:$src0, i16:$src1, i16:$src2)), @@ -176,6 +175,7 @@ >; // 16-bit instructions produce a 0 result in the high 16-bits. +/* def : Pat < (add i16:$src0, i16:$src1), (V_ADD_U16_e32 $src0, $src1) @@ -192,5 +192,79 @@ (V_ADD_U16_e32 i16:$src0, i16:$src1), sub0, (V_MOV_B32_e32 (i32 0)), sub1) >; +*/ +//---------------------------------------------------------------------- +// Note: 16-bit instructions produce a 0 result in the high 16-bits. + +multiclass i16_arithmetic_pats { + +def : Pat<(op i16 : $src0, i16 : $src1), + (inst i16:$src0, i16:$src1)>; + +def : Pat<(i32 (zext (op i16:$src0, i16:$src1))), + (inst i16:$src0, i16:$src1)>; + +def : Pat<(i64 (zext (op i16:$src0, i16:$src1))), + (REG_SEQUENCE VReg_64, + (inst i16:$src0, i16:$src1), sub0, + (V_MOV_B32_e32 (i32 0)), sub1)>; +} + +////////////////////////////////////////////////////////////////////////////////// +multiclass i16_bits_op_pats { + +def : Pat<(op i16:$src0, i32:$src1), + (inst i16:$src0, i32:$src1) +>; + +def : Pat <(i32 (zext (op i16:$src0, i32:$src1))), + (inst i16:$src0, i32:$src1) +>; + +def : Pat <(i64 (zext (op i16:$src0, i32:$src1))), + (REG_SEQUENCE VReg_64, + (inst i16:$src0, i32:$src1), sub0, + (V_MOV_B32_e32 (i32 0)), sub1) +>; +} +////////////////////////////////////////////////////////////////////////////////// + + +defm : i16_arithmetic_pats; +defm : i16_arithmetic_pats; + +defm : i16_bits_op_pats; +defm : i16_bits_op_pats; +defm : i16_bits_op_pats; + +defm : i16_arithmetic_pats; +defm : i16_arithmetic_pats; +defm : i16_arithmetic_pats; +defm : i16_arithmetic_pats; + + +//Fixme: do we need setcc instruction? 
+//defm : i16_arithmetic_pats; + +//Looks like we don't need the and, or, xor instructions for i16 +//defm : i16_arithmetic_pats; +//defm : i16_arithmetic_pats; +//defm : i16_arithmetic_pats; + +//Fixme: do we need rotr, rotl instructions? +//defm : i16_arithmetic_pats; //??? +//defm : i16_arithmetic_pats; //??? + +//defm : i16_arithmetic_pats; // ??? +//defm : i16_arithmetic_pats; // ??? + +//defm : i16_arithmetic_pats; // ??? +//defm : i16_arithmetic_pats; // ??? +defm : i16_arithmetic_pats; //??? + +//defm : i16_arithmetic_pats; +//defm : i16_arithmetic_pats; +//defm : i16_arithmetic_pats; + } // End Predicates = [isVI] Index: test/CodeGen/AMDGPU/max.i16.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/max.i16.ll @@ -0,0 +1,168 @@ +; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s + +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone + +; FUNC-LABEL: {{^}}v_test_imax_sge_i16: +; VI: v_max_i16_e32 +define void @v_test_imax_sge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone + %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep0, align 4 + %b = load i16, i16 addrspace(1)* %gep1, align 4 + %cmp = icmp sge i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %outgep, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_test_imax_sge_v4i16: +; VI: v_max_i16_e32 +; VI: v_max_i16_e32 +; VI: v_max_i16_e32 +; VI: v_max_i16_e32 +define void @v_test_imax_sge_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %aptr, <4 x i16> addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone + %gep0 = getelementptr <4 x i16>, <4 x i16> 
addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid + %a = load <4 x i16>, <4 x i16> addrspace(1)* %gep0, align 4 + %b = load <4 x i16>, <4 x i16> addrspace(1)* %gep1, align 4 + %cmp = icmp sge <4 x i16> %a, %b + %val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b + store <4 x i16> %val, <4 x i16> addrspace(1)* %outgep, align 4 + ret void +} + +; FUNC-LABEL: @s_test_imax_sge_i16 +; VI: s_max_i16 +define void @s_test_imax_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { + %cmp = icmp sge i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_test_imax_sge_imm_i16: +; VI: s_max_i16 {{s[0-9]+}}, {{s[0-9]+}}, 9 +define void @s_test_imax_sge_imm_i16(i16 addrspace(1)* %out, i16 %a) nounwind { + %cmp = icmp sge i16 %a, 9 + %val = select i1 %cmp, i16 %a, i16 9 + store i16 %val, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i16: +; VI: s_max_i16 {{s[0-9]+}}, {{s[0-9]+}}, 9 +define void @s_test_imax_sgt_imm_i16(i16 addrspace(1)* %out, i16 %a) nounwind { + %cmp = icmp sgt i16 %a, 9 + %val = select i1 %cmp, i16 %a, i16 9 + store i16 %val, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_v2i16: +; VI: s_max_i16 {{s[0-9]+}}, {{s[0-9]+}}, 9 +; VI: s_max_i16 {{s[0-9]+}}, {{s[0-9]+}}, 9 +define void @s_test_imax_sgt_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a) nounwind { + %cmp = icmp sgt <2 x i16> %a, + %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> + store <2 x i16> %val, <2 x i16> addrspace(1)* %out, align 4 + ret void +} +; FUNC-LABEL: @v_test_imax_sgt_i16 +; VI: v_max_i16_e32 +define void @v_test_imax_sgt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone + 
%gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep0, align 4 + %b = load i16, i16 addrspace(1)* %gep1, align 4 + %cmp = icmp sgt i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %outgep, align 4 + ret void +} + +; FUNC-LABEL: @s_test_imax_sgt_i16 +; VI: s_max_i16 +define void @s_test_imax_sgt_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { + %cmp = icmp sgt i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_test_umax_uge_i16 +; VI: v_max_u32_e32 +define void @v_test_umax_uge_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone + %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep0, align 4 + %b = load i16, i16 addrspace(1)* %gep1, align 4 + %cmp = icmp uge i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %outgep, align 4 + ret void +} + +; FUNC-LABEL: @s_test_umax_uge_i16 +; VI: s_max_u32 +define void @s_test_umax_uge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { + %cmp = icmp uge i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_test_umax_uge_v3i16: +; VI: s_max_u32 +; VI: s_max_u32 +; VI: s_max_u32 +; VI-NOT: s_max_u32 +; VI: s_endpgm +define void @s_test_umax_uge_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) nounwind { + %cmp = icmp uge <3 x i16> %a, %b + %val = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b + store <3 x i16> %val, <3 x 
i16> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_test_umax_ugt_i16 +; VI: v_max_u32_e32 +define void @v_test_umax_ugt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone + %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep0, align 4 + %b = load i16, i16 addrspace(1)* %gep1, align 4 + %cmp = icmp ugt i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %outgep, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_test_umax_ugt_i16: +; VI: s_max_u32 +define void @s_test_umax_ugt_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { + %cmp = icmp ugt i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_test_umax_ugt_imm_v2i16: +; VI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15 +; VI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23 +define void @s_test_umax_ugt_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a) nounwind { + %cmp = icmp ugt <2 x i16> %a, + %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> + store <2 x i16> %val, <2 x i16> addrspace(1)* %out, align 4 + ret void +} + Index: test/CodeGen/AMDGPU/min_test.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/min_test.ll @@ -0,0 +1,188 @@ +; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s + +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone + +; FUNC-LABEL: {{^}}v_test_imin_sle_i16: +; VI: v_min_i16_e32 +define void @v_test_imin_sle_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone + %gep0 = getelementptr i16, i16 
addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep0, align 4 + %b = load i16, i16 addrspace(1)* %gep1, align 4 + %cmp = icmp sle i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %outgep, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_test_imin_sle_i16: +; VI: s_min_i16 +define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { + %cmp = icmp sle i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_test_imin_sle_v1i16: +; VI: s_min_i16 +define void @s_test_imin_sle_v1i16(<1 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind { + %cmp = icmp sle <1 x i16> %a, %b + %val = select <1 x i1> %cmp, <1 x i16> %a, <1 x i16> %b + store <1 x i16> %val, <1 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16: +; VI: v_min_i16 +; VI: v_min_i16 +; VI: v_min_i16 +; VI: v_min_i16 +define void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind { + %cmp = icmp sle <4 x i16> %a, %b + %val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b + store <4 x i16> %val, <4 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @v_test_imin_slt_i16 +; VI: v_min_i16_e32 +define void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone + %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep0, align 4 + %b = load i16, i16 addrspace(1)* %gep1, align 4 + %cmp = icmp slt i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 
addrspace(1)* %outgep, align 4 + ret void +} + +; FUNC-LABEL: @s_test_imin_slt_i16 +; VI: s_min_i16 +define void @s_test_imin_slt_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { + %cmp = icmp slt i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_test_imin_slt_v2i16: +; VI: s_min_i16 +; VI: s_min_i16 +define void @s_test_imin_slt_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) nounwind { + %cmp = icmp slt <2 x i16> %a, %b + %val = select <2 x i1> %cmp, <2 x i16> %a, <2 x i16> %b + store <2 x i16> %val, <2 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i16: +; VI: s_min_i16 {{s[0-9]+}}, {{s[0-9]+}}, 8 +define void @s_test_imin_slt_imm_i16(i16 addrspace(1)* %out, i16 %a) nounwind { + %cmp = icmp slt i16 %a, 8 + %val = select i1 %cmp, i16 %a, i16 8 + store i16 %val, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i16: +; VI: s_min_i16 {{s[0-9]+}}, {{s[0-9]+}}, 8 +define void @s_test_imin_sle_imm_i16(i16 addrspace(1)* %out, i16 %a) nounwind { + %cmp = icmp sle i16 %a, 8 + %val = select i1 %cmp, i16 %a, i16 8 + store i16 %val, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_test_umin_ule_i16 +; VI: v_min_u16_e32 +define void @v_test_umin_ule_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone + %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep0, align 4 + %b = load i16, i16 addrspace(1)* %gep1, align 4 + %cmp = icmp ule i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %outgep, align 4 + ret void +} + +; FUNC-LABEL: @v_test_umin_ule_v3i16 +; VI: v_min_u16_e32 
+; VI: v_min_u16_e32 +; VI: v_min_u16_e32 +; VI-NOT: v_min_u16_e32 +; VI: s_endpgm +define void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, <3 x i16> %b) nounwind { + %cmp = icmp ule <3 x i16> %a, %b + %val = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b + store <3 x i16> %val, <3 x i16> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @s_test_umin_ule_i16 +; VI: s_min_u16 +define void @s_test_umin_ule_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { + %cmp = icmp ule i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @v_test_umin_ult_i16 +; VI: v_min_u16_e32 +define void @v_test_umin_ult_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone + %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep0, align 4 + %b = load i16, i16 addrspace(1)* %gep1, align 4 + %cmp = icmp ult i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %outgep, align 4 + ret void +} + +; FUNC-LABEL: @s_test_umin_ult_i16 +; VI: s_min_u16 +define void @s_test_umin_ult_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { + %cmp = icmp ult i16 %a, %b + %val = select i1 %cmp, i16 %a, i16 %b + store i16 %val, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: @s_test_umin_ult_v1i16 +; VI: s_min_u16 +define void @s_test_umin_ult_v1i16(<1 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind { + %cmp = icmp ult <1 x i16> %a, %b + %val = select <1 x i1> %cmp, <1 x i16> %a, <1 x i16> %b + store <1 x i16> %val, <1 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16: +; VI: s_min_u16 +; VI: s_min_u16 +; VI: s_min_u16 +; VI: 
s_min_u16 +; VI: s_min_u16 +; VI: s_min_u16 +; VI: s_min_u16 +; VI: s_min_u16 +define void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind { + %cmp = icmp ult <8 x i16> %a, %b + %val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b + store <8 x i16> %val, <8 x i16> addrspace(1)* %out + ret void +} + + Index: test/CodeGen/AMDGPU/shl.i16.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/shl.i16.ll @@ -0,0 +1,75 @@ +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare i32 @llvm.r600.read.tidig.x() #0 + +;VI: {{^}}shl_v2i16: +;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + +define void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 1 + %a = load <2 x i16>, <2 x i16> addrspace(1) * %in + %b = load <2 x i16>, <2 x i16> addrspace(1) * %b_ptr + %result = shl <2 x i16> %a, %b + store <2 x i16> %result, <2 x i16> addrspace(1)* %out + ret void +} + +;VI: {{^}}shl_v4i16: +;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + +define void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 1 + %a = load <4 x i16>, <4 x i16> addrspace(1) * %in + %b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr + %result = shl <4 x i16> %a, %b + store <4 x i16> %result, <4 x i16> addrspace(1)* %out + ret void +} + + +;VI: {{^}}shl_i16: +;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} + +define void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { + %b_ptr = getelementptr i16, i16 
addrspace(1)* %in, i16 1 + %a = load i16, i16 addrspace(1) * %in + %b = load i16, i16 addrspace(1) * %b_ptr + %result = shl i16 %a, %b + store i16 %result, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_shl_i16_32_bit_constant: +; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]] +; SI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x12d687{{$}} +; SI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0{{$}} +; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}, [[VAL]] +define void @v_shl_i16_32_bit_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr) { + %a = load i16, i16 addrspace(1)* %aptr, align 8 + %shl = shl i16 1234567, %a + store i16 %shl, i16 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}v_shl_inline_imm_8_i16: +; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\]}}, 8, {{v[0-9]+}} +define void @v_shl_inline_imm_8_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr) { + %a = load i16, i16 addrspace(1)* %aptr, align 8 + %shl = shl i16 8, %a + store i16 %shl, i16 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}s_shl_inline_imm_1_i16: +; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 1, s{{[0-9]+}} +define void @s_shl_inline_imm_1_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 %a) { + %shl = shl i16 1, %a + store i16 %shl, i16 addrspace(1)* %out, align 8 + ret void +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/AMDGPU/sra.i16.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/sra.i16.ll @@ -0,0 +1,90 @@ +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s + +declare i32 @llvm.r600.read.tidig.x() #0 + +; FUNC-LABEL: {{^}}ashr_v2i16: + +; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + +define void @ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* 
%in, i16 1 + %a = load <2 x i16>, <2 x i16> addrspace(1)* %in + %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr + %result = ashr <2 x i16> %a, %b + store <2 x i16> %result, <2 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}ashr_v4i16: + +; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_ashrrev_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} + +define void @ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1 + %a = load <4 x i16>, <4 x i16> addrspace(1)* %in + %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr + %result = ashr <4 x i16> %a, %b + store <4 x i16> %result, <4 x i16> addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: {{^}}ashr_i16_2: + +; VI: v_ashrrev_i16 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}} + +define void @ashr_i16_2(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { +entry: + %b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1 + %a = load i16, i16 addrspace(1)* %in + %b = load i16, i16 addrspace(1)* %b_ptr + %result = ashr i16 %a, %b + store i16 %result, i16 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_ashr_2_i16: +; SI: buffer_load_dword v[[HI:[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; VI: flat_load_dword v[[HI:[0-9]+]] +; GCN: v_ashrrev_i16_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]] +; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[HI]]:[[SHIFT]]{{\]}} +define void @v_ashr_2_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { + %tid = call i32 @llvm.r600.read.tidig.x() #0 + %gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep.in + %result = ashr i16 %a, 2 + store i16 %result, i16 addrspace(1)* %gep.out + ret void +} + +; GCN-LABEL: {{^}}s_ashr_4_i16: +; 
GCN-DAG: s_load_dword s[[HI:[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}} +; GCN: s_ashr_i16 s[[SHIFT:[0-9]+]], s[[HI]], 31 +; GCN: s_mov_b32 s[[COPYSHIFT:[0-9]+]], s[[SHIFT]] +; GCN: s_add_u32 {{s[0-9]+}}, s[[HI]], {{s[0-9]+}} +; GCN: s_addc_u32 {{s[0-9]+}}, s[[COPYSHIFT]], {{s[0-9]+}} +define void @s_ashr_4_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) { + %result = ashr i16 %a, 4 + %add = add i16 %result, %b + store i16 %add, i16 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_ashr_8_i16: +; VI: flat_load_dword v[[HI:[0-9]+]] +define void @v_ashr_8_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { + %tid = call i32 @llvm.r600.read.tidig.x() #0 + %gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid + %gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep.in + %result = ashr i16 %a, 8 + store i16 %result, i16 addrspace(1)* %gep.out + ret void +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/AMDGPU/sub.i16.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/sub.i16.ll @@ -0,0 +1,233 @@ +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s + +; GCN-LABEL: {{^}}v_test_sub_i16: +; VI: flat_load_ushort [[A:v[0-9]+]] +; VI: flat_load_ushort [[B:v[0-9]+]] +; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]] +; VI-NEXT: buffer_store_short [[ADD]] +define void @v_test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid + %gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid + %gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid + %a = load volatile i16, i16 addrspace(1)* %gep.in0 + %b = load volatile i16, i16 addrspace(1)* %gep.in1 + %sub = sub i16 %a, %b + store i16 %sub, i16 addrspace(1)* 
%out + ret void +} + +; GCN-LABEL: {{^}}v_test_sub_i16_constant: +; VI: flat_load_ushort [[A:v[0-9]+]] +; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], 0x7b, [[A]] +; VI-NEXT: buffer_store_short [[ADD]] +define void @v_test_sub_i16_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid + %gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid + %a = load volatile i16, i16 addrspace(1)* %gep.in0 + %sub = sub i16 %a, 123 + store i16 %sub, i16 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_test_sub_i16_neg_constant: +; VI: flat_load_ushort [[A:v[0-9]+]] +; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], 0xfffffcb3, [[A]] +; VI-NEXT: buffer_store_short [[ADD]] +define void @v_test_sub_i16_neg_constant(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid + %gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid + %a = load volatile i16, i16 addrspace(1)* %gep.in0 + %sub = sub i16 %a, -845 + store i16 %sub, i16 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_test_sub_i16_inline_neg1: +; VI: flat_load_ushort [[A:v[0-9]+]] +; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], -1, [[A]] +; VI-NEXT: buffer_store_short [[ADD]] +define void @v_test_sub_i16_inline_neg1(i16 addrspace(1)* %out, i16 addrspace(1)* %in0) #1 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid + %gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid + %a = load volatile i16, i16 addrspace(1)* %gep.in0 + %sub = sub i16 %a, -1 + store i16 %sub, i16 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i32: +; VI: flat_load_ushort [[A:v[0-9]+]] +; VI: flat_load_ushort [[B:v[0-9]+]] +; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]] +; 
VI-NEXT: buffer_store_dword [[ADD]] +define void @v_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid + %gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid + %gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid + %a = load volatile i16, i16 addrspace(1)* %gep.in0 + %b = load volatile i16, i16 addrspace(1)* %gep.in1 + %sub = sub i16 %a, %b + %ext = zext i16 %sub to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i64: +; VI: flat_load_ushort [[A:v[0-9]+]] +; VI: flat_load_ushort [[B:v[0-9]+]] +; VI-DAG: v_sub_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]] +; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0{{$}} +; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}} +define void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid + %gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid + %gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid + %a = load volatile i16, i16 addrspace(1)* %gep.in0 + %b = load volatile i16, i16 addrspace(1)* %gep.in1 + %sub = sub i16 %a, %b + %ext = zext i16 %sub to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_test_sub_i16_sext_to_i32: +; VI: flat_load_ushort [[A:v[0-9]+]] +; VI: flat_load_ushort [[B:v[0-9]+]] +; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]] +; VI-NEXT: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16 +; VI-NEXT: buffer_store_dword [[SEXT]] +define void @v_test_sub_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep.out = getelementptr 
inbounds i32, i32 addrspace(1)* %out, i32 %tid + %gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid + %gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid + %a = load i16, i16 addrspace(1)* %gep.in0 + %b = load i16, i16 addrspace(1)* %gep.in1 + %sub = sub i16 %a, %b + %ext = sext i16 %sub to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_test_sub_i16_sext_to_i64: +; VI: flat_load_ushort [[A:v[0-9]+]] +; VI: flat_load_ushort [[B:v[0-9]+]] +; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]] +; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16 +; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] +; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @v_test_sub_i16_sext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid + %gep.in0 = getelementptr inbounds i16, i16 addrspace(1)* %in0, i32 %tid + %gep.in1 = getelementptr inbounds i16, i16 addrspace(1)* %in1, i32 %tid + %a = load i16, i16 addrspace(1)* %gep.in0 + %b = load i16, i16 addrspace(1)* %gep.in1 + %sub = sub i16 %a, %b + %ext = sext i16 %sub to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}s_test_sub_i16: +; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c +; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30 +; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]] +; VI: v_sub_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]] +; VI-NEXT: buffer_store_short [[RESULT]] +define void @s_test_sub_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) #1 { + %sub = sub i16 %a, %b + store i16 %sub, i16 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}s_test_sub_i16_zeroext_args: +; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c +; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30 +; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]] +; VI: v_sub_u16_e32 
[[RESULT:v[0-9]+]], [[B]], [[VA]] +; VI-NEXT: buffer_store_short [[RESULT]] +define void @s_test_sub_i16_zeroext_args(i16 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 { + %sub = sub i16 %a, %b + store i16 %sub, i16 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}s_test_sub_i16_signext_args: +; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c +; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30 +; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]] +; VI: v_sub_u16_e32 [[RESULT:v[0-9]+]], [[B]], [[VA]] +; VI-NEXT: buffer_store_short [[RESULT]] +define void @s_test_sub_i16_signext_args(i16 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 { + %sub = sub i16 %a, %b + store i16 %sub, i16 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}s_test_sub_i16_zext_to_i32: +; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c +; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30 +; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]] +; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]] +; VI-NEXT: buffer_store_dword [[ADD]] +define void @s_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 { + %sub = sub i16 %a, %b + %ext = zext i16 %sub to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}s_test_sub_i16_zext_to_i64: +; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c +; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30 +; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]] +; VI-DAG: v_sub_u16_e32 v[[LO:[0-9]+]], [[B]], [[VA]] +; VI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} +; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @s_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) #1 { + %sub = sub i16 %a, %b + %ext = zext i16 %sub to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}s_test_sub_i16_sext_to_i32: +; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c +; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30 +; VI-DAG: v_mov_b32_e32 
[[VA:v[0-9]+]], [[A]] +; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]] +; VI-NEXT: v_bfe_i32 [[RESULT:v[0-9]+]], [[ADD]], 0, 16 +; VI-NEXT: buffer_store_dword [[RESULT]] +define void @s_test_sub_i16_sext_to_i32(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 { + %sub = sub i16 %a, %b + %ext = sext i16 %sub to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}s_test_sub_i16_sext_to_i64: +; VI-DAG: s_load_dword [[A:s[0-9]+]], s[0:1], 0x2c +; VI-DAG: s_load_dword [[B:s[0-9]+]], s[0:1], 0x30 +; VI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]] +; VI: v_sub_u16_e32 [[ADD:v[0-9]+]], [[B]], [[VA]] +; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16 +; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] +; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @s_test_sub_i16_sext_to_i64(i64 addrspace(1)* %out, i16 signext %a, i16 signext %b) #1 { + %sub = sub i16 %a, %b + %ext = sext i16 %sub to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind }