Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -871,6 +871,11 @@
 >;
 
 def : Pat <
+  (i16 (sext_inreg i16:$src, i1)),
+  (S_BFE_I32 $src, (i32 0x00010000)) // 0 | 1 << 16
+>;
+
+def : Pat <
   (i16 (sext_inreg i16:$src, i8)),
   (S_BFE_I32 $src, (i32 0x80000)) // 0 | 8 << 16
 >;
Index: test/CodeGen/AMDGPU/sext-in-reg.ll
===================================================================
--- test/CodeGen/AMDGPU/sext-in-reg.ll
+++ test/CodeGen/AMDGPU/sext-in-reg.ll
@@ -2,6 +2,8 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
+; FIXME: i16 promotion pass ruins the scalar cases when legal.
+
 ; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
 ; GCN: s_load_dword [[ARG:s[0-9]+]],
 ; GCN: s_bfe_i32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
@@ -659,6 +661,137 @@
   ret void
 }
 
+; FUNC-LABEL: {{^}}s_sext_in_reg_i1_i16:
+; GCN: s_load_dword [[VAL:s[0-9]+]]
+
+; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0x10000
+; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
+; SI: buffer_store_short [[VBFE]]
+
+; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
+; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
+; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 15
+define void @s_sext_in_reg_i1_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+  %ld = load i32, i32 addrspace(2)* %ptr
+  %in = trunc i32 %ld to i16
+  %shl = shl i16 %in, 15
+  %sext = ashr i16 %shl, 15
+  store i16 %sext, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_sext_in_reg_i2_i16:
+; GCN: s_load_dword [[VAL:s[0-9]+]]
+
+; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0x20000
+; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
+; SI: buffer_store_short [[VBFE]]
+
+; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
+; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
+; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14
+define void @s_sext_in_reg_i2_i16(i16 addrspace(1)* %out, i32 addrspace(2)* %ptr) #0 {
+  %ld = load i32, i32 addrspace(2)* %ptr
+  %in = trunc i32 %ld to i16
+  %shl = shl i16 %in, 14
+  %sext = ashr i16 %shl, 14
+  store i16 %sext, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i1_i16:
+; GCN: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]]
+; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[VAL]], 0, 1{{$}}
+
+; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
+define void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addrspace(1)* %ptr) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %gep = getelementptr i16, i16 addrspace(1)* %ptr, i32 %tid
+  %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
+
+  %in = load i16, i16 addrspace(1)* %gep
+  %shl = shl i16 %in, 15
+  %sext = ashr i16 %shl, 15
+  store i16 %sext, i16 addrspace(3)* %out.gep
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i1_i16_nonload:
+; GCN: {{buffer|flat}}_load_ushort [[VAL0:v[0-9]+]]
+; GCN: {{buffer|flat}}_load_ushort [[VAL1:v[0-9]+]]
+
+; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]]
+; VI: v_lshlrev_b16_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]]
+
+; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}}
+; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
+define void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 %s.val) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %a.gep = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
+  %b.gep = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
+  %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
+  %a = load volatile i16, i16 addrspace(1)* %a.gep, align 2
+  %b = load volatile i16, i16 addrspace(1)* %b.gep, align 2
+
+  %c = shl i16 %a, %b
+  %shl = shl i16 %c, 15
+  %ashr = ashr i16 %shl, 15
+
+  store i16 %ashr, i16 addrspace(3)* %out.gep, align 2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_sext_in_reg_i2_i16_arg:
+; GCN: s_load_dword [[VAL:s[0-9]+]]
+
+; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0x20000
+; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
+; SI: buffer_store_short [[VBFE]]
+
+; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}
+; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
+; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 14{{$}}
+define void @s_sext_in_reg_i2_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
+  %shl = shl i16 %in, 14
+  %sext = ashr i16 %shl, 14
+  store i16 %sext, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_sext_in_reg_i8_i16_arg:
+; GCN: s_load_dword [[VAL:s[0-9]+]]
+
+; SI: s_sext_i32_i8 [[SSEXT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VSEXT:v[0-9]+]], [[SSEXT]]
+; SI: buffer_store_short [[VSEXT]]
+
+; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
+; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
+; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 8{{$}}
+define void @s_sext_in_reg_i8_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
+  %shl = shl i16 %in, 8
+  %sext = ashr i16 %shl, 8
+  store i16 %sext, i16 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_sext_in_reg_i15_i16_arg:
+; GCN: s_load_dword [[VAL:s[0-9]+]]
+
+; SI: s_bfe_i32 [[BFE:s[0-9]+]], [[VAL]], 0xf0000
+; SI: v_mov_b32_e32 [[VBFE:v[0-9]+]], [[BFE]]
+; SI: buffer_store_short [[VBFE]]
+
+; VI: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}
+; VI: s_sext_i32_i16 s{{[0-9]+}}, s{{[0-9]+}}
+; VI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1{{$}}
+define void @s_sext_in_reg_i15_i16_arg(i16 addrspace(1)* %out, i16 %in) #0 {
+  %shl = shl i16 %in, 1
+  %sext = ashr i16 %shl, 1
+  store i16 %sext, i16 addrspace(1)* %out
+  ret void
+}
+
 declare i32 @llvm.r600.read.tidig.x() #1
 
 attributes #0 = { nounwind }
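A note on the S_BFE_I32 immediates: as the inline comments in the SIInstructions.td hunk say, the scalar BFE source-1 immediate packs the bitfield as offset | (width << 16), so width 1 gives 0x10000 and width 8 gives 0x80000. A minimal C sketch of that packing, checked against the constants this patch and its tests use (the helper name s_bfe_imm is hypothetical, not part of the patch):

#include <assert.h>
#include <stdint.h>

/* Hypothetical helper: build the S_BFE_I32 src1 immediate, packing the
 * bitfield as offset | (width << 16) per the pattern comments above. */
static uint32_t s_bfe_imm(uint32_t offset, uint32_t width) {
  return offset | (width << 16);
}

int main(void) {
  assert(s_bfe_imm(0, 1) == 0x10000);  /* i1 pattern: 0 | 1 << 16 */
  assert(s_bfe_imm(0, 2) == 0x20000);  /* i2 immediate checked in the tests */
  assert(s_bfe_imm(0, 8) == 0x80000);  /* i8 pattern: 0 | 8 << 16 */
  assert(s_bfe_imm(0, 15) == 0xf0000); /* i15 immediate checked in the tests */
  return 0;
}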