Index: llvm/test/CodeGen/AMDGPU/build_vector.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/build_vector.ll
+++ llvm/test/CodeGen/AMDGPU/build_vector.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600
-; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=SI
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefixes=R600,ALL
+; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefixes=SI,GFX6,ALL
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=SI,GFX8,ALL
+; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10,SI-DAG,ALL
 
-; R600: {{^}}build_vector2:
+; ALL-LABEL: {{^}}build_vector2:
 ; R600: MOV
 ; R600: MOV
 ; R600-NOT: MOV
-; SI: {{^}}build_vector2:
 ; SI-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
 ; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
 ; SI: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}}
@@ -16,13 +16,12 @@
   ret void
 }
 
-; R600: {{^}}build_vector4:
+; ALL-LABEL: {{^}}build_vector4:
 ; R600: MOV
 ; R600: MOV
 ; R600: MOV
 ; R600: MOV
 ; R600-NOT: MOV
-; SI: {{^}}build_vector4:
 ; SI-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
 ; SI-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
 ; SI-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7
@@ -33,3 +32,55 @@
   store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out
   ret void
 }
+
+; ALL-LABEL: {{^}}build_vector_v2i16:
+; R600: MOV
+; R600-NOT: MOV
+; SI: s_mov_b32 s3, 0xf000
+; SI: s_mov_b32 s2, -1
+; SI: v_mov_b32_e32 v0, 0x60005
+; SI: s_waitcnt lgkmcnt(0)
+; SI: buffer_store_dword v0, off, s[0:3], 0
+; GFX10: v_mov_b32_e32 v0, 0
+; GFX10: v_mov_b32_e32 v1, 0x60005
+; GFX10: s_waitcnt lgkmcnt(0)
+; GFX10: global_store_dword v0, v1, s[0:1]
+define amdgpu_kernel void @build_vector_v2i16 (<2 x i16> addrspace(1)* %out) {
+entry:
+  store <2 x i16> <i16 5, i16 6>, <2 x i16> addrspace(1)* %out
+  ret void
+}
+
+; ALL-LABEL: {{^}}build_vector_v2i16_trunc:
+; R600: LSHR
+; R600: OR_INT
+; R600: LSHR
+; R600-NOT: MOV
+; GFX6: s_mov_b32 s3, 0xf000
+; GFX6: s_waitcnt lgkmcnt(0)
+; GFX6: s_lshr_b32 s2, s2, 16
+; GFX6: s_or_b32 s4, s2, 0x50000
+; GFX6: s_mov_b32 s2, -1
+; GFX6: v_mov_b32_e32 v0, s4
+; GFX6: buffer_store_dword v0, off, s[0:3], 0
+; GFX8: s_mov_b32 s7, 0xf000
+; GFX8: s_mov_b32 s6, -1
+; GFX8: s_waitcnt lgkmcnt(0)
+; GFX8: s_lshr_b32 s0, s0, 16
+; GFX8: s_or_b32 s0, s0, 0x50000
+; GFX8: v_mov_b32_e32 v0, s0
+; GFX8: buffer_store_dword v0, off, s[4:7], 0
+; GFX10: v_mov_b32_e32 v0, 0
+; GFX10: s_waitcnt lgkmcnt(0)
+; GFX10: s_lshr_b32 s2, s2, 16
+; GFX10: s_pack_ll_b32_b16 s2, s2, 5
+; GFX10: v_mov_b32_e32 v1, s2
+; GFX10: global_store_dword v0, v1, s[0:1]
+define amdgpu_kernel void @build_vector_v2i16_trunc (<2 x i16> addrspace(1)* %out, i32 %a) {
+  %srl = lshr i32 %a, 16
+  %trunc = trunc i32 %srl to i16
+  %ins.0 = insertelement <2 x i16> undef, i16 %trunc, i32 0
+  %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1
+  store <2 x i16> %ins.1, <2 x i16> addrspace(1)* %out
+  ret void
+}