Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -63,6 +63,16 @@ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom); } + // Workaround for LegalizeDAG asserting on expansion of i1 vector loads. + setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand); + + setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand); + + setOperationAction(ISD::STORE, MVT::i8, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::v2i32, Custom); @@ -71,6 +81,10 @@ setTruncStoreAction(MVT::i32, MVT::i8, Custom); setTruncStoreAction(MVT::i32, MVT::i16, Custom); + // Workaround for LegalizeDAG asserting on expansion of i1 vector stores. 
+ setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand); + setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand); + // Set condition code actions setCondCodeAction(ISD::SETO, MVT::f32, Expand); setCondCodeAction(ISD::SETUO, MVT::f32, Expand); Index: test/CodeGen/AMDGPU/extload.ll =================================================================== --- test/CodeGen/AMDGPU/extload.ll +++ test/CodeGen/AMDGPU/extload.ll @@ -1,14 +1,16 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; FUNC-LABEL: {{^}}anyext_load_i8: -; SI: buffer_load_dword v{{[0-9]+}} -; SI: buffer_store_dword v{{[0-9]+}} +; FIXME: This seems to not ever actually become an extload +; FUNC-LABEL: {{^}}global_anyext_load_i8: +; GCN: buffer_load_dword v{{[0-9]+}} +; GCN: buffer_store_dword v{{[0-9]+}} ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]], ; EG: VTX_READ_32 [[VAL]] -define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind { +define void @global_anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind { %cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)* %load = load i32, i32 addrspace(1)* %cast %x = bitcast i32 %load to <4 x i8> @@ -17,13 +19,13 @@
ret void } -; FUNC-LABEL: {{^}}anyext_load_i16: -; SI: buffer_load_dword v{{[0-9]+}} -; SI: buffer_store_dword v{{[0-9]+}} +; FUNC-LABEL: {{^}}global_anyext_load_i16: +; GCN: buffer_load_dword v{{[0-9]+}} +; GCN: buffer_store_dword v{{[0-9]+}} ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]], ; EG: VTX_READ_32 [[VAL]] -define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind { +define void @global_anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind { %cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)* %load = load i32, i32 addrspace(1)* %cast %x = bitcast i32 %load to <2 x i16> @@ -32,13 +34,13 @@ ret void } -; FUNC-LABEL: {{^}}anyext_load_lds_i8: -; SI: ds_read_b32 v{{[0-9]+}} -; SI: ds_write_b32 v{{[0-9]+}} +; FUNC-LABEL: {{^}}local_anyext_load_i8: +; GCN: ds_read_b32 v{{[0-9]+}} +; GCN: ds_write_b32 v{{[0-9]+}} ; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]] ; EG: LDS_WRITE * [[VAL]] -define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind { +define void @local_anyext_load_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind { %cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)* %load = load i32, i32 addrspace(3)* %cast %x = bitcast i32 %load to <4 x i8> @@ -47,13 +49,13 @@ ret void } -; FUNC-LABEL: {{^}}anyext_load_lds_i16: -; SI: ds_read_b32 v{{[0-9]+}} -; SI: ds_write_b32 v{{[0-9]+}} +; FUNC-LABEL: {{^}}local_anyext_load_i16: +; GCN: ds_read_b32 v{{[0-9]+}} +; GCN: ds_write_b32 v{{[0-9]+}} ; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]] ; EG: LDS_WRITE * [[VAL]] -define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind { +define void @local_anyext_load_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) 
nounwind { %cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)* %load = load i32, i32 addrspace(3)* %cast %x = bitcast i32 %load to <2 x i16> Index: test/CodeGen/AMDGPU/fpext.ll =================================================================== --- test/CodeGen/AMDGPU/fpext.ll +++ test/CodeGen/AMDGPU/fpext.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}fpext_f32_to_f64: @@ -18,6 +18,16 @@ ret void } +; FUNC-LABEL: {{^}}fpext_v3f32_to_v3f64: +; SI: v_cvt_f64_f32_e32 +; SI: v_cvt_f64_f32_e32 +; SI: v_cvt_f64_f32_e32 +define void @fpext_v3f32_to_v3f64(<3 x double> addrspace(1)* %out, <3 x float> %in) { + %result = fpext <3 x float> %in to <3 x double> + store <3 x double> %result, <3 x double> addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}fpext_v4f32_to_v4f64: ; SI: v_cvt_f64_f32_e32 ; SI: v_cvt_f64_f32_e32 Index: test/CodeGen/AMDGPU/global-extload-i1.ll =================================================================== --- test/CodeGen/AMDGPU/global-extload-i1.ll +++ /dev/null @@ -1,302 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; FIXME: Evergreen broken - -; FUNC-LABEL: {{^}}zextload_global_i1_to_i32: -; SI: buffer_load_ubyte -; SI: buffer_store_dword -; SI: s_endpgm -define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { - %a = load i1, i1 addrspace(1)* %in - %ext = zext i1 %a to i32 - store i32 %ext, i32 
addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_i1_to_i32: -; SI: buffer_load_ubyte -; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}} -; SI: buffer_store_dword -; SI: s_endpgm -define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { - %a = load i1, i1 addrspace(1)* %in - %ext = sext i1 %a to i32 - store i32 %ext, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32: -; SI: s_endpgm -define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i1>, <1 x i1> addrspace(1)* %in - %ext = zext <1 x i1> %load to <1 x i32> - store <1 x i32> %ext, <1 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32: -; SI: s_endpgm -define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i1>, <1 x i1> addrspace(1)* %in - %ext = sext <1 x i1> %load to <1 x i32> - store <1 x i32> %ext, <1 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32: -; SI: s_endpgm -define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <2 x i1>, <2 x i1> addrspace(1)* %in - %ext = zext <2 x i1> %load to <2 x i32> - store <2 x i32> %ext, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32: -; SI: s_endpgm -define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <2 x i1>, <2 x i1> addrspace(1)* %in - %ext = sext <2 x i1> %load to <2 x i32> - store <2 x i32> %ext, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32: -; SI: s_endpgm -define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* 
nocapture %in) nounwind { - %load = load <4 x i1>, <4 x i1> addrspace(1)* %in - %ext = zext <4 x i1> %load to <4 x i32> - store <4 x i32> %ext, <4 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32: -; SI: s_endpgm -define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <4 x i1>, <4 x i1> addrspace(1)* %in - %ext = sext <4 x i1> %load to <4 x i32> - store <4 x i32> %ext, <4 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32: -; SI: s_endpgm -define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i1>, <8 x i1> addrspace(1)* %in - %ext = zext <8 x i1> %load to <8 x i32> - store <8 x i32> %ext, <8 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32: -; SI: s_endpgm -define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i1>, <8 x i1> addrspace(1)* %in - %ext = sext <8 x i1> %load to <8 x i32> - store <8 x i32> %ext, <8 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32: -; SI: s_endpgm -define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i1>, <16 x i1> addrspace(1)* %in - %ext = zext <16 x i1> %load to <16 x i32> - store <16 x i32> %ext, <16 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32: -; SI: s_endpgm -define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i1>, <16 x i1> addrspace(1)* %in - %ext = sext <16 x i1> %load to <16 x i32> - store <16 x i32> %ext, <16 x i32> addrspace(1)* %out - ret void -} - -; XFUNC-LABEL: 
{{^}}zextload_global_v32i1_to_v32i32: -; XSI: s_endpgm -; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind { -; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in -; %ext = zext <32 x i1> %load to <32 x i32> -; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out -; ret void -; } - -; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32: -; XSI: s_endpgm -; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind { -; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in -; %ext = sext <32 x i1> %load to <32 x i32> -; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out -; ret void -; } - -; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32: -; XSI: s_endpgm -; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind { -; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in -; %ext = zext <64 x i1> %load to <64 x i32> -; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out -; ret void -; } - -; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32: -; XSI: s_endpgm -; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind { -; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in -; %ext = sext <64 x i1> %load to <64 x i32> -; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out -; ret void -; } - -; FUNC-LABEL: {{^}}zextload_global_i1_to_i64: -; SI: buffer_load_ubyte [[LOAD:v[0-9]+]], -; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}} -; SI: buffer_store_dwordx2 -define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { - %a = load i1, i1 addrspace(1)* %in - %ext = zext i1 %a to i64 - store i64 %ext, i64 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_i1_to_i64: -; SI: buffer_load_ubyte [[LOAD:v[0-9]+]], -; SI: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 
0, 1{{$}} -; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]] -; SI: buffer_store_dwordx2 -define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { - %a = load i1, i1 addrspace(1)* %in - %ext = sext i1 %a to i64 - store i64 %ext, i64 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64: -; SI: s_endpgm -define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i1>, <1 x i1> addrspace(1)* %in - %ext = zext <1 x i1> %load to <1 x i64> - store <1 x i64> %ext, <1 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64: -; SI: s_endpgm -define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i1>, <1 x i1> addrspace(1)* %in - %ext = sext <1 x i1> %load to <1 x i64> - store <1 x i64> %ext, <1 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64: -; SI: s_endpgm -define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <2 x i1>, <2 x i1> addrspace(1)* %in - %ext = zext <2 x i1> %load to <2 x i64> - store <2 x i64> %ext, <2 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64: -; SI: s_endpgm -define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <2 x i1>, <2 x i1> addrspace(1)* %in - %ext = sext <2 x i1> %load to <2 x i64> - store <2 x i64> %ext, <2 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64: -; SI: s_endpgm -define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <4 x i1>, <4 x i1> addrspace(1)* %in - %ext = zext <4 x i1> %load to <4 x 
i64> - store <4 x i64> %ext, <4 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64: -; SI: s_endpgm -define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <4 x i1>, <4 x i1> addrspace(1)* %in - %ext = sext <4 x i1> %load to <4 x i64> - store <4 x i64> %ext, <4 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64: -; SI: s_endpgm -define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i1>, <8 x i1> addrspace(1)* %in - %ext = zext <8 x i1> %load to <8 x i64> - store <8 x i64> %ext, <8 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64: -; SI: s_endpgm -define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i1>, <8 x i1> addrspace(1)* %in - %ext = sext <8 x i1> %load to <8 x i64> - store <8 x i64> %ext, <8 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64: -; SI: s_endpgm -define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i1>, <16 x i1> addrspace(1)* %in - %ext = zext <16 x i1> %load to <16 x i64> - store <16 x i64> %ext, <16 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64: -; SI: s_endpgm -define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i1>, <16 x i1> addrspace(1)* %in - %ext = sext <16 x i1> %load to <16 x i64> - store <16 x i64> %ext, <16 x i64> addrspace(1)* %out - ret void -} - -; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64: -; XSI: s_endpgm -; define void @zextload_global_v32i1_to_v32i64(<32 x i64> 
addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind { -; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in -; %ext = zext <32 x i1> %load to <32 x i64> -; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out -; ret void -; } - -; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64: -; XSI: s_endpgm -; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind { -; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in -; %ext = sext <32 x i1> %load to <32 x i64> -; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out -; ret void -; } - -; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64: -; XSI: s_endpgm -; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind { -; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in -; %ext = zext <64 x i1> %load to <64 x i64> -; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out -; ret void -; } - -; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64: -; XSI: s_endpgm -; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind { -; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in -; %ext = sext <64 x i1> %load to <64 x i64> -; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out -; ret void -; } Index: test/CodeGen/AMDGPU/global-extload-i16.ll =================================================================== --- test/CodeGen/AMDGPU/global-extload-i16.ll +++ /dev/null @@ -1,302 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; FIXME: cypress is broken because the bigger testcases spill and it's not implemented - -; FUNC-LABEL: 
{{^}}zextload_global_i16_to_i32: -; SI: buffer_load_ushort -; SI: buffer_store_dword -; SI: s_endpgm -define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { - %a = load i16, i16 addrspace(1)* %in - %ext = zext i16 %a to i32 - store i32 %ext, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_i16_to_i32: -; SI: buffer_load_sshort -; SI: buffer_store_dword -; SI: s_endpgm -define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { - %a = load i16, i16 addrspace(1)* %in - %ext = sext i16 %a to i32 - store i32 %ext, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i32: -; SI: buffer_load_ushort -; SI: s_endpgm -define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i16>, <1 x i16> addrspace(1)* %in - %ext = zext <1 x i16> %load to <1 x i32> - store <1 x i32> %ext, <1 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i32: -; SI: buffer_load_sshort -; SI: s_endpgm -define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i16>, <1 x i16> addrspace(1)* %in - %ext = sext <1 x i16> %load to <1 x i32> - store <1 x i32> %ext, <1 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32: -; SI: s_endpgm -define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <2 x i16>, <2 x i16> addrspace(1)* %in - %ext = zext <2 x i16> %load to <2 x i32> - store <2 x i32> %ext, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32: -; SI: s_endpgm -define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load 
<2 x i16>, <2 x i16> addrspace(1)* %in - %ext = sext <2 x i16> %load to <2 x i32> - store <2 x i32> %ext, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32: -; SI: s_endpgm -define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <4 x i16>, <4 x i16> addrspace(1)* %in - %ext = zext <4 x i16> %load to <4 x i32> - store <4 x i32> %ext, <4 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32: -; SI: s_endpgm -define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <4 x i16>, <4 x i16> addrspace(1)* %in - %ext = sext <4 x i16> %load to <4 x i32> - store <4 x i32> %ext, <4 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32: -; SI: s_endpgm -define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i16>, <8 x i16> addrspace(1)* %in - %ext = zext <8 x i16> %load to <8 x i32> - store <8 x i32> %ext, <8 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32: -; SI: s_endpgm -define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i16>, <8 x i16> addrspace(1)* %in - %ext = sext <8 x i16> %load to <8 x i32> - store <8 x i32> %ext, <8 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32: -; SI: s_endpgm -define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i16>, <16 x i16> addrspace(1)* %in - %ext = zext <16 x i16> %load to <16 x i32> - store <16 x i32> %ext, <16 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: 
{{^}}sextload_global_v16i16_to_v16i32: -; SI: s_endpgm -define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i16>, <16 x i16> addrspace(1)* %in - %ext = sext <16 x i16> %load to <16 x i32> - store <16 x i32> %ext, <16 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32: -; SI: s_endpgm -define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <32 x i16>, <32 x i16> addrspace(1)* %in - %ext = zext <32 x i16> %load to <32 x i32> - store <32 x i32> %ext, <32 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32: -; SI: s_endpgm -define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <32 x i16>, <32 x i16> addrspace(1)* %in - %ext = sext <32 x i16> %load to <32 x i32> - store <32 x i32> %ext, <32 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32: -; SI: s_endpgm -define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <64 x i16>, <64 x i16> addrspace(1)* %in - %ext = zext <64 x i16> %load to <64 x i32> - store <64 x i32> %ext, <64 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32: -; SI: s_endpgm -define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <64 x i16>, <64 x i16> addrspace(1)* %in - %ext = sext <64 x i16> %load to <64 x i32> - store <64 x i32> %ext, <64 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_i16_to_i64: -; SI: buffer_load_ushort v[[LO:[0-9]+]], -; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} -; SI: 
buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] -define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { - %a = load i16, i16 addrspace(1)* %in - %ext = zext i16 %a to i64 - store i64 %ext, i64 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_i16_to_i64: -; SI: buffer_load_sshort [[LOAD:v[0-9]+]], -; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]] -; SI: buffer_store_dwordx2 -define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { - %a = load i16, i16 addrspace(1)* %in - %ext = sext i16 %a to i64 - store i64 %ext, i64 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64: -; SI: s_endpgm -define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i16>, <1 x i16> addrspace(1)* %in - %ext = zext <1 x i16> %load to <1 x i64> - store <1 x i64> %ext, <1 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64: -; SI: s_endpgm -define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i16>, <1 x i16> addrspace(1)* %in - %ext = sext <1 x i16> %load to <1 x i64> - store <1 x i64> %ext, <1 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64: -; SI: s_endpgm -define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <2 x i16>, <2 x i16> addrspace(1)* %in - %ext = zext <2 x i16> %load to <2 x i64> - store <2 x i64> %ext, <2 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64: -; SI: s_endpgm -define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <2 x i16>, <2 x i16> addrspace(1)* %in - %ext = sext 
<2 x i16> %load to <2 x i64> - store <2 x i64> %ext, <2 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64: -; SI: s_endpgm -define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <4 x i16>, <4 x i16> addrspace(1)* %in - %ext = zext <4 x i16> %load to <4 x i64> - store <4 x i64> %ext, <4 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64: -; SI: s_endpgm -define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <4 x i16>, <4 x i16> addrspace(1)* %in - %ext = sext <4 x i16> %load to <4 x i64> - store <4 x i64> %ext, <4 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64: -; SI: s_endpgm -define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i16>, <8 x i16> addrspace(1)* %in - %ext = zext <8 x i16> %load to <8 x i64> - store <8 x i64> %ext, <8 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64: -; SI: s_endpgm -define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i16>, <8 x i16> addrspace(1)* %in - %ext = sext <8 x i16> %load to <8 x i64> - store <8 x i64> %ext, <8 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64: -; SI: s_endpgm -define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i16>, <16 x i16> addrspace(1)* %in - %ext = zext <16 x i16> %load to <16 x i64> - store <16 x i64> %ext, <16 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64: -; SI: s_endpgm -define void 
@sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i16>, <16 x i16> addrspace(1)* %in - %ext = sext <16 x i16> %load to <16 x i64> - store <16 x i64> %ext, <16 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64: -; SI: s_endpgm -define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <32 x i16>, <32 x i16> addrspace(1)* %in - %ext = zext <32 x i16> %load to <32 x i64> - store <32 x i64> %ext, <32 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64: -; SI: s_endpgm -define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <32 x i16>, <32 x i16> addrspace(1)* %in - %ext = sext <32 x i16> %load to <32 x i64> - store <32 x i64> %ext, <32 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64: -; SI: s_endpgm -define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <64 x i16>, <64 x i16> addrspace(1)* %in - %ext = zext <64 x i16> %load to <64 x i64> - store <64 x i64> %ext, <64 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64: -; SI: s_endpgm -define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind { - %load = load <64 x i16>, <64 x i16> addrspace(1)* %in - %ext = sext <64 x i16> %load to <64 x i64> - store <64 x i64> %ext, <64 x i64> addrspace(1)* %out - ret void -} Index: test/CodeGen/AMDGPU/global-extload-i32.ll =================================================================== --- test/CodeGen/AMDGPU/global-extload-i32.ll +++ /dev/null @@ -1,308 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI 
-verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s - -; FUNC-LABEL: {{^}}zextload_global_i32_to_i64: -; SI: buffer_load_dword v[[LO:[0-9]+]], -; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} -; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] -define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { - %a = load i32, i32 addrspace(1)* %in - %ext = zext i32 %a to i64 - store i64 %ext, i64 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_i32_to_i64: -; SI: buffer_load_dword [[LOAD:v[0-9]+]], -; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]] -; SI: buffer_store_dwordx2 -define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { - %a = load i32, i32 addrspace(1)* %in - %ext = sext i32 %a to i64 - store i64 %ext, i64 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64: -; SI: buffer_load_dword -; SI: buffer_store_dwordx2 -; SI: s_endpgm -define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i32>, <1 x i32> addrspace(1)* %in - %ext = zext <1 x i32> %load to <1 x i64> - store <1 x i64> %ext, <1 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64: -; SI: buffer_load_dword -; SI: v_ashrrev_i32 -; SI: buffer_store_dwordx2 -; SI: s_endpgm -define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i32>, <1 x i32> addrspace(1)* %in - %ext = sext <1 x i32> %load to <1 x i64> - store <1 x i64> %ext, <1 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64: -; SI: 
buffer_load_dwordx2 -; SI: buffer_store_dwordx4 -; SI: s_endpgm -define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind { - %load = load <2 x i32>, <2 x i32> addrspace(1)* %in - %ext = zext <2 x i32> %load to <2 x i64> - store <2 x i64> %ext, <2 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64: -; SI: buffer_load_dwordx2 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx4 -; SI: s_endpgm -define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind { - %load = load <2 x i32>, <2 x i32> addrspace(1)* %in - %ext = sext <2 x i32> %load to <2 x i64> - store <2 x i64> %ext, <2 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64: -; SI: buffer_load_dwordx4 -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 -; SI: s_endpgm -define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind { - %load = load <4 x i32>, <4 x i32> addrspace(1)* %in - %ext = zext <4 x i32> %load to <4 x i64> - store <4 x i64> %ext, <4 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64: -; SI: buffer_load_dwordx4 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI: s_endpgm -define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind { - %load = load <4 x i32>, <4 x i32> addrspace(1)* %in - %ext = sext <4 x i32> %load to <4 x i64> - store <4 x i64> %ext, <4 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64: -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: 
buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI: s_endpgm -define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i32>, <8 x i32> addrspace(1)* %in - %ext = zext <8 x i32> %load to <8 x i64> - store <8 x i64> %ext, <8 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64: -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 - -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI: s_endpgm -define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i32>, <8 x i32> addrspace(1)* %in - %ext = sext <8 x i32> %load to <8 x i64> - store <8 x i64> %ext, <8 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64: -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 - -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx4 - -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx4 - -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx4 - -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx4 -; SI: s_endpgm -define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i32>, <16 x i32> addrspace(1)* 
%in - %ext = sext <16 x i32> %load to <16 x i64> - store <16 x i64> %ext, <16 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 - -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 -; SI: s_endpgm -define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i32>, <16 x i32> addrspace(1)* %in - %ext = zext <16 x i32> %load to <16 x i64> - store <16 x i64> %ext, <16 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64: -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 - - -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 -; SI-DAG: v_ashrrev_i32 - -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; 
SI-DAG: buffer_store_dwordx4 - -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 - -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 - -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 - -; SI: s_endpgm -define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind { - %load = load <32 x i32>, <32 x i32> addrspace(1)* %in - %ext = sext <32 x i32> %load to <32 x i64> - store <32 x i64> %ext, <32 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64: -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 -; SI: buffer_load_dwordx4 - -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 - -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 - -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 - -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx4 - -; SI: s_endpgm -define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind { - %load = load <32 x i32>, <32 x i32> addrspace(1)* %in - %ext = zext <32 x i32> %load to <32 x i64> - store <32 x i64> %ext, <32 x i64> addrspace(1)* %out - ret void -} Index: test/CodeGen/AMDGPU/global-extload-i8.ll =================================================================== --- 
test/CodeGen/AMDGPU/global-extload-i8.ll +++ /dev/null @@ -1,299 +0,0 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s - -; FUNC-LABEL: {{^}}zextload_global_i8_to_i32: -; SI: buffer_load_ubyte -; SI: buffer_store_dword -; SI: s_endpgm -define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { - %a = load i8, i8 addrspace(1)* %in - %ext = zext i8 %a to i32 - store i32 %ext, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_i8_to_i32: -; SI: buffer_load_sbyte -; SI: buffer_store_dword -; SI: s_endpgm -define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { - %a = load i8, i8 addrspace(1)* %in - %ext = sext i8 %a to i32 - store i32 %ext, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32: -; SI: s_endpgm -define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i8>, <1 x i8> addrspace(1)* %in - %ext = zext <1 x i8> %load to <1 x i32> - store <1 x i32> %ext, <1 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32: -; SI: s_endpgm -define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i8>, <1 x i8> addrspace(1)* %in - %ext = sext <1 x i8> %load to <1 x i32> - store <1 x i32> %ext, <1 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32: -; SI: s_endpgm -define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <2 x i8>, <2 x i8> addrspace(1)* %in 
- %ext = zext <2 x i8> %load to <2 x i32> - store <2 x i32> %ext, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32: -; SI: s_endpgm -define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <2 x i8>, <2 x i8> addrspace(1)* %in - %ext = sext <2 x i8> %load to <2 x i32> - store <2 x i32> %ext, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32: -; SI: s_endpgm -define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <4 x i8>, <4 x i8> addrspace(1)* %in - %ext = zext <4 x i8> %load to <4 x i32> - store <4 x i32> %ext, <4 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32: -; SI: s_endpgm -define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <4 x i8>, <4 x i8> addrspace(1)* %in - %ext = sext <4 x i8> %load to <4 x i32> - store <4 x i32> %ext, <4 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32: -; SI: s_endpgm -define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i8>, <8 x i8> addrspace(1)* %in - %ext = zext <8 x i8> %load to <8 x i32> - store <8 x i32> %ext, <8 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32: -; SI: s_endpgm -define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i8>, <8 x i8> addrspace(1)* %in - %ext = sext <8 x i8> %load to <8 x i32> - store <8 x i32> %ext, <8 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32: -; SI: s_endpgm -define void @zextload_global_v16i8_to_v16i32(<16 x 
i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i8>, <16 x i8> addrspace(1)* %in - %ext = zext <16 x i8> %load to <16 x i32> - store <16 x i32> %ext, <16 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32: -; SI: s_endpgm -define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i8>, <16 x i8> addrspace(1)* %in - %ext = sext <16 x i8> %load to <16 x i32> - store <16 x i32> %ext, <16 x i32> addrspace(1)* %out - ret void -} - -; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32: -; XSI: s_endpgm -; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { -; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in -; %ext = zext <32 x i8> %load to <32 x i32> -; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out -; ret void -; } - -; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32: -; XSI: s_endpgm -; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { -; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in -; %ext = sext <32 x i8> %load to <32 x i32> -; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out -; ret void -; } - -; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32: -; XSI: s_endpgm -; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind { -; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in -; %ext = zext <64 x i8> %load to <64 x i32> -; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out -; ret void -; } - -; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32: -; XSI: s_endpgm -; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind { -; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in -; %ext = 
sext <64 x i8> %load to <64 x i32> -; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out -; ret void -; } - -; FUNC-LABEL: {{^}}zextload_global_i8_to_i64: -; SI: buffer_load_ubyte v[[LO:[0-9]+]], -; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} -; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] -define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { - %a = load i8, i8 addrspace(1)* %in - %ext = zext i8 %a to i64 - store i64 %ext, i64 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_i8_to_i64: -; SI: buffer_load_sbyte [[LOAD:v[0-9]+]], -; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]] -; SI: buffer_store_dwordx2 -define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { - %a = load i8, i8 addrspace(1)* %in - %ext = sext i8 %a to i64 - store i64 %ext, i64 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64: -; SI: s_endpgm -define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i8>, <1 x i8> addrspace(1)* %in - %ext = zext <1 x i8> %load to <1 x i64> - store <1 x i64> %ext, <1 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64: -; SI: s_endpgm -define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <1 x i8>, <1 x i8> addrspace(1)* %in - %ext = sext <1 x i8> %load to <1 x i64> - store <1 x i64> %ext, <1 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64: -; SI: s_endpgm -define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <2 x i8>, <2 x i8> addrspace(1)* %in - %ext = zext <2 x i8> %load to <2 x i64> - store <2 x i64> %ext, <2 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: 
{{^}}sextload_global_v2i8_to_v2i64: -; SI: s_endpgm -define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <2 x i8>, <2 x i8> addrspace(1)* %in - %ext = sext <2 x i8> %load to <2 x i64> - store <2 x i64> %ext, <2 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64: -; SI: s_endpgm -define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <4 x i8>, <4 x i8> addrspace(1)* %in - %ext = zext <4 x i8> %load to <4 x i64> - store <4 x i64> %ext, <4 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64: -; SI: s_endpgm -define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <4 x i8>, <4 x i8> addrspace(1)* %in - %ext = sext <4 x i8> %load to <4 x i64> - store <4 x i64> %ext, <4 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64: -; SI: s_endpgm -define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i8>, <8 x i8> addrspace(1)* %in - %ext = zext <8 x i8> %load to <8 x i64> - store <8 x i64> %ext, <8 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64: -; SI: s_endpgm -define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <8 x i8>, <8 x i8> addrspace(1)* %in - %ext = sext <8 x i8> %load to <8 x i64> - store <8 x i64> %ext, <8 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64: -; SI: s_endpgm -define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i8>, <16 x i8> addrspace(1)* %in 
- %ext = zext <16 x i8> %load to <16 x i64> - store <16 x i64> %ext, <16 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64: -; SI: s_endpgm -define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <16 x i8>, <16 x i8> addrspace(1)* %in - %ext = sext <16 x i8> %load to <16 x i64> - store <16 x i64> %ext, <16 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64: -; SI: s_endpgm -define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <32 x i8>, <32 x i8> addrspace(1)* %in - %ext = zext <32 x i8> %load to <32 x i64> - store <32 x i64> %ext, <32 x i64> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64: -; SI: s_endpgm -define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { - %load = load <32 x i8>, <32 x i8> addrspace(1)* %in - %ext = sext <32 x i8> %load to <32 x i64> - store <32 x i64> %ext, <32 x i64> addrspace(1)* %out - ret void -} - -; ; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64: -; ; XSI: s_endpgm -; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind { -; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in -; %ext = zext <64 x i8> %load to <64 x i64> -; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out -; ret void -; } - -; ; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64: -; ; XSI: s_endpgm -; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind { -; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in -; %ext = sext <64 x i8> %load to <64 x i64> -; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out -; ret void -; } Index: 
test/CodeGen/AMDGPU/kernel-args.ll =================================================================== --- test/CodeGen/AMDGPU/kernel-args.ll +++ test/CodeGen/AMDGPU/kernel-args.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC +; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC @@ -475,3 +475,55 @@ ; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8 ; ret void ; } + +; FUNC-LABEL: {{^}}i1_arg: +; SI: buffer_load_ubyte +; SI: v_and_b32_e32 +; SI: buffer_store_byte +; SI: s_endpgm +define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { + store i1 %x, i1 addrspace(1)* %out, align 1 + ret void +} + +; FUNC-LABEL: {{^}}i1_arg_zext_i32: +; SI: buffer_load_ubyte +; SI: buffer_store_dword +; SI: s_endpgm +define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { + %ext = zext i1 %x to i32 + store i32 %ext, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}i1_arg_zext_i64: +; SI: buffer_load_ubyte +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { + %ext = zext i1 %x to i64 + store i64 %ext, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}i1_arg_sext_i32: +; SI: buffer_load_ubyte +; SI: buffer_store_dword +; SI: s_endpgm +define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { + %ext = sext i1 %x to i32 + store i32 %ext, i32addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}i1_arg_sext_i64: +; SI: buffer_load_ubyte +; SI: 
v_bfe_i32 +; SI: v_ashrrev_i32 +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { + %ext = sext i1 %x to i64 + store i64 %ext, i64 addrspace(1)* %out, align 8 + ret void +} Index: test/CodeGen/AMDGPU/load-constant-f64.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-constant-f64.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}constant_load_f64: +; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}] +; GCN-NOHSA: buffer_store_dwordx2 +; GCN-HSA: flat_store_dwordx2 +define void @constant_load_f64(double addrspace(1)* %out, double addrspace(2)* %in) #0 { + %ld = load double, double addrspace(2)* %in + store double %ld, double addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-constant-i1.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-constant-i1.ll @@ -0,0 +1,370 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}constant_load_i1: +; GCN: buffer_load_ubyte +; GCN: v_and_b32_e32 v{{[0-9]+}}, 1 +; GCN: buffer_store_byte + +; EG: VTX_READ_8 +; EG: AND_INT +define void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 { + %load = 
load i1, i1 addrspace(2)* %in + store i1 %load, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v2i1: +define void @constant_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(2)* %in + store <2 x i1> %load, <2 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v3i1: +define void @constant_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(2)* %in + store <3 x i1> %load, <3 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v4i1: +define void @constant_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(2)* %in + store <4 x i1> %load, <4 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v8i1: +define void @constant_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(2)* %in + store <8 x i1> %load, <8 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v16i1: +define void @constant_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(2)* %in + store <16 x i1> %load, <16 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v32i1: +define void @constant_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(2)* %in + store <32 x i1> %load, <32 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v64i1: +define void @constant_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <64 x i1>, <64 x i1> addrspace(2)* %in + store <64 x i1> %load, <64 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: 
{{^}}constant_zextload_i1_to_i32: +; GCN: buffer_load_ubyte +; GCN: buffer_store_dword +define void @constant_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 { + %a = load i1, i1 addrspace(2)* %in + %ext = zext i1 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_i1_to_i32: +; GCN: buffer_load_ubyte +; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}} +; GCN: buffer_store_dword + +; EG: VTX_READ_8 +; EG: BFE_INT +define void @constant_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 { + %a = load i1, i1 addrspace(2)* %in + %ext = sext i1 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i32: +define void @constant_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <1 x i1>, <1 x i1> addrspace(2)* %in + %ext = zext <1 x i1> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i32: +define void @constant_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <1 x i1>, <1 x i1> addrspace(2)* %in + %ext = sext <1 x i1> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i32: +define void @constant_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(2)* %in + %ext = zext <2 x i1> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i32: +define void @constant_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(2)* %in + %ext = sext <2 x i1> %load to <2 x i32> + 
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i32: +define void @constant_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(2)* %in + %ext = zext <3 x i1> %load to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i32: +define void @constant_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(2)* %in + %ext = sext <3 x i1> %load to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i32: +define void @constant_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(2)* %in + %ext = zext <4 x i1> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i32: +define void @constant_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(2)* %in + %ext = sext <4 x i1> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i32: +define void @constant_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(2)* %in + %ext = zext <8 x i1> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i32: +define void @constant_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(2)* %in + %ext = sext <8 x i1> %load to 
<8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i32: +define void @constant_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(2)* %in + %ext = zext <16 x i1> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i32: +define void @constant_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(2)* %in + %ext = sext <16 x i1> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i32: +define void @constant_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(2)* %in + %ext = zext <32 x i1> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i32: +define void @constant_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(2)* %in + %ext = sext <32 x i1> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i32: +define void @constant_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <64 x i1>, <64 x i1> addrspace(2)* %in + %ext = zext <64 x i1> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i32: +define void @constant_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 { + %load 
= load <64 x i1>, <64 x i1> addrspace(2)* %in + %ext = sext <64 x i1> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_i1_to_i64: +; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]], +; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}} +; GCN: buffer_store_dwordx2 +define void @constant_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 { + %a = load i1, i1 addrspace(2)* %in + %ext = zext i1 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_i1_to_i64: +; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]], +; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}} +; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]] +; GCN: buffer_store_dwordx2 +define void @constant_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(2)* nocapture %in) #0 { + %a = load i1, i1 addrspace(2)* %in + %ext = sext i1 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v1i1_to_v1i64: +define void @constant_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <1 x i1>, <1 x i1> addrspace(2)* %in + %ext = zext <1 x i1> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v1i1_to_v1i64: +define void @constant_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <1 x i1>, <1 x i1> addrspace(2)* %in + %ext = sext <1 x i1> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v2i1_to_v2i64: +define void @constant_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(2)* %in + %ext = zext <2 x i1> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret 
void +} + +; FUNC-LABEL: {{^}}constant_sextload_v2i1_to_v2i64: +define void @constant_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(2)* %in + %ext = sext <2 x i1> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v3i1_to_v3i64: +define void @constant_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(2)* %in + %ext = zext <3 x i1> %load to <3 x i64> + store <3 x i64> %ext, <3 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v3i1_to_v3i64: +define void @constant_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(2)* %in + %ext = sext <3 x i1> %load to <3 x i64> + store <3 x i64> %ext, <3 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v4i1_to_v4i64: +define void @constant_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(2)* %in + %ext = zext <4 x i1> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v4i1_to_v4i64: +define void @constant_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(2)* %in + %ext = sext <4 x i1> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v8i1_to_v8i64: +define void @constant_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(2)* %in + %ext = zext <8 x i1> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* 
%out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v8i1_to_v8i64: +define void @constant_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(2)* %in + %ext = sext <8 x i1> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v16i1_to_v16i64: +define void @constant_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(2)* %in + %ext = zext <16 x i1> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v16i1_to_v16i64: +define void @constant_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(2)* %in + %ext = sext <16 x i1> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v32i1_to_v32i64: +define void @constant_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(2)* %in + %ext = zext <32 x i1> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v32i1_to_v32i64: +define void @constant_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(2)* %in + %ext = sext <32 x i1> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v64i1_to_v64i64: +define void @constant_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <64 x i1>, <64 x i1> addrspace(2)* %in + %ext = zext <64 x i1> 
%load to <64 x i64> + store <64 x i64> %ext, <64 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v64i1_to_v64i64: +define void @constant_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(2)* nocapture %in) #0 { + %load = load <64 x i1>, <64 x i1> addrspace(2)* %in + %ext = sext <64 x i1> %load to <64 x i64> + store <64 x i64> %ext, <64 x i64> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-constant-i16.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-constant-i16.ll @@ -0,0 +1,761 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}constant_load_i16: +; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}} +; GCN-HSA: flat_load_ushort + +; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 +define void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(2)* %in) { +entry: + %ld = load i16, i16 addrspace(2)* %in + store i16 %ld, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v2i16: +; GCN: s_load_dword s + +; EG: VTX_READ_32 +define void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) { +entry: + %ld = load <2 x i16>, <2 x i16> addrspace(2)* %in + store <2 x i16> %ld, <2 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v3i16: +; GCN: s_load_dwordx2 s + +; EG-DAG: VTX_READ_32 +; EG-DAG: VTX_READ_16 +define void @constant_load_v3i16(<3 x i16> 
addrspace(1)* %out, <3 x i16> addrspace(2)* %in) { +entry: + %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in + store <3 x i16> %ld, <3 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v4i16: +; GCN: s_load_dwordx2 + +; EG: VTX_READ_64 +define void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) { +entry: + %ld = load <4 x i16>, <4 x i16> addrspace(2)* %in + store <4 x i16> %ld, <4 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v8i16: +; GCN: s_load_dwordx4 + +; EG: VTX_READ_128 +define void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) { +entry: + %ld = load <8 x i16>, <8 x i16> addrspace(2)* %in + store <8 x i16> %ld, <8 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v16i16: +; GCN: s_load_dwordx8 + +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) { +entry: + %ld = load <16 x i16>, <16 x i16> addrspace(2)* %in + store <16 x i16> %ld, <16 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_i16_to_i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_store_dword + +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_store_dword + +; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} +define void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 { + %a = load i16, i16 addrspace(2)* %in + %ext = zext i16 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_i16_to_i32: +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_store_dword + +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_store_dword + +; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]] +; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal +; EG: 16 +define void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 { + %a = load i16, i16 
addrspace(2)* %in + %ext = sext i16 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-HSA: flat_load_ushort +define void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 { + %load = load <1 x i16>, <1 x i16> addrspace(2)* %in + %ext = zext <1 x i16> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i32: +; GCN-NOHSA: buffer_load_sshort +; GCN-HSA: flat_load_sshort +define void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 { + %load = load <1 x i16>, <1 x i16> addrspace(2)* %in + %ext = sext <1 x i16> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +define void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 { + %load = load <2 x i16>, <2 x i16> addrspace(2)* %in + %ext = zext <2 x i16> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i32: +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort + +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort + +; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal +; EG-DAG: 16 +; EG-DAG: 16 +define void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 { + %load = load <2 x i16>, <2 x i16> addrspace(2)* %in + %ext = sext <2 x 
i16> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v3i16_to_v3i32: +; GCN: s_load_dwordx2 +define void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) { +entry: + %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in + %ext = zext <3 x i16> %ld to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v3i16_to_v3i32: +; GCN: s_load_dwordx2 +define void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) { +entry: + %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in + %ext = sext <3 x i16> %ld to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort + +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort + +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +define void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 { + %load = load <4 x i16>, <4 x i16> addrspace(2)* %in + %ext = zext <4 x i16> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i32: +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort + +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort + +; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; EG-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] 
+; EG-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal +; EG-DAG: 16 +; EG-DAG: 16 +; EG-DAG: 16 +; EG-DAG: 16 +define void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 { + %load = load <4 x i16>, <4 x i16> addrspace(2)* %in + %ext = sext <4 x i16> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort + +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +define void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 { + %load = load <8 x i16>, <8 x i16> addrspace(2)* %in + %ext = zext <8 x i16> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i32: +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort + +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort 
+; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +define void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 { + %load = load <8 x i16>, <8 x i16> addrspace(2)* %in + %ext = sext <8 x i16> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort + +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +define void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 { + %load = load <16 x i16>, <16 x i16> addrspace(2)* %in + %ext = zext <16 x i16> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i32: +define void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 { + %load = load <16 x i16>, <16 x i16> addrspace(2)* %in + %ext = sext <16 x i16> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 
+ ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort + +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort 
+; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +define void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 { + %load = load <32 x i16>, <32 x i16> addrspace(2)* %in + %ext = zext <32 x i16> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i32: +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort + +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: 
flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +define void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 { + %load = load <32 x i16>, <32 x i16> addrspace(2)* %in + %ext = sext <32 x i16> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; 
GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort + +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: 
flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +define void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 { + %load = load <64 x i16>, <64 x i16> addrspace(2)* %in + %ext = zext <64 x i16> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i32: +define void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 { + %load = load <64 x i16>, <64 x i16> addrspace(2)* %in + %ext = sext <64 x i16> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_i16_to_i64: +; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]], +; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]], +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} + +; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] +; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 { + %a = load i16, i16 addrspace(2)* %in + %ext = zext i16 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_i16_to_i64: +; GCN-NOHSA-DAG: buffer_load_sshort 
v[[LO:[0-9]+]], +; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]], +; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] + +; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] +; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 { + %a = load i16, i16 addrspace(2)* %in + %ext = sext i16 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i64: +define void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 { + %load = load <1 x i16>, <1 x i16> addrspace(2)* %in + %ext = zext <1 x i16> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i64: +define void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 { + %load = load <1 x i16>, <1 x i16> addrspace(2)* %in + %ext = sext <1 x i16> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i64: +define void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 { + %load = load <2 x i16>, <2 x i16> addrspace(2)* %in + %ext = zext <2 x i16> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i64: +define void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 { + %load = load <2 x i16>, <2 x i16> addrspace(2)* %in + %ext = sext <2 x i16> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i64: +define void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 { + %load = load <4 x i16>, 
<4 x i16> addrspace(2)* %in + %ext = zext <4 x i16> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i64: +define void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 { + %load = load <4 x i16>, <4 x i16> addrspace(2)* %in + %ext = sext <4 x i16> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i64: +define void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 { + %load = load <8 x i16>, <8 x i16> addrspace(2)* %in + %ext = zext <8 x i16> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i64: +define void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 { + %load = load <8 x i16>, <8 x i16> addrspace(2)* %in + %ext = sext <8 x i16> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i64: +define void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 { + %load = load <16 x i16>, <16 x i16> addrspace(2)* %in + %ext = zext <16 x i16> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i64: +define void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 { + %load = load <16 x i16>, <16 x i16> addrspace(2)* %in + %ext = sext <16 x i16> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i64: +define void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 { + %load = 
load <32 x i16>, <32 x i16> addrspace(2)* %in + %ext = zext <32 x i16> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i64: +define void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 { + %load = load <32 x i16>, <32 x i16> addrspace(2)* %in + %ext = sext <32 x i16> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; ; XFUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i64: +; define void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 { +; %load = load <64 x i16>, <64 x i16> addrspace(2)* %in +; %ext = zext <64 x i16> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } + +; ; XFUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i64: +; define void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 { +; %load = load <64 x i16>, <64 x i16> addrspace(2)* %in +; %ext = sext <64 x i16> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-constant-i32.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-constant-i32.ll @@ -0,0 +1,378 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}constant_load_i32: +; GCN: 
s_load_dword s{{[0-9]+}} + +; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 +define void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) #0 { +entry: + %ld = load i32, i32 addrspace(2)* %in + store i32 %ld, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v2i32: +; GCN: s_load_dwordx2 + +; EG: VTX_READ_64 +define void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 { +entry: + %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in + store <2 x i32> %ld, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v3i32: +; GCN: s_load_dwordx4 + +; EG: VTX_READ_128 +define void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(2)* %in) #0 { +entry: + %ld = load <3 x i32>, <3 x i32> addrspace(2)* %in + store <3 x i32> %ld, <3 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v4i32: +; GCN: s_load_dwordx4 + +; EG: VTX_READ_128 +define void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 { +entry: + %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in + store <4 x i32> %ld, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v8i32: +; GCN: s_load_dwordx8 + +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 { +entry: + %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in + store <8 x i32> %ld, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v16i32: +; GCN: s_load_dwordx16 + +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 { +entry: + %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in + store <16 x i32> %ld, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_i32_to_i64: +; GCN-DAG: s_load_dword 
s[[SLO:[0-9]+]], +; GCN-DAG: v_mov_b32_e32 v[[SHI:[0-9]+]], 0{{$}} +; GCN: store_dwordx2 + +; EG: MEM_RAT +; EG: MEM_RAT +define void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 { + %ld = load i32, i32 addrspace(2)* %in + %ext = zext i32 %ld to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_i32_to_i64: +; GCN: s_load_dword s[[SLO:[0-9]+]] +; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[SLO]], 31 +; GCN: store_dwordx2 + +; EG: MEM_RAT +; EG: MEM_RAT +; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x +; EG: 31 +define void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 { + %ld = load i32, i32 addrspace(2)* %in + %ext = sext i32 %ld to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v1i32_to_v1i64: +; GCN: s_load_dword +; GCN: store_dwordx2 +define void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 { + %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in + %ext = zext <1 x i32> %ld to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v1i32_to_v1i64: +; GCN: s_load_dword s[[LO:[0-9]+]] +; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[LO]], 31 +; GCN: store_dwordx2 +define void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 { + %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in + %ext = sext <1 x i32> %ld to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v2i32_to_v2i64: +; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}} +; GCN: store_dwordx4 +define void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 { + %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in + %ext = zext <2 x i32> %ld to <2 x i64> + 
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v2i32_to_v2i64: +; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}} + +; GCN-DAG: s_ashr_i32 +; GCN-DAG: s_ashr_i32 + +; GCN: store_dwordx4 +define void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 { + %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in + %ext = sext <2 x i32> %ld to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v4i32_to_v4i64: +; GCN: s_load_dwordx4 + +; GCN: store_dwordx4 +; GCN: store_dwordx4 +define void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 { + %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in + %ext = zext <4 x i32> %ld to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v4i32_to_v4i64: +; GCN: s_load_dwordx4 + +; GCN: s_ashr_i32 +; GCN: s_ashr_i32 +; GCN: s_ashr_i32 +; GCN: s_ashr_i32 + +; GCN: store_dwordx4 +; GCN: store_dwordx4 +define void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 { + %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in + %ext = sext <4 x i32> %ld to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v8i32_to_v8i64: +; GCN: s_load_dwordx8 + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 + +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +define void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 { + %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in + %ext = zext <8 x i32> %ld to <8 x i64> + 
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v8i32_to_v8i64: +; GCN: s_load_dwordx8 + +; GCN: s_ashr_i32 +; GCN: s_ashr_i32 +; GCN: s_ashr_i32 +; GCN: s_ashr_i32 +; GCN: s_ashr_i32 +; GCN: s_ashr_i32 +; GCN: s_ashr_i32 +; GCN: s_ashr_i32 + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 + +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +define void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 { + %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in + %ext = sext <8 x i32> %ld to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v16i32_to_v16i64: +; GCN: s_load_dwordx16 + + +; GCN-DAG: s_ashr_i32 + +; GCN: store_dwordx4 +; GCN: store_dwordx4 +; GCN: store_dwordx4 +; GCN: store_dwordx4 +; GCN: store_dwordx4 +; GCN: store_dwordx4 +; GCN: store_dwordx4 +; GCN: store_dwordx4 +define void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 { + %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in + %ext = sext <16 x i32> %ld to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v16i32_to_v16i64 +; GCN: s_load_dwordx16 + +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: 
flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +define void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 { + %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in + %ext = zext <16 x i32> %ld to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v32i32_to_v32i64: + +; GCN: s_load_dwordx16 +; GCN: s_load_dwordx16 + +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 + +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 + +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 + +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 + +define void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 { + %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in + %ext = sext <32 x i32> %ld to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v32i32_to_v32i64: +; GCN: s_load_dwordx16 +; GCN: s_load_dwordx16 + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: 
buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 + + +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 + +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 + +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 + +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +define void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 { + %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in + %ext = zext <32 x i32> %ld to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-constant-i64.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-constant-i64.ll @@ -0,0 +1,90 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck 
-check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + + +; FUNC-LABEL: {{^}}constant_load_i64: +; GCN: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} +; EG: VTX_READ_64 +define void @constant_load_i64(i64 addrspace(1)* %out, i64 addrspace(2)* %in) #0 { + %ld = load i64, i64 addrspace(2)* %in + store i64 %ld, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v2i64: +; GCN: s_load_dwordx4 + +; EG: VTX_READ_128 +define void @constant_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(2)* %in) #0 { +entry: + %ld = load <2 x i64>, <2 x i64> addrspace(2)* %in + store <2 x i64> %ld, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v3i64: +; GCN-DAG: s_load_dwordx4 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} +; SI-DAG: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x4{{$}} +; VI-DAG: s_load_dwordx2 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x10{{$}} + +; EG-DAG: VTX_READ_32 +; EG-DAG: VTX_READ_32 +; EG-DAG: VTX_READ_32 +; EG-DAG: VTX_READ_32 +; EG-DAG: VTX_READ_32 +; EG-DAG: VTX_READ_32 +define void @constant_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(2)* %in) #0 { +entry: + %ld = load <3 x i64>, <3 x i64> addrspace(2)* %in + store <3 x i64> %ld, <3 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v4i64 +; GCN: s_load_dwordx8 + +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @constant_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(2)* %in) #0 { +entry: + %ld = load <4 x i64>, <4 x i64> addrspace(2)* %in + store <4 x i64> %ld, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v8i64: +; GCN: s_load_dwordx16 + +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @constant_load_v8i64(<8 x i64> 
addrspace(1)* %out, <8 x i64> addrspace(2)* %in) #0 { +entry: + %ld = load <8 x i64>, <8 x i64> addrspace(2)* %in + store <8 x i64> %ld, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v16i64: +; GCN: s_load_dwordx16 +; GCN: s_load_dwordx16 + +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @constant_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(2)* %in) #0 { +entry: + %ld = load <16 x i64>, <16 x i64> addrspace(2)* %in + store <16 x i64> %ld, <16 x i64> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-constant-i8.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-constant-i8.ll @@ -0,0 +1,605 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + + +; FUNC-LABEL: {{^}}constant_load_i8: +; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}} +; GCN-HSA: flat_load_ubyte + +; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 +define void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(2)* %in) #0 { +entry: + %ld = load i8, i8 addrspace(2)* %in + store i8 %ld, i8 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v2i8: +; GCN-NOHSA: buffer_load_ushort v +; GCN-HSA: flat_load_ushort v + +; EG: VTX_READ_16 +define void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 { +entry: + %ld = load <2 x 
i8>, <2 x i8> addrspace(2)* %in + store <2 x i8> %ld, <2 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v3i8: +; GCN: s_load_dword s + +; EG-DAG: VTX_READ_32 +define void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 { +entry: + %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in + store <3 x i8> %ld, <3 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v4i8: +; GCN: s_load_dword s + +; EG: VTX_READ_32 +define void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 { +entry: + %ld = load <4 x i8>, <4 x i8> addrspace(2)* %in + store <4 x i8> %ld, <4 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v8i8: +; GCN: s_load_dwordx2 + +; EG: VTX_READ_64 +define void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 { +entry: + %ld = load <8 x i8>, <8 x i8> addrspace(2)* %in + store <8 x i8> %ld, <8 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_v16i8: +; GCN: s_load_dwordx4 + +; EG: VTX_READ_128 +define void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 { +entry: + %ld = load <16 x i8>, <16 x i8> addrspace(2)* %in + store <16 x i8> %ld, <16 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_i8_to_i32: +; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}, +; GCN-HSA: flat_load_ubyte + +; EG: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} +define void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(2)* %in) #0 { + %a = load i8, i8 addrspace(2)* %in + %ext = zext i8 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_i8_to_i32: +; GCN-NOHSA: buffer_load_sbyte +; GCN-HSA: flat_load_sbyte + +; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] +; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal +; EG: 8 +define void @constant_sextload_i8_to_i32(i32 
addrspace(1)* %out, i8 addrspace(2)* %in) #0 { + %ld = load i8, i8 addrspace(2)* %in + %ext = sext i8 %ld to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i32: +define void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(2)* %in + %ext = zext <1 x i8> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i32: +define void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(2)* %in + %ext = sext <1 x i8> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i32: +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; EG: VTX_READ_8 +; EG: VTX_READ_8 +define void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(2)* %in + %ext = zext <2 x i8> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i32: +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte + +; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal +; EG-DAG: 8 +; EG-DAG: 8 +define void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(2)* %in + %ext = sext <2 x i8> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> 
addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v3i8_to_v3i32: +; GCN: s_load_dword s + +; GCN-DAG: s_bfe_u32 +; GCN-DAG: s_bfe_u32 +; GCN-DAG: s_and_b32 +define void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 { +entry: + %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in + %ext = zext <3 x i8> %ld to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v3i8_to_v3i32: +; GCN: s_load_dword s + +; GCN-DAG: s_bfe_i32 +; GCN-DAG: s_bfe_i32 +; GCN-DAG: s_bfe_i32 +define void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(2)* %in) #0 { +entry: + %ld = load <3 x i8>, <3 x i8> addrspace(2)* %in + %ext = sext <3 x i8> %ld to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i32: +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte + +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +define void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(2)* %in + %ext = zext <4 x i8> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i32: +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte + +; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; EG-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] +; EG-DAG: 
VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal +; EG-DAG: 8 +; EG-DAG: 8 +; EG-DAG: 8 +; EG-DAG: 8 +define void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(2)* %in + %ext = sext <4 x i8> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i32: +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte + +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +define void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(2)* %in + %ext = zext <8 x i8> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i32: +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte + +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte 
+define void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(2)* %in + %ext = sext <8 x i8> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i32: +define void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(2)* %in + %ext = zext <16 x i8> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i32: +define void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(2)* %in + %ext = sext <16 x i8> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i32: +define void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(2)* %in + %ext = zext <32 x i8> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i32: +define void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(2)* %in + %ext = sext <32 x i8> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i32: +define void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 { + %load = load <64 x i8>, <64 x i8> addrspace(2)* %in + %ext = zext <64 x i8> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: 
{{^}}constant_sextload_v64i8_to_v64i32: +define void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 { + %load = load <64 x i8>, <64 x i8> addrspace(2)* %in + %ext = sext <64 x i8> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_i8_to_i64: +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} + +; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]], +; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] + +; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]], +; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]] +define void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 { + %a = load i8, i8 addrspace(2)* %in + %ext = zext i8 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_i8_to_i64: +; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]], +; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]], +; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] + +; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} +; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(2)* %in) #0 { + %a = load i8, i8 addrspace(2)* %in + %ext = sext i8 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i64: +define void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(2)* %in + %ext = zext <1 x i8> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i64: +define void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(2)* %in + %ext = sext <1 x i8> %load to 
<1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i64: +define void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(2)* %in + %ext = zext <2 x i8> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i64: +define void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(2)* %in + %ext = sext <2 x i8> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i64: +define void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(2)* %in + %ext = zext <4 x i8> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i64: +define void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(2)* %in + %ext = sext <4 x i8> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i64: +define void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(2)* %in + %ext = zext <8 x i8> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i64: +define void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(2)* %in + %ext = sext <8 x i8> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> 
addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i64: +define void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(2)* %in + %ext = zext <16 x i8> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i64: +define void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(2)* %in + %ext = sext <16 x i8> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i64: +define void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(2)* %in + %ext = zext <32 x i8> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i64: +define void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(2)* %in + %ext = sext <32 x i8> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64: +; define void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 { +; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in +; %ext = zext <64 x i8> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64: +; define void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 { +; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in +; %ext = sext <64 x i8> %load to <64 x i64> 
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } + +; FUNC-LABEL: {{^}}constant_zextload_i8_to_i16: +; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]], +; GCN-NOHSA: buffer_store_short v[[VAL]] + +; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]], +; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]] +define void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 { + %a = load i8, i8 addrspace(2)* %in + %ext = zext i8 %a to i16 + store i16 %ext, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_i8_to_i16: +; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]], +; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]], + +; GCN-NOHSA: buffer_store_short v[[VAL]] +; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]] +define void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(2)* %in) #0 { + %a = load i8, i8 addrspace(2)* %in + %ext = sext i8 %a to i16 + store i16 %ext, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i16: +define void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(2)* %in + %ext = zext <1 x i8> %load to <1 x i16> + store <1 x i16> %ext, <1 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i16: +define void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(2)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(2)* %in + %ext = sext <1 x i8> %load to <1 x i16> + store <1 x i16> %ext, <1 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i16: +define void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(2)* %in + %ext = zext <2 x i8> %load to <2 x i16> + store <2 x i16> %ext, <2 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: 
{{^}}constant_sextload_v2i8_to_v2i16: +define void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(2)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(2)* %in + %ext = sext <2 x i8> %load to <2 x i16> + store <2 x i16> %ext, <2 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i16: +define void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(2)* %in + %ext = zext <4 x i8> %load to <4 x i16> + store <4 x i16> %ext, <4 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i16: +define void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(2)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(2)* %in + %ext = sext <4 x i8> %load to <4 x i16> + store <4 x i16> %ext, <4 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i16: +define void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(2)* %in + %ext = zext <8 x i8> %load to <8 x i16> + store <8 x i16> %ext, <8 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i16: +define void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(2)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(2)* %in + %ext = sext <8 x i8> %load to <8 x i16> + store <8 x i16> %ext, <8 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i16: +define void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(2)* %in + %ext = zext <16 x i8> %load to <16 x i16> + store <16 x i16> %ext, <16 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: 
{{^}}constant_sextload_v16i8_to_v16i16: +define void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(2)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(2)* %in + %ext = sext <16 x i8> %load to <16 x i16> + store <16 x i16> %ext, <16 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i16: +define void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(2)* %in + %ext = zext <32 x i8> %load to <32 x i16> + store <32 x i16> %ext, <32 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i16: +define void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(2)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(2)* %in + %ext = sext <32 x i8> %load to <32 x i16> + store <32 x i16> %ext, <32 x i16> addrspace(1)* %out + ret void +} + +; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16: +; define void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 { +; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in +; %ext = zext <64 x i8> %load to <64 x i16> +; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16: +; define void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(2)* %in) #0 { +; %load = load <64 x i8>, <64 x i8> addrspace(2)* %in +; %ext = sext <64 x i8> %load to <64 x i16> +; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out +; ret void +; } + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-global-f32.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-global-f32.ll @@ -0,0 +1,93 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN 
-check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s + +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}global_load_f32: +; GCN-NOHSA: buffer_load_dword v{{[0-9]+}} +; GCN-HSA: flat_load_dword + +; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 +define void @global_load_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +entry: + %tmp0 = load float, float addrspace(1)* %in + store float %tmp0, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v2f32: +; GCN-NOHSA: buffer_load_dwordx2 +; GCN-HSA: flat_load_dwordx2 + +; R600: VTX_READ_64 +define void @global_load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 { +entry: + %tmp0 = load <2 x float>, <2 x float> addrspace(1)* %in + store <2 x float> %tmp0, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v3f32: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; R600: VTX_READ_128 +define void @global_load_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 { +entry: + %tmp0 = load <3 x float>, <3 x float> addrspace(1)* %in + store <3 x float> %tmp0, <3 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v4f32: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; R600: VTX_READ_128 +define void @global_load_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 { +entry: + %tmp0 = load <4 x float>, <4 x float> addrspace(1)* %in + store <4 x float> %tmp0, <4 x float> addrspace(1)* %out + ret void +} + +; 
FUNC-LABEL: {{^}}global_load_v8f32: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; R600: VTX_READ_128 +; R600: VTX_READ_128 +define void @global_load_v8f32(<8 x float> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 { +entry: + %tmp0 = load <8 x float>, <8 x float> addrspace(1)* %in + store <8 x float> %tmp0, <8 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v16f32: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; R600: VTX_READ_128 +; R600: VTX_READ_128 +; R600: VTX_READ_128 +; R600: VTX_READ_128 +define void @global_load_v16f32(<16 x float> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 { +entry: + %tmp0 = load <16 x float>, <16 x float> addrspace(1)* %in + store <16 x float> %tmp0, <16 x float> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-global-f64.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-global-f64.ll @@ -0,0 +1,94 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}global_load_f64: +; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] +; GCN-NOHSA: buffer_store_dwordx2 [[VAL]] + +; GCN-HSA: flat_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] +; GCN-HSA: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]] +define void 
@global_load_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %ld = load double, double addrspace(1)* %in + store double %ld, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v2f64: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +define void @global_load_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) #0 { +entry: + %ld = load <2 x double>, <2 x double> addrspace(1)* %in + store <2 x double> %ld, <2 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v3f64: +; GCN-NOHSA-DAG: buffer_load_dwordx4 +; GCN-NOHSA-DAG: buffer_load_dwordx2 +; GCN-HSA-DAG: flat_load_dwordx4 +; GCN-HSA-DAG: flat_load_dwordx2 +define void @global_load_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %in) #0 { +entry: + %ld = load <3 x double>, <3 x double> addrspace(1)* %in + store <3 x double> %ld, <3 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v4f64: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +define void @global_load_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) #0 { +entry: + %ld = load <4 x double>, <4 x double> addrspace(1)* %in + store <4 x double> %ld, <4 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v8f64: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +define void @global_load_v8f64(<8 x double> addrspace(1)* %out, <8 x double> addrspace(1)* %in) #0 { +entry: + %ld = load <8 x double>, <8 x double> addrspace(1)* %in + store <8 x double> %ld, <8 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v16f64: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; 
GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +define void @global_load_v16f64(<16 x double> addrspace(1)* %out, <16 x double> addrspace(1)* %in) #0 { +entry: + %ld = load <16 x double>, <16 x double> addrspace(1)* %in + store <16 x double> %ld, <16 x double> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-global-i1.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-global-i1.ll @@ -0,0 +1,370 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}global_load_i1: +; GCN: buffer_load_ubyte +; GCN: v_and_b32_e32 v{{[0-9]+}}, 1 +; GCN: buffer_store_byte + +; EG: VTX_READ_8 +; EG: AND_INT +define void @global_load_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) #0 { + %load = load i1, i1 addrspace(1)* %in + store i1 %load, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v2i1: +define void @global_load_v2i1(<2 x i1> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(1)* %in + store <2 x i1> %load, <2 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v3i1: +define void @global_load_v3i1(<3 x i1> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(1)* %in + 
store <3 x i1> %load, <3 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v4i1: +define void @global_load_v4i1(<4 x i1> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(1)* %in + store <4 x i1> %load, <4 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v8i1: +define void @global_load_v8i1(<8 x i1> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(1)* %in + store <8 x i1> %load, <8 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v16i1: +define void @global_load_v16i1(<16 x i1> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(1)* %in + store <16 x i1> %load, <16 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v32i1: +define void @global_load_v32i1(<32 x i1> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(1)* %in + store <32 x i1> %load, <32 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v64i1: +define void @global_load_v64i1(<64 x i1> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 { + %load = load <64 x i1>, <64 x i1> addrspace(1)* %in + store <64 x i1> %load, <64 x i1> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_i1_to_i32: +; GCN: buffer_load_ubyte +; GCN: buffer_store_dword +define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 { + %a = load i1, i1 addrspace(1)* %in + %ext = zext i1 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_i1_to_i32: +; GCN: buffer_load_ubyte +; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}} +; GCN: buffer_store_dword + +; EG: VTX_READ_8 +; EG: BFE_INT +define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) #0 { + %a = load i1, i1 addrspace(1)* %in + %ext = sext i1 %a to 
i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i32: +define void @global_zextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 { + %load = load <1 x i1>, <1 x i1> addrspace(1)* %in + %ext = zext <1 x i1> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i32: +define void @global_sextload_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 { + %load = load <1 x i1>, <1 x i1> addrspace(1)* %in + %ext = sext <1 x i1> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i32: +define void @global_zextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(1)* %in + %ext = zext <2 x i1> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i32: +define void @global_sextload_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(1)* %in + %ext = sext <2 x i1> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i32: +define void @global_zextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(1)* %in + %ext = zext <3 x i1> %load to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i32: +define void @global_sextload_v3i1_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(1)* %in + %ext = sext <3 x i1> %load to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(1)* %out + ret void +} + +; 
FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i32: +define void @global_zextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(1)* %in + %ext = zext <4 x i1> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i32: +define void @global_sextload_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(1)* %in + %ext = sext <4 x i1> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i32: +define void @global_zextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(1)* %in + %ext = zext <8 x i1> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i32: +define void @global_sextload_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(1)* %in + %ext = sext <8 x i1> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i32: +define void @global_zextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(1)* %in + %ext = zext <16 x i1> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i32: +define void @global_sextload_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(1)* %in + %ext = sext <16 x i1> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v32i1_to_v32i32: 
+define void @global_zextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(1)* %in + %ext = zext <32 x i1> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i32: +define void @global_sextload_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(1)* %in + %ext = sext <32 x i1> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i32: +define void @global_zextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 { + %load = load <64 x i1>, <64 x i1> addrspace(1)* %in + %ext = zext <64 x i1> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i32: +define void @global_sextload_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 { + %load = load <64 x i1>, <64 x i1> addrspace(1)* %in + %ext = sext <64 x i1> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_i1_to_i64: +; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]], +; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}} +; GCN: buffer_store_dwordx2 +define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 { + %a = load i1, i1 addrspace(1)* %in + %ext = zext i1 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_i1_to_i64: +; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]], +; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}} +; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]] +; GCN: buffer_store_dwordx2 +define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) #0 { + %a = load i1, i1 
addrspace(1)* %in + %ext = sext i1 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v1i1_to_v1i64: +define void @global_zextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 { + %load = load <1 x i1>, <1 x i1> addrspace(1)* %in + %ext = zext <1 x i1> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v1i1_to_v1i64: +define void @global_sextload_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* %in) #0 { + %load = load <1 x i1>, <1 x i1> addrspace(1)* %in + %ext = sext <1 x i1> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v2i1_to_v2i64: +define void @global_zextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(1)* %in + %ext = zext <2 x i1> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v2i1_to_v2i64: +define void @global_sextload_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(1)* %in + %ext = sext <2 x i1> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v3i1_to_v3i64: +define void @global_zextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(1)* %in + %ext = zext <3 x i1> %load to <3 x i64> + store <3 x i64> %ext, <3 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v3i1_to_v3i64: +define void @global_sextload_v3i1_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x i1> addrspace(1)* %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(1)* %in + %ext = sext <3 x i1> %load to <3 x i64> + store <3 x i64> %ext, <3 x i64> 
addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v4i1_to_v4i64: +define void @global_zextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(1)* %in + %ext = zext <4 x i1> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v4i1_to_v4i64: +define void @global_sextload_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(1)* %in + %ext = sext <4 x i1> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v8i1_to_v8i64: +define void @global_zextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(1)* %in + %ext = zext <8 x i1> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v8i1_to_v8i64: +define void @global_sextload_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(1)* %in + %ext = sext <8 x i1> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v16i1_to_v16i64: +define void @global_zextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(1)* %in + %ext = zext <16 x i1> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v16i1_to_v16i64: +define void @global_sextload_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(1)* %in + %ext = sext <16 x i1> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: 
{{^}}global_zextload_v32i1_to_v32i64: +define void @global_zextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(1)* %in + %ext = zext <32 x i1> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v32i1_to_v32i64: +define void @global_sextload_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(1)* %in + %ext = sext <32 x i1> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v64i1_to_v64i64: +define void @global_zextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 { + %load = load <64 x i1>, <64 x i1> addrspace(1)* %in + %ext = zext <64 x i1> %load to <64 x i64> + store <64 x i64> %ext, <64 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v64i1_to_v64i64: +define void @global_sextload_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* %in) #0 { + %load = load <64 x i1>, <64 x i1> addrspace(1)* %in + %ext = sext <64 x i1> %load to <64 x i64> + store <64 x i64> %ext, <64 x i64> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-global-i16.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-global-i16.ll @@ -0,0 +1,774 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s 
| FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FIXME: r600 is broken because the bigger testcases spill and it's not implemented + +; FUNC-LABEL: {{^}}global_load_i16: +; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}} +; GCN-HSA: flat_load_ushort + +; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 +define void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { +entry: + %ld = load i16, i16 addrspace(1)* %in + store i16 %ld, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v2i16: +; GCN-NOHSA: buffer_load_dword v +; GCN-HSA: flat_load_dword v + +; EG: VTX_READ_32 +define void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { +entry: + %ld = load <2 x i16>, <2 x i16> addrspace(1)* %in + store <2 x i16> %ld, <2 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v3i16: +; GCN-NOHSA: buffer_load_dwordx2 v +; GCN-HSA: flat_load_dwordx2 v + +; EG-DAG: VTX_READ_32 +; EG-DAG: VTX_READ_16 +define void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { +entry: + %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in + store <3 x i16> %ld, <3 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v4i16: +; GCN-NOHSA: buffer_load_dwordx2 +; GCN-HSA: flat_load_dwordx2 + +; EG: VTX_READ_64 +define void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { +entry: + %ld = load <4 x i16>, <4 x i16> addrspace(1)* %in + store <4 x i16> %ld, <4 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v8i16: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; EG: VTX_READ_128 +define void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) { +entry: + %ld = load <8 x i16>, <8 x i16> addrspace(1)* %in + store <8 x i16> %ld, <8 x i16> addrspace(1)* %out + ret void +} + +; 
FUNC-LABEL: {{^}}global_load_v16i16: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) { +entry: + %ld = load <16 x i16>, <16 x i16> addrspace(1)* %in + store <16 x i16> %ld, <16 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_i16_to_i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_store_dword + +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_store_dword + +; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} +define void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { + %a = load i16, i16 addrspace(1)* %in + %ext = zext i16 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_i16_to_i32: +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_store_dword + +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_store_dword + +; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]] +; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal +; EG: 16 +define void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { + %a = load i16, i16 addrspace(1)* %in + %ext = sext i16 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-HSA: flat_load_ushort +define void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { + %load = load <1 x i16>, <1 x i16> addrspace(1)* %in + %ext = zext <1 x i16> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i32: +; GCN-NOHSA: buffer_load_sshort +; GCN-HSA: flat_load_sshort +define void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* 
%in) #0 { + %load = load <1 x i16>, <1 x i16> addrspace(1)* %in + %ext = sext <1 x i16> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +define void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { + %load = load <2 x i16>, <2 x i16> addrspace(1)* %in + %ext = zext <2 x i16> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i32: +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort + +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort + +; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal +; EG-DAG: 16 +; EG-DAG: 16 +define void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { + %load = load <2 x i16>, <2 x i16> addrspace(1)* %in + %ext = sext <2 x i16> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_global_zextload_v3i16_to_v3i32: +; GCN-NOHSA: buffer_load_dwordx2 +; GCN-HSA: flat_load_dwordx2 +define void @global_global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { +entry: + %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in + %ext = zext <3 x i16> %ld to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_global_sextload_v3i16_to_v3i32: +; GCN-NOHSA: buffer_load_dwordx2 +; GCN-HSA: flat_load_dwordx2 +define void @global_global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> 
addrspace(1)* %in) { +entry: + %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in + %ext = sext <3 x i16> %ld to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_global_zextload_v4i16_to_v4i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort + +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort + +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +; EG: VTX_READ_16 +define void @global_global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { + %load = load <4 x i16>, <4 x i16> addrspace(1)* %in + %ext = zext <4 x i16> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i32: +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort + +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort + +; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; EG-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] +; EG-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal +; EG-DAG: 16 +; EG-DAG: 16 +; EG-DAG: 16 +; EG-DAG: 16 +define void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { + %load = load <4 x i16>, <4 x i16> addrspace(1)* %in + %ext = sext <4 x i16> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 
+ ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort + +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +define void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { + %load = load <8 x i16>, <8 x i16> addrspace(1)* %in + %ext = zext <8 x i16> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i32: +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort + +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +define void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { + %load = load <8 x i16>, <8 x i16> addrspace(1)* %in + %ext = sext <8 x i16> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: 
buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort + +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +define void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { + %load = load <16 x i16>, <16 x i16> addrspace(1)* %in + %ext = zext <16 x i16> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i32: +define void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { + %load = load <16 x i16>, <16 x i16> addrspace(1)* %in + %ext = sext <16 x i16> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; 
GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort + +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +define void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { + %load = load <32 x i16>, <32 x i16> addrspace(1)* %in + %ext = zext <32 x i16> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i32: +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; 
GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort +; GCN-NOHSA: buffer_load_sshort + +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +; GCN-HSA: flat_load_sshort +define void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { + %load = load <32 x i16>, <32 x i16> addrspace(1)* %in + %ext = sext <32 x i16> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void 
+} + +; FUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: 
buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort + +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: 
flat_load_ushort +; GCN-HSA: flat_load_ushort +define void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { + %load = load <64 x i16>, <64 x i16> addrspace(1)* %in + %ext = zext <64 x i16> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i32: +define void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { + %load = load <64 x i16>, <64 x i16> addrspace(1)* %in + %ext = sext <64 x i16> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_i16_to_i64: +; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]], +; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]], +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} + +; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] +; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { + %a = load i16, i16 addrspace(1)* %in + %ext = zext i16 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_i16_to_i64: +; GCN-NOHSA-DAG: buffer_load_sshort v[[LO:[0-9]+]], +; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]], +; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] + +; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] +; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { + %a = load i16, i16 addrspace(1)* %in + %ext = sext i16 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i64: +define void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { + %load = load <1 x i16>, <1 x i16> 
addrspace(1)* %in + %ext = zext <1 x i16> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i64: +define void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { + %load = load <1 x i16>, <1 x i16> addrspace(1)* %in + %ext = sext <1 x i16> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i64: +define void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { + %load = load <2 x i16>, <2 x i16> addrspace(1)* %in + %ext = zext <2 x i16> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i64: +define void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { + %load = load <2 x i16>, <2 x i16> addrspace(1)* %in + %ext = sext <2 x i16> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v4i16_to_v4i64: +define void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { + %load = load <4 x i16>, <4 x i16> addrspace(1)* %in + %ext = zext <4 x i16> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i64: +define void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { + %load = load <4 x i16>, <4 x i16> addrspace(1)* %in + %ext = sext <4 x i16> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i64: +define void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { + %load = load <8 x i16>, <8 x i16> addrspace(1)* %in + %ext = zext <8 x 
i16> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i64: +define void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { + %load = load <8 x i16>, <8 x i16> addrspace(1)* %in + %ext = sext <8 x i16> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i64: +define void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { + %load = load <16 x i16>, <16 x i16> addrspace(1)* %in + %ext = zext <16 x i16> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i64: +define void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { + %load = load <16 x i16>, <16 x i16> addrspace(1)* %in + %ext = sext <16 x i16> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i64: +define void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { + %load = load <32 x i16>, <32 x i16> addrspace(1)* %in + %ext = zext <32 x i16> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i64: +define void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { + %load = load <32 x i16>, <32 x i16> addrspace(1)* %in + %ext = sext <32 x i16> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; ; XFUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i64: +; define void @global_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { +; %load = load <64 x i16>, <64 x i16> 
addrspace(1)* %in +; %ext = zext <64 x i16> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } + +; ; XFUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i64: +; define void @global_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { +; %load = load <64 x i16>, <64 x i16> addrspace(1)* %in +; %ext = sext <64 x i16> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-global-i32.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-global-i32.ll @@ -0,0 +1,523 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s + +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + + +; FUNC-LABEL: {{^}}global_load_i32: +; GCN-NOHSA: buffer_load_dword v{{[0-9]+}} +; GCN-HSA: flat_load_dword + +; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 +define void @global_load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +entry: + %ld = load i32, i32 addrspace(1)* %in + store i32 %ld, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v2i32: +; GCN-NOHSA: buffer_load_dwordx2 +; GCN-HSA: flat_load_dwordx2 + +; EG: VTX_READ_64 +define void @global_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 { +entry: + %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in + store <2 x i32> %ld, <2 x i32> addrspace(1)* 
%out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v3i32: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; EG: VTX_READ_128 +define void @global_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %in) #0 { +entry: + %ld = load <3 x i32>, <3 x i32> addrspace(1)* %in + store <3 x i32> %ld, <3 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v4i32: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; EG: VTX_READ_128 +define void @global_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { +entry: + %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in + store <4 x i32> %ld, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v8i32: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @global_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 { +entry: + %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in + store <8 x i32> %ld, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v16i32: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @global_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 { +entry: + %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in + store <16 x i32> %ld, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_i32_to_i64: +; GCN-NOHSA-DAG: buffer_load_dword v[[LO:[0-9]+]], +; GCN-HSA-DAG: flat_load_dword v[[LO:[0-9]+]], +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} + +; GCN-NOHSA: buffer_store_dwordx2 
v{{\[}}[[LO]]:[[HI]]] +; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]] + +; EG: MEM_RAT +; EG: MEM_RAT +define void @global_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %ld = load i32, i32 addrspace(1)* %in + %ext = zext i32 %ld to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_i32_to_i64: +; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]] +; GCN-HSA: flat_load_dword v[[LO:[0-9]+]] +; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] +; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} +; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} + +; EG: MEM_RAT +; EG: MEM_RAT +; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x +; EG: 31 +define void @global_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %ld = load i32, i32 addrspace(1)* %in + %ext = sext i32 %ld to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v1i32_to_v1i64: +; GCN-NOHSA: buffer_load_dword +; GCN-NOHSA: buffer_store_dwordx2 + +; GCN-HSA: flat_load_dword +; GCN-HSA: flat_store_dwordx2 +define void @global_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 { + %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in + %ext = zext <1 x i32> %ld to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v1i32_to_v1i64: +; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]] +; GCN-HSA: flat_load_dword v[[LO:[0-9]+]] +; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] +; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} +; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @global_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 { + %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in + %ext = sext <1 x i32> %ld 
to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v2i32_to_v2i64: +; GCN-NOHSA: buffer_load_dwordx2 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-HSA: flat_load_dwordx2 +; GCN-HSA: flat_store_dwordx4 +define void @global_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 { + %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in + %ext = zext <2 x i32> %ld to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v2i32_to_v2i64: +; GCN-NOHSA: buffer_load_dwordx2 +; GCN-HSA: flat_load_dwordx2 + +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +define void @global_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 { + %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in + %ext = sext <2 x i32> %ld to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v4i32_to_v4i64: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +define void @global_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { + %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in + %ext = zext <4 x i32> %ld to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v4i32_to_v4i64: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 + +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +define void @global_sextload_v4i32_to_v4i64(<4 x 
i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { + %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in + %ext = sext <4 x i32> %ld to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v8i32_to_v8i64: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 + +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +define void @global_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 { + %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in + %ext = zext <8 x i32> %ld to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v8i32_to_v8i64: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 + +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +define void @global_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 { + %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in + %ext = sext <8 x i32> %ld to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v16i32_to_v16i64: +; 
GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + + +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 + +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 + +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 + +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +define void @global_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 { + %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in + %ext = sext <16 x i32> %ld to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v16i32_to_v16i64 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: 
flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +define void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 { + %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in + %ext = zext <16 x i32> %ld to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v32i32_to_v32i64: + +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 +; GCN-DAG: v_ashrrev_i32 + +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: 
buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA: buffer_store_dwordx4 + +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 + +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 + +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 + +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 +; GCN-HSA: flat_store_dwordx4 + +define void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 { + %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in + %ext = sext <32 x i32> %ld to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v32i32_to_v32i64: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: 
buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 + +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 + + +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 + +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 + +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 + +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +define void @global_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 { + %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in + %ext = zext <32 x i32> %ld to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-global-i64.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-global-i64.ll @@ -0,0 +1,125 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s + +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman 
< %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}global_load_i64: +; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] +; GCN-NOHSA: buffer_store_dwordx2 [[VAL]] + +; GCN-HSA: flat_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] +; GCN-HSA: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]] + +; EG: VTX_READ_64 +define void @global_load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #0 { + %ld = load i64, i64 addrspace(1)* %in + store i64 %ld, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v2i64: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; EG: VTX_READ_128 +define void @global_load_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) #0 { +entry: + %ld = load <2 x i64>, <2 x i64> addrspace(1)* %in + store <2 x i64> %ld, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v3i64: +; GCN-NOHSA-DAG: buffer_load_dwordx4 +; GCN-NOHSA-DAG: buffer_load_dwordx2 +; GCN-HSA-DAG: flat_load_dwordx4 +; GCN-HSA-DAG: flat_load_dwordx2 + +; EG-DAG: VTX_READ_32 +; EG-DAG: VTX_READ_32 +; EG-DAG: VTX_READ_32 +; EG-DAG: VTX_READ_32 +; EG-DAG: VTX_READ_32 +; EG-DAG: VTX_READ_32 +define void @global_load_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %in) #0 { +entry: + %ld = load <3 x i64>, <3 x i64> addrspace(1)* %in + store <3 x i64> %ld, <3 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v4i64: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @global_load_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 { +entry: + %ld = load <4 x i64>, <4 x i64> addrspace(1)* %in + store <4 x i64> %ld, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v8i64: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; 
GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @global_load_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(1)* %in) #0 { +entry: + %ld = load <8 x i64>, <8 x i64> addrspace(1)* %in + store <8 x i64> %ld, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v16i64: +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 +; GCN-NOHSA: buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 +; GCN-HSA: flat_load_dwordx4 + +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +; EG: VTX_READ_128 +define void @global_load_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %in) #0 { +entry: + %ld = load <16 x i64>, <16 x i64> addrspace(1)* %in + store <16 x i64> %ld, <16 x i64> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-global-i8.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-global-i8.ll @@ -0,0 +1,579 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN 
-check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + + +; FUNC-LABEL: {{^}}global_load_i8: +; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}} +; GCN-HSA: flat_load_ubyte + +; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 +define void @global_load_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +entry: + %ld = load i8, i8 addrspace(1)* %in + store i8 %ld, i8 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v2i8: +; GCN-NOHSA: buffer_load_ushort v +; GCN-HSA: flat_load_ushort v + +; EG: VTX_READ_16 +define void @global_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 { +entry: + %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in + store <2 x i8> %ld, <2 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v3i8: +; GCN-NOHSA: buffer_load_dword v +; GCN-HSA: flat_load_dword v + +; EG-DAG: VTX_READ_32 +define void @global_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 { +entry: + %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in + store <3 x i8> %ld, <3 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v4i8: +; GCN-NOHSA: buffer_load_dword v +; GCN-HSA: flat_load_dword v + +; EG: VTX_READ_32 +define void @global_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 { +entry: + %ld = load <4 x i8>, <4 x i8> addrspace(1)* %in + store <4 x i8> %ld, <4 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v8i8: +; GCN-NOHSA: buffer_load_dwordx2 +; GCN-HSA: flat_load_dwordx2 + +; EG: VTX_READ_64 +define void @global_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 { +entry: + %ld = load <8 x i8>, <8 x i8> addrspace(1)* %in + store <8 x i8> %ld, <8 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_load_v16i8: +; GCN-NOHSA: 
buffer_load_dwordx4 + +; GCN-HSA: flat_load_dwordx4 + +; EG: VTX_READ_128 +define void @global_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 { +entry: + %ld = load <16 x i8>, <16 x i8> addrspace(1)* %in + store <16 x i8> %ld, <16 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_i8_to_i32: +; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}, +; GCN-HSA: flat_load_ubyte + +; EG: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} +define void @global_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { + %a = load i8, i8 addrspace(1)* %in + %ext = zext i8 %a to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_i8_to_i32: +; GCN-NOHSA: buffer_load_sbyte +; GCN-HSA: flat_load_sbyte + +; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] +; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal +; EG: 8 +define void @global_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { + %ld = load i8, i8 addrspace(1)* %in + %ext = sext i8 %ld to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i32: +define void @global_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(1)* %in + %ext = zext <1 x i8> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i32: +define void @global_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(1)* %in + %ext = sext <1 x i8> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i32: +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; EG: VTX_READ_8 +; EG: VTX_READ_8 +define void 
@global_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(1)* %in + %ext = zext <2 x i8> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i32: +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte + +; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal +; EG-DAG: 8 +; EG-DAG: 8 +define void @global_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(1)* %in + %ext = sext <2 x i8> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v3i8_to_v3i32: +; GCN-NOHSA: buffer_load_dword v +; GCN-HSA: flat_load_dword v + +; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8 +; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8 +; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff, +define void @global_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 { +entry: + %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in + %ext = zext <3 x i8> %ld to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v3i8_to_v3i32: +; GCN-NOHSA: buffer_load_dword v +; GCN-HSA: flat_load_dword v + +; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8 +; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8 +; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8 +define void @global_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 { +entry: + %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in + %ext = sext <3 x i8> %ld to 
<3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i32: +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte + +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +; EG: VTX_READ_8 +define void @global_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(1)* %in + %ext = zext <4 x i8> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i32: +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-NOHSA: buffer_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte +; GCN-HSA: flat_load_sbyte + +; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] +; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] +; EG-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] +; EG-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal +; EG-DAG: 8 +; EG-DAG: 8 +; EG-DAG: 8 +; EG-DAG: 8 +define void @global_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(1)* %in + %ext = sext <4 x i8> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i32: +define void @global_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) 
#0 { + %load = load <8 x i8>, <8 x i8> addrspace(1)* %in + %ext = zext <8 x i8> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i32: +define void @global_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(1)* %in + %ext = sext <8 x i8> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i32: +define void @global_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(1)* %in + %ext = zext <16 x i8> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i32: +define void @global_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(1)* %in + %ext = sext <16 x i8> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i32: +define void @global_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(1)* %in + %ext = zext <32 x i8> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i32: +define void @global_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(1)* %in + %ext = sext <32 x i8> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i32: +define void @global_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 { + %load = load 
<64 x i8>, <64 x i8> addrspace(1)* %in + %ext = zext <64 x i8> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i32: +define void @global_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 { + %load = load <64 x i8>, <64 x i8> addrspace(1)* %in + %ext = sext <64 x i8> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_i8_to_i64: +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} + +; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]], +; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]] + +; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]], +; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]] +define void @global_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { + %a = load i8, i8 addrspace(1)* %in + %ext = zext i8 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_i8_to_i64: +; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]], +; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]], +; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] + +; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} +; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @global_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { + %a = load i8, i8 addrspace(1)* %in + %ext = sext i8 %a to i64 + store i64 %ext, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i64: +define void @global_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(1)* %in + %ext = zext <1 x i8> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i64: +define void 
@global_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(1)* %in + %ext = sext <1 x i8> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i64: +define void @global_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(1)* %in + %ext = zext <2 x i8> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i64: +define void @global_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(1)* %in + %ext = sext <2 x i8> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i64: +define void @global_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(1)* %in + %ext = zext <4 x i8> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i64: +define void @global_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(1)* %in + %ext = sext <4 x i8> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i64: +define void @global_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(1)* %in + %ext = zext <8 x i8> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i64: +define void @global_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> 
addrspace(1)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(1)* %in + %ext = sext <8 x i8> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i64: +define void @global_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(1)* %in + %ext = zext <16 x i8> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i64: +define void @global_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(1)* %in + %ext = sext <16 x i8> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i64: +define void @global_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(1)* %in + %ext = zext <32 x i8> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i64: +define void @global_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(1)* %in + %ext = sext <32 x i8> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} + +; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i64: +; define void @global_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 { +; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in +; %ext = zext <64 x i8> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i64: +; define void @global_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x 
i8> addrspace(1)* %in) #0 { +; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in +; %ext = sext <64 x i8> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out +; ret void +; } + +; FUNC-LABEL: {{^}}global_zextload_i8_to_i16: +; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]], +; GCN-NOHSA: buffer_store_short v[[VAL]] + +; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]], +; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]] +define void @global_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { + %a = load i8, i8 addrspace(1)* %in + %ext = zext i8 %a to i16 + store i16 %ext, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_i8_to_i16: +; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]], +; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]], + +; GCN-NOHSA: buffer_store_short v[[VAL]] +; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]] +define void @global_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { + %a = load i8, i8 addrspace(1)* %in + %ext = sext i8 %a to i16 + store i16 %ext, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i16: +define void @global_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(1)* %in + %ext = zext <1 x i8> %load to <1 x i16> + store <1 x i16> %ext, <1 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i16: +define void @global_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(1)* %in + %ext = sext <1 x i8> %load to <1 x i16> + store <1 x i16> %ext, <1 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i16: +define void @global_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(1)* %in + %ext = zext <2 x i8> 
%load to <2 x i16> + store <2 x i16> %ext, <2 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i16: +define void @global_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(1)* %in + %ext = sext <2 x i8> %load to <2 x i16> + store <2 x i16> %ext, <2 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i16: +define void @global_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(1)* %in + %ext = zext <4 x i8> %load to <4 x i16> + store <4 x i16> %ext, <4 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i16: +define void @global_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(1)* %in + %ext = sext <4 x i8> %load to <4 x i16> + store <4 x i16> %ext, <4 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i16: +define void @global_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(1)* %in + %ext = zext <8 x i8> %load to <8 x i16> + store <8 x i16> %ext, <8 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i16: +define void @global_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(1)* %in + %ext = sext <8 x i8> %load to <8 x i16> + store <8 x i16> %ext, <8 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i16: +define void @global_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(1)* %in + %ext = zext <16 x i8> %load to <16 x i16> + store <16 x i16> %ext, <16 x i16> 
addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i16: +define void @global_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(1)* %in + %ext = sext <16 x i8> %load to <16 x i16> + store <16 x i16> %ext, <16 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i16: +define void @global_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(1)* %in + %ext = zext <32 x i8> %load to <32 x i16> + store <32 x i16> %ext, <32 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i16: +define void @global_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(1)* %in + %ext = sext <32 x i8> %load to <32 x i16> + store <32 x i16> %ext, <32 x i16> addrspace(1)* %out + ret void +} + +; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i16: +; define void @global_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 { +; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in +; %ext = zext <64 x i8> %load to <64 x i16> +; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i16: +; define void @global_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 { +; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in +; %ext = sext <64 x i8> %load to <64 x i16> +; store <64 x i16> %ext, <64 x i16> addrspace(1)* %out +; ret void +; } + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-i1.ll =================================================================== --- test/CodeGen/AMDGPU/load-i1.ll +++ /dev/null @@ -1,149 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck 
-check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s - -; FUNC-LABEL: {{^}}global_copy_i1_to_i1: -; SI: buffer_load_ubyte -; SI: v_and_b32_e32 v{{[0-9]+}}, 1 -; SI: buffer_store_byte -; SI: s_endpgm - -; EG: VTX_READ_8 -; EG: AND_INT -define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { - %load = load i1, i1 addrspace(1)* %in - store i1 %load, i1 addrspace(1)* %out, align 1 - ret void -} - -; FUNC-LABEL: {{^}}local_copy_i1_to_i1: -; SI: ds_read_u8 -; SI: v_and_b32_e32 v{{[0-9]+}}, 1 -; SI: ds_write_b8 -; SI: s_endpgm - -; EG: LDS_UBYTE_READ_RET -; EG: AND_INT -; EG: LDS_BYTE_WRITE -define void @local_copy_i1_to_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) nounwind { - %load = load i1, i1 addrspace(3)* %in - store i1 %load, i1 addrspace(3)* %out, align 1 - ret void -} - -; FUNC-LABEL: {{^}}constant_copy_i1_to_i1: -; SI: buffer_load_ubyte -; SI: v_and_b32_e32 v{{[0-9]+}}, 1 -; SI: buffer_store_byte -; SI: s_endpgm - -; EG: VTX_READ_8 -; EG: AND_INT -define void @constant_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) nounwind { - %load = load i1, i1 addrspace(2)* %in - store i1 %load, i1 addrspace(1)* %out, align 1 - ret void -} - -; FUNC-LABEL: {{^}}global_sextload_i1_to_i32: -; SI: buffer_load_ubyte -; SI: v_bfe_i32 -; SI: buffer_store_dword -; SI: s_endpgm - -; EG: VTX_READ_8 -; EG: BFE_INT -define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { - %load = load i1, i1 addrspace(1)* %in - %ext = sext i1 %load to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 - ret void -} - -; FUNC-LABEL: {{^}}global_zextload_i1_to_i32: -; SI: buffer_load_ubyte -; SI: buffer_store_dword -; SI: s_endpgm - -define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 
addrspace(1)* %in) nounwind { - %load = load i1, i1 addrspace(1)* %in - %ext = zext i1 %load to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 - ret void -} - -; FUNC-LABEL: {{^}}global_sextload_i1_to_i64: -; SI: buffer_load_ubyte -; SI: v_bfe_i32 -; SI: buffer_store_dwordx2 -; SI: s_endpgm -define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { - %load = load i1, i1 addrspace(1)* %in - %ext = sext i1 %load to i64 - store i64 %ext, i64 addrspace(1)* %out, align 4 - ret void -} - -; FUNC-LABEL: {{^}}global_zextload_i1_to_i64: -; SI: buffer_load_ubyte -; SI: v_mov_b32_e32 {{v[0-9]+}}, 0 -; SI: buffer_store_dwordx2 -; SI: s_endpgm -define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind { - %load = load i1, i1 addrspace(1)* %in - %ext = zext i1 %load to i64 - store i64 %ext, i64 addrspace(1)* %out, align 4 - ret void -} - -; FUNC-LABEL: {{^}}i1_arg: -; SI: buffer_load_ubyte -; SI: v_and_b32_e32 -; SI: buffer_store_byte -; SI: s_endpgm -define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { - store i1 %x, i1 addrspace(1)* %out, align 1 - ret void -} - -; FUNC-LABEL: {{^}}i1_arg_zext_i32: -; SI: buffer_load_ubyte -; SI: buffer_store_dword -; SI: s_endpgm -define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { - %ext = zext i1 %x to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 - ret void -} - -; FUNC-LABEL: {{^}}i1_arg_zext_i64: -; SI: buffer_load_ubyte -; SI: buffer_store_dwordx2 -; SI: s_endpgm -define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { - %ext = zext i1 %x to i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 - ret void -} - -; FUNC-LABEL: {{^}}i1_arg_sext_i32: -; SI: buffer_load_ubyte -; SI: buffer_store_dword -; SI: s_endpgm -define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { - %ext = sext i1 %x to i32 - store i32 %ext, i32addrspace(1)* %out, align 4 - ret void -} - -; FUNC-LABEL: 
{{^}}i1_arg_sext_i64: -; SI: buffer_load_ubyte -; SI: v_bfe_i32 -; SI: v_ashrrev_i32 -; SI: buffer_store_dwordx2 -; SI: s_endpgm -define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { - %ext = sext i1 %x to i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 - ret void -} Index: test/CodeGen/AMDGPU/load-local-f32.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-local-f32.ll @@ -0,0 +1,117 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}load_f32_local: +; GCN: s_mov_b32 m0 +; GCN: ds_read_b32 + +; EG: LDS_READ_RET +define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) #0 { +entry: + %tmp0 = load float, float addrspace(3)* %in + store float %tmp0, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}load_v2f32_local: +; GCN: s_mov_b32 m0 +; GCN: ds_read_b64 + +; EG: LDS_READ_RET +; EG: LDS_READ_RET +define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) #0 { +entry: + %tmp0 = load <2 x float>, <2 x float> addrspace(3)* %in + store <2 x float> %tmp0, <2 x float> addrspace(1)* %out + ret void +} + +; FIXME: should only do one b64 load +; FUNC-LABEL: {{^}}local_load_v3f32: +; GCN-DAG: ds_read_b64 +; GCN-DAG: ds_read_b64 +; GCN: s_waitcnt +; GCN-DAG: ds_write_b64 +; GCN-DAG: ds_write_b32 + +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +define void @local_load_v3f32(<3 x float> addrspace(3)* %out, <3 x float> addrspace(3)* %in) #0 { +entry: + %tmp0 = load <3 x float>, <3 x float> addrspace(3)* %in + store <3 x float> %tmp0, <3 x float> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v4f32: +; GCN: 
ds_read_b64 +; GCN: ds_read_b64 + +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +define void @local_load_v4f32(<4 x float> addrspace(3)* %out, <4 x float> addrspace(3)* %in) #0 { +entry: + %tmp0 = load <4 x float>, <4 x float> addrspace(3)* %in + store <4 x float> %tmp0, <4 x float> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v8f32: +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 + +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +define void @local_load_v8f32(<8 x float> addrspace(3)* %out, <8 x float> addrspace(3)* %in) #0 { +entry: + %tmp0 = load <8 x float>, <8 x float> addrspace(3)* %in + store <8 x float> %tmp0, <8 x float> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v16f32: +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 + +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +define void @local_load_v16f32(<16 x float> addrspace(3)* %out, <16 x float> addrspace(3)* %in) #0 { +entry: + %tmp0 = load <16 x float>, <16 x float> addrspace(3)* %in + store <16 x float> %tmp0, <16 x float> addrspace(3)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-local-i1.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-local-i1.ll @@ -0,0 +1,371 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}local_load_i1: +; GCN: ds_read_u8 +; GCN: v_and_b32_e32 v{{[0-9]+}}, 1 +; GCN: ds_write_b8 + +; EG: LDS_UBYTE_READ_RET +; EG: AND_INT +; EG: LDS_BYTE_WRITE +define void @local_load_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) #0 { + %load = load i1, i1 addrspace(3)* %in + store i1 %load, i1 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v2i1: +define void @local_load_v2i1(<2 x i1> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(3)* %in + store <2 x i1> %load, <2 x i1> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v3i1: +define void @local_load_v3i1(<3 x i1> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(3)* %in + store <3 x i1> %load, <3 x i1> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v4i1: +define void @local_load_v4i1(<4 x i1> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(3)* %in + store <4 x i1> %load, <4 x i1> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v8i1: +define void @local_load_v8i1(<8 x i1> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(3)* %in + store <8 x i1> %load, <8 x i1> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v16i1: +define void @local_load_v16i1(<16 x i1> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(3)* %in + store <16 x i1> %load, <16 x i1> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v32i1: +define void @local_load_v32i1(<32 x i1> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(3)* %in + store <32 x i1> %load, <32 x i1> 
addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v64i1: +define void @local_load_v64i1(<64 x i1> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 { + %load = load <64 x i1>, <64 x i1> addrspace(3)* %in + store <64 x i1> %load, <64 x i1> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_i1_to_i32: +; GCN: ds_read_u8 +; GCN: ds_write_b32 +define void @local_zextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 { + %a = load i1, i1 addrspace(3)* %in + %ext = zext i1 %a to i32 + store i32 %ext, i32 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_i1_to_i32: +; GCN: ds_read_u8 +; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}} +; GCN: ds_write_b32 + +; EG: LDS_UBYTE_READ_RET +; EG: BFE_INT +define void @local_sextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 { + %a = load i1, i1 addrspace(3)* %in + %ext = sext i1 %a to i32 + store i32 %ext, i32 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i32: +define void @local_zextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 { + %load = load <1 x i1>, <1 x i1> addrspace(3)* %in + %ext = zext <1 x i1> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i32: +define void @local_sextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 { + %load = load <1 x i1>, <1 x i1> addrspace(3)* %in + %ext = sext <1 x i1> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i32: +define void @local_zextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(3)* %in + %ext = zext <2 x i1> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i32: +define 
void @local_sextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(3)* %in + %ext = sext <2 x i1> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i32: +define void @local_zextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(3)* %in + %ext = zext <3 x i1> %load to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i32: +define void @local_sextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(3)* %in + %ext = sext <3 x i1> %load to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i32: +define void @local_zextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(3)* %in + %ext = zext <4 x i1> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i32: +define void @local_sextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(3)* %in + %ext = sext <4 x i1> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i32: +define void @local_zextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(3)* %in + %ext = zext <8 x i1> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i32: +define void @local_sextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> 
addrspace(3)* %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(3)* %in + %ext = sext <8 x i1> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i32: +define void @local_zextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(3)* %in + %ext = zext <16 x i1> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i32: +define void @local_sextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(3)* %in + %ext = sext <16 x i1> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i32: +define void @local_zextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(3)* %in + %ext = zext <32 x i1> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i32: +define void @local_sextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(3)* %in + %ext = sext <32 x i1> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i32: +define void @local_zextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 { + %load = load <64 x i1>, <64 x i1> addrspace(3)* %in + %ext = zext <64 x i1> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i32: +define void @local_sextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) 
#0 { + %load = load <64 x i1>, <64 x i1> addrspace(3)* %in + %ext = sext <64 x i1> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_i1_to_i64: +; GCN-DAG: ds_read_u8 [[LOAD:v[0-9]+]], +; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}} +; GCN: ds_write_b64 +define void @local_zextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 { + %a = load i1, i1 addrspace(3)* %in + %ext = zext i1 %a to i64 + store i64 %ext, i64 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_i1_to_i64: +; GCN: ds_read_u8 [[LOAD:v[0-9]+]], +; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}} +; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]] +; GCN: ds_write_b64 +define void @local_sextload_i1_to_i64(i64 addrspace(3)* %out, i1 addrspace(3)* %in) #0 { + %a = load i1, i1 addrspace(3)* %in + %ext = sext i1 %a to i64 + store i64 %ext, i64 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i64: +define void @local_zextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 { + %load = load <1 x i1>, <1 x i1> addrspace(3)* %in + %ext = zext <1 x i1> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i64: +define void @local_sextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 { + %load = load <1 x i1>, <1 x i1> addrspace(3)* %in + %ext = sext <1 x i1> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i64: +define void @local_zextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(3)* %in + %ext = zext <2 x i1> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i64: +define void 
@local_sextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 { + %load = load <2 x i1>, <2 x i1> addrspace(3)* %in + %ext = sext <2 x i1> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i64: +define void @local_zextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(3)* %in + %ext = zext <3 x i1> %load to <3 x i64> + store <3 x i64> %ext, <3 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i64: +define void @local_sextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 { + %load = load <3 x i1>, <3 x i1> addrspace(3)* %in + %ext = sext <3 x i1> %load to <3 x i64> + store <3 x i64> %ext, <3 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i64: +define void @local_zextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(3)* %in + %ext = zext <4 x i1> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i64: +define void @local_sextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 { + %load = load <4 x i1>, <4 x i1> addrspace(3)* %in + %ext = sext <4 x i1> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i64: +define void @local_zextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(3)* %in + %ext = zext <8 x i1> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i64: +define void @local_sextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* 
%in) #0 { + %load = load <8 x i1>, <8 x i1> addrspace(3)* %in + %ext = sext <8 x i1> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i64: +define void @local_zextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(3)* %in + %ext = zext <16 x i1> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i64: +define void @local_sextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 { + %load = load <16 x i1>, <16 x i1> addrspace(3)* %in + %ext = sext <16 x i1> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i64: +define void @local_zextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(3)* %in + %ext = zext <32 x i1> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i64: +define void @local_sextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 { + %load = load <32 x i1>, <32 x i1> addrspace(3)* %in + %ext = sext <32 x i1> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i64: +define void @local_zextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 { + %load = load <64 x i1>, <64 x i1> addrspace(3)* %in + %ext = zext <64 x i1> %load to <64 x i64> + store <64 x i64> %ext, <64 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i64: +define void @local_sextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 { + %load = 
load <64 x i1>, <64 x i1> addrspace(3)* %in + %ext = sext <64 x i1> %load to <64 x i64> + store <64 x i64> %ext, <64 x i64> addrspace(3)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-local-i16.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-local-i16.ll @@ -0,0 +1,611 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}local_load_i16: +; GCN: ds_read_u16 v{{[0-9]+}} + +; EG: LDS_USHORT_READ_RET +define void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) { +entry: + %ld = load i16, i16 addrspace(3)* %in + store i16 %ld, i16 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v2i16: +; GCN: ds_read_b32 + +; EG: LDS_READ_RET +define void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) { +entry: + %ld = load <2 x i16>, <2 x i16> addrspace(3)* %in + store <2 x i16> %ld, <2 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v3i16: +; GCN: ds_read_b64 +; GCN-DAG: ds_write_b32 +; GCN-DAG: ds_write_b16 + +; EG-DAG: LDS_USHORT_READ_RET +; EG-DAG: LDS_READ_RET +define void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) { +entry: + %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in + store <3 x i16> %ld, <3 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v4i16: +; GCN: ds_read_b64 + +; EG: LDS_READ_RET +; EG: LDS_READ_RET +define void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) { +entry: + %ld = load <4 x i16>, <4 x i16> addrspace(3)* %in + store <4 x i16> %ld, <4 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: 
{{^}}local_load_v8i16: +; GCN: ds_read_b64 +; GCN: ds_read_b64 + +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +define void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) { +entry: + %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in + store <8 x i16> %ld, <8 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v16i16: +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 + + +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET + +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +define void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) { +entry: + %ld = load <16 x i16>, <16 x i16> addrspace(3)* %in + store <16 x i16> %ld, <16 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_i16_to_i32: +; GCN: ds_read_u16 +; GCN: ds_write_b32 + +; EG: LDS_USHORT_READ_RET +define void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { + %a = load i16, i16 addrspace(3)* %in + %ext = zext i16 %a to i32 + store i32 %ext, i32 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_i16_to_i32: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0 +; GCN: ds_read_i16 + +; EG: LDS_USHORT_READ_RET +; EG: BFE_INT +define void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { + %a = load i16, i16 addrspace(3)* %in + %ext = sext i16 %a to i32 + store i32 %ext, i32 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32: +; GCN: ds_read_u16 +define void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { + %load = load <1 x i16>, <1 x i16> addrspace(3)* %in + %ext = zext <1 x i16> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32: +; GCN: ds_read_i16 
+define void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { + %load = load <1 x i16>, <1 x i16> addrspace(3)* %in + %ext = sext <1 x i16> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0 +; GCN: ds_read_u16 +; GCN: ds_read_u16 + +; EG: LDS_USHORT_READ_RET +; EG: LDS_USHORT_READ_RET +define void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { + %load = load <2 x i16>, <2 x i16> addrspace(3)* %in + %ext = zext <2 x i16> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0 +; GCN: ds_read_i16 +; GCN: ds_read_i16 + +; EG-DAG: LDS_USHORT_READ_RET +; EG-DAG: LDS_USHORT_READ_RET +; EG-DAG: BFE_INT +; EG-DAG: BFE_INT +define void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { + %load = load <2 x i16>, <2 x i16> addrspace(3)* %in + %ext = sext <2 x i16> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_local_zextload_v3i16_to_v3i32: +; GCN: ds_read_b64 +; GCN-DAG: ds_write_b32 +; GCN-DAG: ds_write_b64 +define void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) { +entry: + %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in + %ext = zext <3 x i16> %ld to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_local_sextload_v3i16_to_v3i32: +; GCN: ds_read_b64 +; GCN-DAG: ds_write_b32 +; GCN-DAG: ds_write_b64 +define void @local_local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) { +entry: + %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in + %ext = sext <3 x i16> %ld to <3 x i32> + 
store <3 x i32> %ext, <3 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_local_zextload_v4i16_to_v4i32: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 + +; EG: LDS_USHORT_READ_RET +; EG: LDS_USHORT_READ_RET +; EG: LDS_USHORT_READ_RET +; EG: LDS_USHORT_READ_RET +define void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { + %load = load <4 x i16>, <4 x i16> addrspace(3)* %in + %ext = zext <4 x i16> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 + +; EG-DAG: LDS_USHORT_READ_RET +; EG-DAG: LDS_USHORT_READ_RET +; EG-DAG: LDS_USHORT_READ_RET +; EG-DAG: LDS_USHORT_READ_RET +; EG-DAG: BFE_INT +; EG-DAG: BFE_INT +; EG-DAG: BFE_INT +; EG-DAG: BFE_INT +define void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { + %load = load <4 x i16>, <4 x i16> addrspace(3)* %in + %ext = sext <4 x i16> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32: +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +define void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { + %load = load <8 x i16>, <8 x i16> addrspace(3)* %in + %ext = zext <8 x i16> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32: +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +define void 
@local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { + %load = load <8 x i16>, <8 x i16> addrspace(3)* %in + %ext = sext <8 x i16> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32: +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +define void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { + %load = load <16 x i16>, <16 x i16> addrspace(3)* %in + %ext = zext <16 x i16> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32: +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +define void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { + %load = load <16 x i16>, <16 x i16> addrspace(3)* %in + %ext = sext <16 x i16> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32: +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: 
ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 + +define void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { + %load = load <32 x i16>, <32 x i16> addrspace(3)* %in + %ext = zext <32 x i16> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32: +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +; GCN: ds_read_i16 +define void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { + %load = load <32 x i16>, <32 x i16> addrspace(3)* %in + %ext = sext <32 x i16> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32: +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: 
ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_read_u16 + +define void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { + %load = load <64 x i16>, <64 x i16> addrspace(3)* %in + %ext = zext <64 x i16> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32: +define void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { + %load = load <64 x i16>, <64 x i16> addrspace(3)* %in + %ext = sext <64 x i16> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_i16_to_i64: +; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]], +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} + +; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]] +define void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { + %a = load i16, i16 addrspace(3)* %in + %ext = zext i16 %a to i64 + store i64 %ext, i64 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_i16_to_i64: +; GCN: ds_read_i16 v[[LO:[0-9]+]], +; GCN-DAG: 
v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] + +; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]] +define void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { + %a = load i16, i16 addrspace(3)* %in + %ext = sext i16 %a to i64 + store i64 %ext, i64 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64: +define void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { + %load = load <1 x i16>, <1 x i16> addrspace(3)* %in + %ext = zext <1 x i16> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64: +define void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { + %load = load <1 x i16>, <1 x i16> addrspace(3)* %in + %ext = sext <1 x i16> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64: +define void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { + %load = load <2 x i16>, <2 x i16> addrspace(3)* %in + %ext = zext <2 x i16> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64: +define void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { + %load = load <2 x i16>, <2 x i16> addrspace(3)* %in + %ext = sext <2 x i16> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64: +define void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { + %load = load <4 x i16>, <4 x i16> addrspace(3)* %in + %ext = zext <4 x i16> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64: 
+define void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { + %load = load <4 x i16>, <4 x i16> addrspace(3)* %in + %ext = sext <4 x i16> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64: +define void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { + %load = load <8 x i16>, <8 x i16> addrspace(3)* %in + %ext = zext <8 x i16> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64: +define void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { + %load = load <8 x i16>, <8 x i16> addrspace(3)* %in + %ext = sext <8 x i16> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64: +define void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { + %load = load <16 x i16>, <16 x i16> addrspace(3)* %in + %ext = zext <16 x i16> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64: +define void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { + %load = load <16 x i16>, <16 x i16> addrspace(3)* %in + %ext = sext <16 x i16> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64: +define void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { + %load = load <32 x i16>, <32 x i16> addrspace(3)* %in + %ext = zext <32 x i16> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64: +define 
void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { + %load = load <32 x i16>, <32 x i16> addrspace(3)* %in + %ext = sext <32 x i16> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(3)* %out + ret void +} + +; ; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64: +; define void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { +; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in +; %ext = zext <64 x i16> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out +; ret void +; } + +; ; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64: +; define void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { +; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in +; %ext = sext <64 x i16> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out +; ret void +; } + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-local-i32.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-local-i32.ll @@ -0,0 +1,188 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + + +; FUNC-LABEL: {{^}}local_load_i32: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0, -1 +; GCN: ds_read_b32 + +; EG: LDS_READ_RET +define void @local_load_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) #0 { +entry: + %ld = load i32, i32 addrspace(3)* %in + store i32 %ld, i32 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v2i32: +; GCN: ds_read_b64 +define void @local_load_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 { +entry: + %ld = load <2 x 
i32>, <2 x i32> addrspace(3)* %in + store <2 x i32> %ld, <2 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v3i32: +; GCN-DAG: ds_read_b64 +; GCN-DAG: ds_read_b32 +define void @local_load_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> addrspace(3)* %in) #0 { +entry: + %ld = load <3 x i32>, <3 x i32> addrspace(3)* %in + store <3 x i32> %ld, <3 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v4i32: +; GCN: ds_read_b64 +; GCN: ds_read_b64 +define void @local_load_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 { +entry: + %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in + store <4 x i32> %ld, <4 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v8i32: +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +define void @local_load_v8i32(<8 x i32> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 { +entry: + %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in + store <8 x i32> %ld, <8 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v16i32: +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +; GCN: ds_read_b64 +define void @local_load_v16i32(<16 x i32> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 { +entry: + %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in + store <16 x i32> %ld, <16 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_i32_to_i64: +define void @local_zextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 { + %ld = load i32, i32 addrspace(3)* %in + %ext = zext i32 %ld to i64 + store i64 %ext, i64 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_i32_to_i64: +define void @local_sextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 { + %ld = load i32, i32 addrspace(3)* %in + %ext = sext i32 %ld to i64 + store i64 %ext, i64 addrspace(3)* %out + 
ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v1i32_to_v1i64: +define void @local_zextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 { + %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in + %ext = zext <1 x i32> %ld to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v1i32_to_v1i64: +define void @local_sextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 { + %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in + %ext = sext <1 x i32> %ld to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v2i32_to_v2i64: +define void @local_zextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 { + %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in + %ext = zext <2 x i32> %ld to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v2i32_to_v2i64: +define void @local_sextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 { + %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in + %ext = sext <2 x i32> %ld to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v4i32_to_v4i64: +define void @local_zextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 { + %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in + %ext = zext <4 x i32> %ld to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v4i32_to_v4i64: +define void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 { + %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in + %ext = sext <4 x i32> %ld to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v8i32_to_v8i64: +define 
void @local_zextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 { + %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in + %ext = zext <8 x i32> %ld to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v8i32_to_v8i64: +define void @local_sextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 { + %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in + %ext = sext <8 x i32> %ld to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v16i32_to_v16i64: +define void @local_sextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 { + %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in + %ext = sext <16 x i32> %ld to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v16i32_to_v16i64 +define void @local_zextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 { + %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in + %ext = zext <16 x i32> %ld to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v32i32_to_v32i64: +define void @local_sextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 { + %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in + %ext = sext <32 x i32> %ld to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v32i32_to_v32i64: +define void @local_zextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 { + %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in + %ext = zext <32 x i32> %ld to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(3)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load-local-i8.ll 
=================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/load-local-i8.ll @@ -0,0 +1,562 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + + +; FUNC-LABEL: {{^}}local_load_i8: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0 +; GCN: ds_read_u8 + +; EG: LDS_UBYTE_READ_RET +define void @local_load_i8(i8 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { +entry: + %ld = load i8, i8 addrspace(3)* %in + store i8 %ld, i8 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v2i8: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0 +; GCN: ds_read_u16 + +; EG: LDS_USHORT_READ_RET +define void @local_load_v2i8(<2 x i8> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { +entry: + %ld = load <2 x i8>, <2 x i8> addrspace(3)* %in + store <2 x i8> %ld, <2 x i8> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v3i8: +; GCN: ds_read_b32 + +; EG: DS_READ_RET +define void @local_load_v3i8(<3 x i8> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 { +entry: + %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in + store <3 x i8> %ld, <3 x i8> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v4i8: +; GCN: ds_read_b32 + +; EG: LDS_READ_RET +define void @local_load_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { +entry: + %ld = load <4 x i8>, <4 x i8> addrspace(3)* %in + store <4 x i8> %ld, <4 x i8> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_load_v8i8: +; GCN: ds_read_b64 + +; EG: LDS_READ_RET +; EG: LDS_READ_RET +define void @local_load_v8i8(<8 x i8> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { +entry: + %ld = load <8 x i8>, <8 x i8> addrspace(3)* %in + store <8 x i8> %ld, <8 x i8> addrspace(3)* %out + 
ret void +} + +; FUNC-LABEL: {{^}}local_load_v16i8: +; GCN: ds_read_b64 +; GCN: ds_read_b64 + +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +; EG: LDS_READ_RET +define void @local_load_v16i8(<16 x i8> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { +entry: + %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in + store <16 x i8> %ld, <16 x i8> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_i8_to_i32: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0 +; GCN: ds_read_u8 + +; EG: LDS_UBYTE_READ_RET +define void @local_zextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { + %a = load i8, i8 addrspace(3)* %in + %ext = zext i8 %a to i32 + store i32 %ext, i32 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_i8_to_i32: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0 +; GCN: ds_read_i8 + +; EG: LDS_UBYTE_READ_RET +; EG: BFE_INT +define void @local_sextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { + %ld = load i8, i8 addrspace(3)* %in + %ext = sext i8 %ld to i32 + store i32 %ext, i32 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i32: +define void @local_zextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(3)* %in + %ext = zext <1 x i8> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i32: +define void @local_sextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(3)* %in + %ext = sext <1 x i8> %load to <1 x i32> + store <1 x i32> %ext, <1 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i32: +; GCN: ds_read_u8 +; GCN: ds_read_u8 + +; EG: LDS_UBYTE_READ_RET +; EG: LDS_UBYTE_READ_RET +define void @local_zextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { + 
%load = load <2 x i8>, <2 x i8> addrspace(3)* %in + %ext = zext <2 x i8> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i32: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0 +; GCN: ds_read_i8 +; GCN: ds_read_i8 + +; EG-DAG: LDS_UBYTE_READ_RET +; EG-DAG: LDS_UBYTE_READ_RET +; EG-DAG: BFE_INT +; EG-DAG: BFE_INT +define void @local_sextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(3)* %in + %ext = sext <2 x i8> %load to <2 x i32> + store <2 x i32> %ext, <2 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v3i8_to_v3i32: +; GCN: ds_read_b32 + +; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8 +; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8 +; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff, +define void @local_zextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 { +entry: + %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in + %ext = zext <3 x i8> %ld to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v3i8_to_v3i32: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0 +; GCN: ds_read_b32 + +; GCN-DAG: v_bfe_i32 +; GCN-DAG: v_bfe_i32 +; GCN-DAG: v_bfe_i32 +; GCN-DAG: v_bfe_i32 + +; GCN-DAG: ds_write_b64 +; GCN-DAG: ds_write_b32 + +define void @local_sextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 { +entry: + %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in + %ext = sext <3 x i8> %ld to <3 x i32> + store <3 x i32> %ext, <3 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i32: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 + +; EG: LDS_UBYTE_READ_RET +; EG: LDS_UBYTE_READ_RET +; EG: LDS_UBYTE_READ_RET +; EG: LDS_UBYTE_READ_RET +define void 
@local_zextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(3)* %in + %ext = zext <4 x i8> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i32: +; GCN-NOT: s_wqm_b64 +; GCN: s_mov_b32 m0 +; GCN: ds_read_i8 +; GCN: ds_read_i8 +; GCN: ds_read_i8 +; GCN: ds_read_i8 + +; EG-DAG: LDS_UBYTE_READ_RET +; EG-DAG: LDS_UBYTE_READ_RET +; EG-DAG: LDS_UBYTE_READ_RET +; EG-DAG: LDS_UBYTE_READ_RET +; EG-DAG: BFE_INT +; EG-DAG: BFE_INT +; EG-DAG: BFE_INT +; EG-DAG: BFE_INT +define void @local_sextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(3)* %in + %ext = sext <4 x i8> %load to <4 x i32> + store <4 x i32> %ext, <4 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i32: +define void @local_zextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(3)* %in + %ext = zext <8 x i8> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i32: +define void @local_sextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(3)* %in + %ext = sext <8 x i8> %load to <8 x i32> + store <8 x i32> %ext, <8 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i32: +define void @local_zextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(3)* %in + %ext = zext <16 x i8> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i32: +define void @local_sextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> 
addrspace(3)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(3)* %in + %ext = sext <16 x i8> %load to <16 x i32> + store <16 x i32> %ext, <16 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i32: +define void @local_zextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(3)* %in + %ext = zext <32 x i8> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i32: +define void @local_sextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(3)* %in + %ext = sext <32 x i8> %load to <32 x i32> + store <32 x i32> %ext, <32 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i32: +define void @local_zextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { + %load = load <64 x i8>, <64 x i8> addrspace(3)* %in + %ext = zext <64 x i8> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i32: +define void @local_sextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { + %load = load <64 x i8>, <64 x i8> addrspace(3)* %in + %ext = sext <64 x i8> %load to <64 x i32> + store <64 x i32> %ext, <64 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_i8_to_i64: +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} +; GCN-DAG: ds_read_u8 v[[LO:[0-9]+]], +; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]] +define void @local_zextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { + %a = load i8, i8 addrspace(3)* %in + %ext = zext i8 %a to i64 + store i64 %ext, i64 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_i8_to_i64: +; GCN: ds_read_i8 v[[LO:[0-9]+]], 
+; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] + +; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +define void @local_sextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { + %a = load i8, i8 addrspace(3)* %in + %ext = sext i8 %a to i64 + store i64 %ext, i64 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i64: +define void @local_zextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(3)* %in + %ext = zext <1 x i8> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i64: +define void @local_sextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(3)* %in + %ext = sext <1 x i8> %load to <1 x i64> + store <1 x i64> %ext, <1 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i64: +define void @local_zextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(3)* %in + %ext = zext <2 x i8> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i64: +define void @local_sextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(3)* %in + %ext = sext <2 x i8> %load to <2 x i64> + store <2 x i64> %ext, <2 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i64: +define void @local_zextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(3)* %in + %ext = zext <4 x i8> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i64: +define void 
@local_sextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(3)* %in + %ext = sext <4 x i8> %load to <4 x i64> + store <4 x i64> %ext, <4 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i64: +define void @local_zextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(3)* %in + %ext = zext <8 x i8> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i64: +define void @local_sextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(3)* %in + %ext = sext <8 x i8> %load to <8 x i64> + store <8 x i64> %ext, <8 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i64: +define void @local_zextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(3)* %in + %ext = zext <16 x i8> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i64: +define void @local_sextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(3)* %in + %ext = sext <16 x i8> %load to <16 x i64> + store <16 x i64> %ext, <16 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i64: +define void @local_zextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(3)* %in + %ext = zext <32 x i8> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i64: +define void @local_sextload_v32i8_to_v32i64(<32 x i64> 
addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(3)* %in + %ext = sext <32 x i8> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(3)* %out + ret void +} + +; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i64: +; define void @local_zextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { +; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in +; %ext = zext <64 x i8> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i64: +; define void @local_sextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { +; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in +; %ext = sext <64 x i8> %load to <64 x i64> +; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out +; ret void +; } + +; FUNC-LABEL: {{^}}local_zextload_i8_to_i16: +; GCN: ds_read_u8 v[[VAL:[0-9]+]], +; GCN: ds_write_b16 v[[VAL:[0-9]+]] +define void @local_zextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { + %a = load i8, i8 addrspace(3)* %in + %ext = zext i8 %a to i16 + store i16 %ext, i16 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_i8_to_i16: +; GCN: ds_read_i8 v[[VAL:[0-9]+]], +; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]] +define void @local_sextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { + %a = load i8, i8 addrspace(3)* %in + %ext = sext i8 %a to i16 + store i16 %ext, i16 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i16: +define void @local_zextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(3)* %in + %ext = zext <1 x i8> %load to <1 x i16> + store <1 x i16> %ext, <1 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i16: +define void @local_sextload_v1i8_to_v1i16(<1 x i16> 
addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { + %load = load <1 x i8>, <1 x i8> addrspace(3)* %in + %ext = sext <1 x i8> %load to <1 x i16> + store <1 x i16> %ext, <1 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i16: +define void @local_zextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(3)* %in + %ext = zext <2 x i8> %load to <2 x i16> + store <2 x i16> %ext, <2 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i16: +define void @local_sextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { + %load = load <2 x i8>, <2 x i8> addrspace(3)* %in + %ext = sext <2 x i8> %load to <2 x i16> + store <2 x i16> %ext, <2 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i16: +define void @local_zextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(3)* %in + %ext = zext <4 x i8> %load to <4 x i16> + store <4 x i16> %ext, <4 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16: +define void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(3)* %in + %ext = sext <4 x i8> %load to <4 x i16> + store <4 x i16> %ext, <4 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i16: +define void @local_zextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { + %load = load <8 x i8>, <8 x i8> addrspace(3)* %in + %ext = zext <8 x i8> %load to <8 x i16> + store <8 x i16> %ext, <8 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i16: +define void @local_sextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { + %load = load <8 x i8>, <8 x 
i8> addrspace(3)* %in + %ext = sext <8 x i8> %load to <8 x i16> + store <8 x i16> %ext, <8 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i16: +define void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(3)* %in + %ext = zext <16 x i8> %load to <16 x i16> + store <16 x i16> %ext, <16 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i16: +define void @local_sextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { + %load = load <16 x i8>, <16 x i8> addrspace(3)* %in + %ext = sext <16 x i8> %load to <16 x i16> + store <16 x i16> %ext, <16 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i16: +define void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(3)* %in + %ext = zext <32 x i8> %load to <32 x i16> + store <32 x i16> %ext, <32 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i16: +define void @local_sextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { + %load = load <32 x i8>, <32 x i8> addrspace(3)* %in + %ext = sext <32 x i8> %load to <32 x i16> + store <32 x i16> %ext, <32 x i16> addrspace(3)* %out + ret void +} + +; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i16: +; define void @local_zextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { +; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in +; %ext = zext <64 x i8> %load to <64 x i16> +; store <64 x i16> %ext, <64 x i16> addrspace(3)* %out +; ret void +; } + +; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i16: +; define void @local_sextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { +; %load = load <64 x i8>, <64 x i8> 
addrspace(3)* %in +; %ext = sext <64 x i8> %load to <64 x i16> +; store <64 x i16> %ext, <64 x i16> addrspace(3)* %out +; ret void +; } + +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/load.ll =================================================================== --- test/CodeGen/AMDGPU/load.ll +++ /dev/null @@ -1,737 +0,0 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=CI-HSA -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s - -;===------------------------------------------------------------------------===; -; GLOBAL ADDRESS SPACE -;===------------------------------------------------------------------------===; - -; Load an i8 value from the global address space. 
-; FUNC-LABEL: {{^}}load_i8: -; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} - -; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}}, -; CI-HSA: flat_load_ubyte -define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { - %1 = load i8, i8 addrspace(1)* %in - %2 = zext i8 %1 to i32 - store i32 %2, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_i8_sext: -; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] -; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal -; R600: 8 -; SI-NOHSA: buffer_load_sbyte -; CI-HSA: flat_load_sbyte -define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { -entry: - %0 = load i8, i8 addrspace(1)* %in - %1 = sext i8 %0 to i32 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v2i8: -; R600: VTX_READ_8 -; R600: VTX_READ_8 -; SI-NOHSA: buffer_load_ubyte -; SI-NOHSA: buffer_load_ubyte -; CI-HSA: flat_load_ubyte -; CI-HSA: flat_load_ubyte -define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { -entry: - %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in - %1 = zext <2 x i8> %0 to <2 x i32> - store <2 x i32> %1, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v2i8_sext: -; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] -; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] -; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal -; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal -; R600-DAG: 8 -; R600-DAG: 8 - -; SI-NOHSA: buffer_load_sbyte -; SI-NOHSA: buffer_load_sbyte -; CI-HSA: flat_load_sbyte -; CI-HSA: flat_load_sbyte -define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { -entry: - %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in - %1 = sext <2 x i8> %0 to <2 x i32> - store <2 x i32> %1, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v4i8: -; R600: VTX_READ_8 -; R600: VTX_READ_8 -; R600: VTX_READ_8 -; R600: 
VTX_READ_8 -; SI-NOHSA: buffer_load_ubyte -; SI-NOHSA: buffer_load_ubyte -; SI-NOHSA: buffer_load_ubyte -; SI-NOHSA: buffer_load_ubyte -; CI-HSA: flat_load_ubyte -; CI-HSA: flat_load_ubyte -; CI-HSA: flat_load_ubyte -; CI-HSA: flat_load_ubyte -define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) { -entry: - %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in - %1 = zext <4 x i8> %0 to <4 x i32> - store <4 x i32> %1, <4 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v4i8_sext: -; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] -; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] -; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] -; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] -; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal -; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal -; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal -; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal -; R600-DAG: 8 -; R600-DAG: 8 -; R600-DAG: 8 -; R600-DAG: 8 -; SI-NOHSA: buffer_load_sbyte -; SI-NOHSA: buffer_load_sbyte -; SI-NOHSA: buffer_load_sbyte -; SI-NOHSA: buffer_load_sbyte -; CI-HSA: flat_load_sbyte -; CI-HSA: flat_load_sbyte -; CI-HSA: flat_load_sbyte -; CI-HSA: flat_load_sbyte -define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) { -entry: - %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in - %1 = sext <4 x i8> %0 to <4 x i32> - store <4 x i32> %1, <4 x i32> addrspace(1)* %out - ret void -} - -; Load an i16 value from the global address space. 
-; FUNC-LABEL: {{^}}load_i16: -; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-NOHSA: buffer_load_ushort -; CI-HSA: flat_load_ushort -define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { -entry: - %0 = load i16 , i16 addrspace(1)* %in - %1 = zext i16 %0 to i32 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_i16_sext: -; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]] -; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal -; R600: 16 -; SI-NOHSA: buffer_load_sshort -; CI-HSA: flat_load_sshort -define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { -entry: - %0 = load i16, i16 addrspace(1)* %in - %1 = sext i16 %0 to i32 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v2i16: -; R600: VTX_READ_16 -; R600: VTX_READ_16 -; SI-NOHSA: buffer_load_ushort -; SI-NOHSA: buffer_load_ushort -; CI-HSA: flat_load_ushort -; CI-HSA: flat_load_ushort -define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { -entry: - %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in - %1 = zext <2 x i16> %0 to <2 x i32> - store <2 x i32> %1, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v2i16_sext: -; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] -; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] -; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal -; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal -; R600-DAG: 16 -; R600-DAG: 16 -; SI-NOHSA: buffer_load_sshort -; SI-NOHSA: buffer_load_sshort -; CI-HSA: flat_load_sshort -; CI-HSA: flat_load_sshort -define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { -entry: - %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in - %1 = sext <2 x i16> %0 to <2 x i32> - store <2 x i32> %1, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v4i16: -; R600: VTX_READ_16 -; R600: 
VTX_READ_16 -; R600: VTX_READ_16 -; R600: VTX_READ_16 -; SI-NOHSA: buffer_load_ushort -; SI-NOHSA: buffer_load_ushort -; SI-NOHSA: buffer_load_ushort -; SI-NOHSA: buffer_load_ushort -; CI-HSA: flat_load_ushort -; CI-HSA: flat_load_ushort -; CI-HSA: flat_load_ushort -; CI-HSA: flat_load_ushort -define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { -entry: - %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in - %1 = zext <4 x i16> %0 to <4 x i32> - store <4 x i32> %1, <4 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v4i16_sext: -; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] -; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] -; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] -; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]] -; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal -; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal -; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal -; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal -; R600-DAG: 16 -; R600-DAG: 16 -; R600-DAG: 16 -; R600-DAG: 16 -; SI-NOHSA: buffer_load_sshort -; SI-NOHSA: buffer_load_sshort -; SI-NOHSA: buffer_load_sshort -; SI-NOHSA: buffer_load_sshort -; CI-HSA: flat_load_sshort -; CI-HSA: flat_load_sshort -; CI-HSA: flat_load_sshort -; CI-HSA: flat_load_sshort -define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { -entry: - %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in - %1 = sext <4 x i16> %0 to <4 x i32> - store <4 x i32> %1, <4 x i32> addrspace(1)* %out - ret void -} - -; load an i32 value from the global address space. 
-; FUNC-LABEL: {{^}}load_i32: -; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 - -; SI-NOHSA: buffer_load_dword v{{[0-9]+}} -; CI-HSA: flat_load_dword -define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { -entry: - %0 = load i32, i32 addrspace(1)* %in - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; load a f32 value from the global address space. -; FUNC-LABEL: {{^}}load_f32: -; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 - -; SI-NOHSA: buffer_load_dword v{{[0-9]+}} -; CI-HSA: flat_load_dword -define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) { -entry: - %0 = load float, float addrspace(1)* %in - store float %0, float addrspace(1)* %out - ret void -} - -; load a v2f32 value from the global address space -; FUNC-LABEL: {{^}}load_v2f32: -; R600: MEM_RAT -; R600: VTX_READ_64 -; SI-NOHSA: buffer_load_dwordx2 -; CI-HSA: flat_load_dwordx2 -define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) { -entry: - %0 = load <2 x float>, <2 x float> addrspace(1)* %in - store <2 x float> %0, <2 x float> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_i64: -; R600: VTX_READ_64 -; SI-NOHSA: buffer_load_dwordx2 -; CI-HSA: flat_load_dwordx2 -define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { -entry: - %0 = load i64, i64 addrspace(1)* %in - store i64 %0, i64 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_i64_sext: -; R600: MEM_RAT -; R600: MEM_RAT -; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x -; R600: 31 -; SI-NOHSA: buffer_load_dword -; CI-HSA: flat_load_dword - -define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { -entry: - %0 = load i32, i32 addrspace(1)* %in - %1 = sext i32 %0 to i64 - store i64 %1, i64 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_i64_zext: -; R600: MEM_RAT -; R600: MEM_RAT -define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { -entry: - %0 = 
load i32, i32 addrspace(1)* %in - %1 = zext i32 %0 to i64 - store i64 %1, i64 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v8i32: -; R600: VTX_READ_128 -; R600: VTX_READ_128 - -; SI-NOHSA: buffer_load_dwordx4 -; SI-NOHSA: buffer_load_dwordx4 -; CI-HSA: flat_load_dwordx4 -; CI-HSA: flat_load_dwordx4 -define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) { -entry: - %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in - store <8 x i32> %0, <8 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v16i32: -; R600: VTX_READ_128 -; R600: VTX_READ_128 -; R600: VTX_READ_128 -; R600: VTX_READ_128 - -; SI-NOHSA: buffer_load_dwordx4 -; SI-NOHSA: buffer_load_dwordx4 -; SI-NOHSA: buffer_load_dwordx4 -; SI-NOHSA: buffer_load_dwordx4 -; CI-HSA: flat_load_dwordx4 -; CI-HSA: flat_load_dwordx4 -; CI-HSA: flat_load_dwordx4 -; CI-HSA: flat_load_dwordx4 -define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) { -entry: - %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in - store <16 x i32> %0, <16 x i32> addrspace(1)* %out - ret void -} - -;===------------------------------------------------------------------------===; -; CONSTANT ADDRESS SPACE -;===------------------------------------------------------------------------===; - -; Load a sign-extended i8 value -; FUNC-LABEL: {{^}}load_const_i8_sext: -; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] -; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal -; R600: 8 -; SI-NOHSA: buffer_load_sbyte v{{[0-9]+}}, -; CI-HSA: flat_load_sbyte v{{[0-9]+}}, -define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { -entry: - %0 = load i8, i8 addrspace(2)* %in - %1 = sext i8 %0 to i32 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; Load an aligned i8 value -; FUNC-LABEL: {{^}}load_const_i8_aligned: -; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}}, -; CI-HSA: flat_load_ubyte 
v{{[0-9]+}}, -define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { -entry: - %0 = load i8, i8 addrspace(2)* %in - %1 = zext i8 %0 to i32 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; Load an un-aligned i8 value -; FUNC-LABEL: {{^}}load_const_i8_unaligned: -; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-NOHSA: buffer_load_ubyte v{{[0-9]+}}, -; CI-HSA: flat_load_ubyte v{{[0-9]+}}, -define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { -entry: - %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1 - %1 = load i8, i8 addrspace(2)* %0 - %2 = zext i8 %1 to i32 - store i32 %2, i32 addrspace(1)* %out - ret void -} - -; Load a sign-extended i16 value -; FUNC-LABEL: {{^}}load_const_i16_sext: -; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]] -; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal -; R600: 16 -; SI-NOHSA: buffer_load_sshort -; CI-HSA: flat_load_sshort -define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { -entry: - %0 = load i16, i16 addrspace(2)* %in - %1 = sext i16 %0 to i32 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; Load an aligned i16 value -; FUNC-LABEL: {{^}}load_const_i16_aligned: -; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-NOHSA: buffer_load_ushort -; CI-HSA: flat_load_ushort -define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { -entry: - %0 = load i16, i16 addrspace(2)* %in - %1 = zext i16 %0 to i32 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; Load an un-aligned i16 value -; FUNC-LABEL: {{^}}load_const_i16_unaligned: -; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-NOHSA: buffer_load_ushort -; CI-HSA: flat_load_ushort -define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { -entry: - %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1 - %1 = load i16, i16 addrspace(2)* %0 - %2 = zext i16 %1 to i32 - store i32 %2, i32 addrspace(1)* 
%out - ret void -} - -; Load an i32 value from the constant address space. -; FUNC-LABEL: {{^}}load_const_addrspace_i32: -; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 - -; SI: s_load_dword s{{[0-9]+}} -define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { -entry: - %0 = load i32, i32 addrspace(2)* %in - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; Load a f32 value from the constant address space. -; FUNC-LABEL: {{^}}load_const_addrspace_f32: -; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0 - -; SI: s_load_dword s{{[0-9]+}} -define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) { - %1 = load float, float addrspace(2)* %in - store float %1, float addrspace(1)* %out - ret void -} - -;===------------------------------------------------------------------------===; -; LOCAL ADDRESS SPACE -;===------------------------------------------------------------------------===; - -; Load an i8 value from the local address space. 
-; FUNC-LABEL: {{^}}load_i8_local: -; R600: LDS_UBYTE_READ_RET -; SI-NOT: s_wqm_b64 -; SI: s_mov_b32 m0 -; SI: ds_read_u8 -define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { - %1 = load i8, i8 addrspace(3)* %in - %2 = zext i8 %1 to i32 - store i32 %2, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_i8_sext_local: -; R600: LDS_UBYTE_READ_RET -; R600: BFE_INT -; SI-NOT: s_wqm_b64 -; SI: s_mov_b32 m0 -; SI: ds_read_i8 -define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { -entry: - %0 = load i8, i8 addrspace(3)* %in - %1 = sext i8 %0 to i32 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v2i8_local: -; R600: LDS_UBYTE_READ_RET -; R600: LDS_UBYTE_READ_RET -; SI-NOT: s_wqm_b64 -; SI: s_mov_b32 m0 -; SI: ds_read_u8 -; SI: ds_read_u8 -define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) { -entry: - %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in - %1 = zext <2 x i8> %0 to <2 x i32> - store <2 x i32> %1, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v2i8_sext_local: -; R600-DAG: LDS_UBYTE_READ_RET -; R600-DAG: LDS_UBYTE_READ_RET -; R600-DAG: BFE_INT -; R600-DAG: BFE_INT -; SI-NOT: s_wqm_b64 -; SI: s_mov_b32 m0 -; SI: ds_read_i8 -; SI: ds_read_i8 -define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) { -entry: - %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in - %1 = sext <2 x i8> %0 to <2 x i32> - store <2 x i32> %1, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v4i8_local: -; R600: LDS_UBYTE_READ_RET -; R600: LDS_UBYTE_READ_RET -; R600: LDS_UBYTE_READ_RET -; R600: LDS_UBYTE_READ_RET -; SI-NOT: s_wqm_b64 -; SI: s_mov_b32 m0 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) { -entry: - %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in - %1 = zext <4 x i8> %0 
to <4 x i32> - store <4 x i32> %1, <4 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v4i8_sext_local: -; R600-DAG: LDS_UBYTE_READ_RET -; R600-DAG: LDS_UBYTE_READ_RET -; R600-DAG: LDS_UBYTE_READ_RET -; R600-DAG: LDS_UBYTE_READ_RET -; R600-DAG: BFE_INT -; R600-DAG: BFE_INT -; R600-DAG: BFE_INT -; R600-DAG: BFE_INT -; SI-NOT: s_wqm_b64 -; SI: s_mov_b32 m0 -; SI: ds_read_i8 -; SI: ds_read_i8 -; SI: ds_read_i8 -; SI: ds_read_i8 -define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) { -entry: - %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in - %1 = sext <4 x i8> %0 to <4 x i32> - store <4 x i32> %1, <4 x i32> addrspace(1)* %out - ret void -} - -; Load an i16 value from the local address space. -; FUNC-LABEL: {{^}}load_i16_local: -; R600: LDS_USHORT_READ_RET -; SI-NOT: s_wqm_b64 -; SI: s_mov_b32 m0 -; SI: ds_read_u16 -define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { -entry: - %0 = load i16 , i16 addrspace(3)* %in - %1 = zext i16 %0 to i32 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_i16_sext_local: -; R600: LDS_USHORT_READ_RET -; R600: BFE_INT -; SI-NOT: s_wqm_b64 -; SI: s_mov_b32 m0 -; SI: ds_read_i16 -define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { -entry: - %0 = load i16, i16 addrspace(3)* %in - %1 = sext i16 %0 to i32 - store i32 %1, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v2i16_local: -; R600: LDS_USHORT_READ_RET -; R600: LDS_USHORT_READ_RET -; SI-NOT: s_wqm_b64 -; SI: s_mov_b32 m0 -; SI: ds_read_u16 -; SI: ds_read_u16 -define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) { -entry: - %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in - %1 = zext <2 x i16> %0 to <2 x i32> - store <2 x i32> %1, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v2i16_sext_local: -; R600-DAG: LDS_USHORT_READ_RET -; R600-DAG: LDS_USHORT_READ_RET -; R600-DAG: 
BFE_INT -; R600-DAG: BFE_INT -; SI-NOT: s_wqm_b64 -; SI: s_mov_b32 m0 -; SI: ds_read_i16 -; SI: ds_read_i16 -define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) { -entry: - %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in - %1 = sext <2 x i16> %0 to <2 x i32> - store <2 x i32> %1, <2 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v4i16_local: -; R600: LDS_USHORT_READ_RET -; R600: LDS_USHORT_READ_RET -; R600: LDS_USHORT_READ_RET -; R600: LDS_USHORT_READ_RET -; SI-NOT: s_wqm_b64 -; SI: s_mov_b32 m0 -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_read_u16 -define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) { -entry: - %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in - %1 = zext <4 x i16> %0 to <4 x i32> - store <4 x i32> %1, <4 x i32> addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}load_v4i16_sext_local: -; R600-DAG: LDS_USHORT_READ_RET -; R600-DAG: LDS_USHORT_READ_RET -; R600-DAG: LDS_USHORT_READ_RET -; R600-DAG: LDS_USHORT_READ_RET -; R600-DAG: BFE_INT -; R600-DAG: BFE_INT -; R600-DAG: BFE_INT -; R600-DAG: BFE_INT -; SI-NOT: s_wqm_b64 -; SI: s_mov_b32 m0 -; SI: ds_read_i16 -; SI: ds_read_i16 -; SI: ds_read_i16 -; SI: ds_read_i16 -define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) { -entry: - %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in - %1 = sext <4 x i16> %0 to <4 x i32> - store <4 x i32> %1, <4 x i32> addrspace(1)* %out - ret void -} - -; load an i32 value from the local address space. -; FUNC-LABEL: {{^}}load_i32_local: -; R600: LDS_READ_RET -; SI-NOT: s_wqm_b64 -; SI: s_mov_b32 m0 -; SI: ds_read_b32 -define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { -entry: - %0 = load i32, i32 addrspace(3)* %in - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; load a f32 value from the local address space. 
-; FUNC-LABEL: {{^}}load_f32_local: -; R600: LDS_READ_RET -; SI: s_mov_b32 m0 -; SI: ds_read_b32 -define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) { -entry: - %0 = load float, float addrspace(3)* %in - store float %0, float addrspace(1)* %out - ret void -} - -; load a v2f32 value from the local address space -; FUNC-LABEL: {{^}}load_v2f32_local: -; R600: LDS_READ_RET -; R600: LDS_READ_RET -; SI: s_mov_b32 m0 -; SI: ds_read_b64 -define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) { -entry: - %0 = load <2 x float>, <2 x float> addrspace(3)* %in - store <2 x float> %0, <2 x float> addrspace(1)* %out - ret void -} - -; Test loading a i32 and v2i32 value from the same base pointer. -; FUNC-LABEL: {{^}}load_i32_v2i32_local: -; R600: LDS_READ_RET -; R600: LDS_READ_RET -; R600: LDS_READ_RET -; SI-DAG: ds_read_b32 -; SI-DAG: ds_read2_b32 -define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) { - %scalar = load i32, i32 addrspace(3)* %in - %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)* - %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2 - %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4 - %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0 - %vec = add <2 x i32> %vec0, %vec1 - store <2 x i32> %vec, <2 x i32> addrspace(1)* %out - ret void -} - - -@lds = addrspace(3) global [512 x i32] undef, align 4 - -; On SI we need to make sure that the base offset is a register and not -; an immediate. 
-; FUNC-LABEL: {{^}}load_i32_local_const_ptr: -; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0 -; SI: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4 -; R600: LDS_READ_RET -define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { -entry: - %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1 - %tmp1 = load i32, i32 addrspace(3)* %tmp0 - %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1 - store i32 %tmp1, i32 addrspace(1)* %tmp2 - ret void -} Index: test/CodeGen/AMDGPU/load.vec.ll =================================================================== --- test/CodeGen/AMDGPU/load.vec.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s - -; load a v2i32 value from the global address space. -; EG: {{^}}load_v2i32: -; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0 -; SI: {{^}}load_v2i32: -; SI: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}] -define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { - %a = load <2 x i32>, <2 x i32> addrspace(1) * %in - store <2 x i32> %a, <2 x i32> addrspace(1)* %out - ret void -} - -; load a v4i32 value from the global address space. 
-; EG: {{^}}load_v4i32: -; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0 -; SI: {{^}}load_v4i32: -; SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}] -define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { - %a = load <4 x i32>, <4 x i32> addrspace(1) * %in - store <4 x i32> %a, <4 x i32> addrspace(1)* %out - ret void -} Index: test/CodeGen/AMDGPU/load64.ll =================================================================== --- test/CodeGen/AMDGPU/load64.ll +++ /dev/null @@ -1,31 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s - -; load a f64 value from the global address space. -; CHECK-LABEL: {{^}}load_f64: -; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}] -; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}] -define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) { - %1 = load double, double addrspace(1)* %in - store double %1, double addrspace(1)* %out - ret void -} - -; CHECK-LABEL: {{^}}load_i64: -; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}] -; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}] -define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { - %tmp = load i64, i64 addrspace(1)* %in - store i64 %tmp, i64 addrspace(1)* %out, align 8 - ret void -} - -; Load a f64 value from the constant address space. 
-; CHECK-LABEL: {{^}}load_const_addrspace_f64: -; CHECK: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}] -; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}] -define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) { - %1 = load double, double addrspace(2)* %in - store double %1, double addrspace(1)* %out - ret void -} Index: test/CodeGen/AMDGPU/local-memory.ll =================================================================== --- test/CodeGen/AMDGPU/local-memory.ll +++ test/CodeGen/AMDGPU/local-memory.ll @@ -43,6 +43,41 @@ ret void } +@lds = addrspace(3) global [512 x i32] undef, align 4 + +; On SI we need to make sure that the base offset is a register and not +; an immediate. +; FUNC-LABEL: {{^}}load_i32_local_const_ptr: +; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0 +; GCN: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4 +; R600: LDS_READ_RET +define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { +entry: + %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1 + %tmp1 = load i32, i32 addrspace(3)* %tmp0 + %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1 + store i32 %tmp1, i32 addrspace(1)* %tmp2 + ret void +} + +; Test loading a i32 and v2i32 value from the same base pointer. 
+; FUNC-LABEL: {{^}}load_i32_v2i32_local: +; R600: LDS_READ_RET +; R600: LDS_READ_RET +; R600: LDS_READ_RET +; GCN-DAG: ds_read_b32 +; GCN-DAG: ds_read2_b32 +define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) { + %scalar = load i32, i32 addrspace(3)* %in + %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)* + %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2 + %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4 + %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0 + %vec = add <2 x i32> %vec0, %vec1 + store <2 x i32> %vec, <2 x i32> addrspace(1)* %out + ret void +} + declare i32 @llvm.r600.read.tidig.x() #0 declare void @llvm.AMDGPU.barrier.local() Index: test/CodeGen/AMDGPU/unaligned-load-store.ll =================================================================== --- test/CodeGen/AMDGPU/unaligned-load-store.ll +++ test/CodeGen/AMDGPU/unaligned-load-store.ll @@ -1,300 +1,387 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s - -; SI-LABEL: {{^}}unaligned_load_store_i16_local: -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_write_b8 -; SI: ds_write_b8 -; SI: s_endpgm -define void @unaligned_load_store_i16_local(i16 addrspace(3)* %p, i16 addrspace(3)* %r) nounwind { +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}local_unaligned_load_store_i16: +; GCN: ds_read_u8 +; GCN: 
ds_read_u8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: s_endpgm +define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(3)* %r) #0 { %v = load i16, i16 addrspace(3)* %p, align 1 store i16 %v, i16 addrspace(3)* %r, align 1 ret void } -; SI-LABEL: {{^}}unaligned_load_store_i16_global: -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_store_byte -; SI: buffer_store_byte -; SI: s_endpgm -define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) nounwind { +; FUNC-LABEL: {{^}}unaligned_load_store_i16_global: +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte + +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 { %v = load i16, i16 addrspace(1)* %p, align 1 store i16 %v, i16 addrspace(1)* %r, align 1 ret void } -; SI-LABEL: {{^}}unaligned_load_store_i32_local: -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_write_b8 -; SI: ds_write_b8 -; SI: ds_write_b8 -; SI: ds_write_b8 -; SI: s_endpgm -define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind { +; FUNC-LABEL: {{^}}local_unaligned_load_store_i32: +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: s_endpgm +define void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 { %v = load i32, i32 addrspace(3)* %p, align 1 store i32 %v, i32 addrspace(3)* %r, align 1 ret void } -; SI-LABEL: {{^}}unaligned_load_store_i32_global: -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_store_byte -; SI: buffer_store_byte -; SI: buffer_store_byte -; SI: 
buffer_store_byte -define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind { +; FUNC-LABEL: {{^}}global_unaligned_load_store_i32: +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte + +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +define void @global_unaligned_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 { %v = load i32, i32 addrspace(1)* %p, align 1 store i32 %v, i32 addrspace(1)* %r, align 1 ret void } -; SI-LABEL: {{^}}align2_load_store_i32_global: -; SI: buffer_load_ushort -; SI: buffer_load_ushort -; SI: buffer_store_short -; SI: buffer_store_short -define void @align2_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind { +; FUNC-LABEL: {{^}}global_align2_load_store_i32: +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_load_ushort +; GCN-NOHSA: buffer_store_short +; GCN-NOHSA: buffer_store_short + +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_load_ushort +; GCN-HSA: flat_store_short +; GCN-HSA: flat_store_short +define void @global_align2_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 { %v = load i32, i32 addrspace(1)* %p, align 2 store i32 %v, i32 addrspace(1)* %r, align 2 ret void } -; SI-LABEL: {{^}}align2_load_store_i32_local: -; SI: ds_read_u16 -; SI: ds_read_u16 -; SI: ds_write_b16 -; SI: ds_write_b16 -define void @align2_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind { +; FUNC-LABEL: {{^}}local_align2_load_store_i32: +; GCN: ds_read_u16 +; GCN: ds_read_u16 +; GCN: ds_write_b16 +; GCN: ds_write_b16 +define void @local_align2_load_store_i32(i32 
addrspace(3)* %p, i32 addrspace(3)* %r) #0 { %v = load i32, i32 addrspace(3)* %p, align 2 store i32 %v, i32 addrspace(3)* %r, align 2 ret void } ; FIXME: Unnecessary packing and unpacking of bytes. -; SI-LABEL: {{^}}unaligned_load_store_i64_local: -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 - -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl - -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl - -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl - -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl - -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl - -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl - -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl -; SI: ds_write_b8 -; SI: s_endpgm -define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) { +; FUNC-LABEL: {{^}}local_unaligned_load_store_i64: +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 + +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl + +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl + +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl + +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl + +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl + +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl + +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl +; GCN: ds_write_b8 +; GCN: s_endpgm +define void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(3)* %r) { %v = load i64, i64 addrspace(3)* %p, align 1 store i64 %v, i64 addrspace(3)* %r, align 1 ret void } -; SI-LABEL: {{^}}unaligned_load_store_v2i32_local: -; SI: ds_read_u8 -; SI: ds_read_u8 -; 
SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 - -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl - -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl - -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl - -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl - -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl - -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl - -; SI: ds_write_b8 -; XSI-NOT: v_or_b32 -; XSI-NOT: v_lshl -; SI: ds_write_b8 -; SI: s_endpgm -define void @unaligned_load_store_v2i32_local(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) { +; FUNC-LABEL: {{^}}local_unaligned_load_store_v2i32: +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 + +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl + +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl + +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl + +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl + +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl + +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl + +; GCN: ds_write_b8 +; XGCN-NOT: v_or_b32 +; XGCN-NOT: v_lshl +; GCN: ds_write_b8 +; GCN: s_endpgm +define void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) { %v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1 store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1 ret void } -; SI-LABEL: {{^}}unaligned_load_store_i64_global: -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte -; SI: buffer_load_ubyte - -; XSI-NOT: v_or_ -; XSI-NOT: v_lshl - -; SI: buffer_store_byte -; SI: 
buffer_store_byte -; SI: buffer_store_byte -; SI: buffer_store_byte -; SI: buffer_store_byte -; SI: buffer_store_byte -; SI: buffer_store_byte -; SI: buffer_store_byte -define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) { +; FUNC-LABEL: {{^}}unaligned_load_store_i64_global: +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte + +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte + +; XGCN-NOT: v_or_ +; XGCN-NOT: v_lshl + +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte + +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 { %v = load i64, i64 addrspace(1)* %p, align 1 store i64 %v, i64 addrspace(1)* %r, align 1 ret void } -; SI-LABEL: {{^}}unaligned_load_store_v4i32_local: -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 - -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 - -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 - -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 - -; SI: ds_write_b8 -; SI: ds_write_b8 -; SI: ds_write_b8 -; SI: ds_write_b8 - -; SI: ds_write_b8 -; SI: ds_write_b8 -; SI: ds_write_b8 
-; SI: ds_write_b8 - -; SI: ds_write_b8 -; SI: ds_write_b8 -; SI: ds_write_b8 -; SI: ds_write_b8 - -; SI: ds_write_b8 -; SI: ds_write_b8 -; SI: ds_write_b8 -; SI: ds_write_b8 -; SI: s_endpgm -define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind { +; FUNC-LABEL: {{^}}local_unaligned_load_store_v4i32: +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 + +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 + +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 + +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 + +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 + +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 + +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 + +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: s_endpgm +define void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) #0 { %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1 store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1 ret void } -; FIXME: We mark v4i32 as custom, so misaligned loads are never expanded. 
-; FIXME-SI-LABEL: {{^}}unaligned_load_store_v4i32_global -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -; FIXME-SI: buffer_load_ubyte -define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind { +; FUNC-LABEL: {{^}}global_unaligned_load_store_v4i32: +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte + +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte +; GCN-NOHSA: buffer_store_byte + + +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: 
flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte + +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +; GCN-HSA: flat_store_byte +define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) #0 { %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1 store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1 ret void } -; SI-LABEL: {{^}}load_lds_i64_align_4: -; SI: ds_read2_b32 -; SI: s_endpgm -define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { +; FUNC-LABEL: {{^}}local_load_i64_align_4: +; GCN: ds_read2_b32 +define void @local_load_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { %val = load i64, i64 addrspace(3)* %in, align 4 store i64 %val, i64 addrspace(1)* %out, align 8 ret void } -; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset -; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9 -; SI: s_endpgm -define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { +; FUNC-LABEL: {{^}}local_load_i64_align_4_with_offset +; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9 +define void @local_load_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4 %val = load i64, i64 addrspace(3)* %ptr, align 4 store i64 %val, i64 addrspace(1)* %out, align 8 ret void } 
-; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset: +; FUNC-LABEL: {{^}}local_load_i64_align_4_with_split_offset: ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits -; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1 -; SI: s_endpgm -define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { +; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1 +; GCN: s_endpgm +define void @local_load_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)* %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255 %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)* @@ -303,49 +390,95 @@ ret void } -; SI-LABEL: {{^}}load_lds_i64_align_1: -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: ds_read_u8 -; SI: buffer_store_dwordx2 -; SI: s_endpgm - -define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { +; FUNC-LABEL: {{^}}local_load_i64_align_1: +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: ds_read_u8 +; GCN: store_dwordx2 +define void @local_load_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { %val = load i64, i64 addrspace(3)* %in, align 1 store i64 %val, i64 addrspace(1)* %out, align 8 ret void } -; SI-LABEL: {{^}}store_lds_i64_align_4: -; SI: ds_write2_b32 -; SI: s_endpgm -define void @store_lds_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 { +; FUNC-LABEL: {{^}}local_store_i64_align_4: +; GCN: ds_write2_b32 +define void @local_store_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 { store i64 %val, i64 addrspace(3)* %out, align 4 ret void } -; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset -; SI: 
ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9 -; SI: s_endpgm -define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 { +; FUNC-LABEL: {{^}}local_store_i64_align_4_with_offset +; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9 +; GCN: s_endpgm +define void @local_store_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 { %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4 store i64 0, i64 addrspace(3)* %ptr, align 4 ret void } -; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset: +; FUNC-LABEL: {{^}}local_store_i64_align_4_with_split_offset: ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits -; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1 -; SI: s_endpgm -define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 { +; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1 +; GCN: s_endpgm +define void @local_store_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 { %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)* %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255 %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)* store i64 0, i64 addrspace(3)* %out, align 4 ret void } + +; FUNC-LABEL: {{^}}constant_load_unaligned_i16: +; GCN-NOHSA: buffer_load_ushort +; GCN-HSA: flat_load_ushort + +; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} +define void @constant_load_unaligned_i16(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { +entry: + %tmp0 = getelementptr i16, i16 addrspace(2)* %in, i32 1 + %tmp1 = load i16, i16 addrspace(2)* %tmp0 + %tmp2 = zext i16 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_unaligned_i32: +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte + +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: 
flat_load_ubyte +; GCN-HSA: flat_load_ubyte +define void @constant_load_unaligned_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { +entry: + %tmp0 = load i32, i32 addrspace(2)* %in, align 1 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}constant_load_unaligned_f32: +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte +; GCN-NOHSA: buffer_load_ubyte + +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +; GCN-HSA: flat_load_ubyte +define void @constant_load_unaligned_f32(float addrspace(1)* %out, float addrspace(2)* %in) { + %tmp1 = load float, float addrspace(2)* %in, align 1 + store float %tmp1, float addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind }