diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2594,12 +2594,12 @@
 >;
 
 def : GCNPat <
-  (v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))),
+  (v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src0), (i16 undef))),
   (COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
 >;
 
 def : GCNPat <
-  (v2i16 (build_vector (i16 VGPR_32:$src0), (i16 undef))),
+  (v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 undef))),
   (COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
 >;
 
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx906 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX906 %s
 
 ; GCN-LABEL: name:            uniform_vec_0_i16
 ; GCN: S_LSHL_B32
@@ -213,3 +214,25 @@
   %val = bitcast <2 x half> %vec to float
   ret float %val
 }
+
+; GFX906-LABEL: name:            build_vec_v2i16_undeflo_divergent
+; GFX906: %[[LOAD:[0-9]+]]:vgpr_32 = DS_READ_U16
+; GFX906: %{{[0-9]+}}:vgpr_32 = COPY %[[LOAD]]
+define <2 x i16> @build_vec_v2i16_undeflo_divergent(i16 addrspace(3)* %in) #0 {
+entry:
+  %load = load i16, i16 addrspace(3)* %in
+  %build = insertelement <2 x i16> undef, i16 %load, i32 0
+  ret <2 x i16> %build
+}
+
+; GFX906-LABEL: name:            build_vec_v2i16_undeflo_uniform
+; GFX906: %[[LOAD:[0-9]+]]:vgpr_32 = DS_READ_U16
+; GFX906: %{{[0-9]+}}:sreg_32 = COPY %[[LOAD]]
+define amdgpu_kernel void @build_vec_v2i16_undeflo_uniform(i16 addrspace(3)* %in, i32 addrspace(1)* %out) #0 {
+entry:
+  %load = load i16, i16 addrspace(3)* %in
+  %build = insertelement <2 x i16> undef, i16 %load, i32 0
+  %result = bitcast <2 x i16> %build to i32
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}