diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1800,6 +1800,7 @@ case TargetOpcode::G_FFLOOR: return legalizeFFloor(MI, MRI, B); case TargetOpcode::G_BUILD_VECTOR: + case TargetOpcode::G_BUILD_VECTOR_TRUNC: return legalizeBuildVector(MI, MRI, B); case TargetOpcode::G_MUL: return legalizeMul(Helper, MI); @@ -2888,11 +2889,24 @@ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { Register Dst = MI.getOperand(0).getReg(); const LLT S32 = LLT::scalar(32); + const LLT S16 = LLT::scalar(16); assert(MRI.getType(Dst) == LLT::fixed_vector(2, 16)); Register Src0 = MI.getOperand(1).getReg(); Register Src1 = MI.getOperand(2).getReg(); - assert(MRI.getType(Src0) == LLT::scalar(16)); + + if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC) { + assert(MRI.getType(Src0) == S32); + + Src0 = MRI.createGenericVirtualRegister(S16); + B.buildTrunc(Src0, MI.getOperand(1).getReg()); + + Src1 = MRI.createGenericVirtualRegister(S16); + B.buildTrunc(Src1, MI.getOperand(2).getReg()); + } + + assert(MRI.getType(Src0) == S16); + assert(MRI.getType(Src1) == S16); auto Merge = B.buildMerge(S32, {Src0, Src1}); B.buildBitcast(Dst, Merge); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir @@ -2,6 +2,7 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s --- name: legal_s32_to_v2s16 @@ -16,6 +17,19 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32) ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; VI-LABEL: name: legal_s32_to_v2s16 + ; VI: liveins: $vgpr0, $vgpr1 + ; VI-NEXT: {{ $}} + ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BITCAST]](<2 x s16>) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1