diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1791,6 +1791,7 @@
   case TargetOpcode::G_FFLOOR:
     return legalizeFFloor(MI, MRI, B);
   case TargetOpcode::G_BUILD_VECTOR:
+  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
     return legalizeBuildVector(MI, MRI, B);
   case TargetOpcode::G_MUL:
     return legalizeMul(Helper, MI);
@@ -2859,11 +2860,21 @@
   MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
   Register Dst = MI.getOperand(0).getReg();
   const LLT S32 = LLT::scalar(32);
+  const LLT S16 = LLT::scalar(16);
   assert(MRI.getType(Dst) == LLT::fixed_vector(2, 16));
 
   Register Src0 = MI.getOperand(1).getReg();
   Register Src1 = MI.getOperand(2).getReg();
-  assert(MRI.getType(Src0) == LLT::scalar(16));
+
+  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC) {
+    // G_BUILD_VECTOR_TRUNC takes s32 sources; narrow both to s16 so the
+    // merge + bitcast below is shared with plain G_BUILD_VECTOR.
+    assert(MRI.getType(Src0) == S32 && MRI.getType(Src1) == S32);
+    Src0 = B.buildTrunc(S16, Src0).getReg(0);
+    Src1 = B.buildTrunc(S16, Src1).getReg(0);
+  } else {
+    assert(MRI.getType(Src0) == S16 && MRI.getType(Src1) == S16);
+  }
 
   auto Merge = B.buildMerge(S32, {Src0, Src1});
   B.buildBitcast(Dst, Merge);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-trunc.mir
@@ -1,7 +1,9 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=PREGFX8 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=PREGFX8 %s
 
 ---
 name: legal_s32_to_v2s16
@@ -9,13 +11,26 @@
   bb.0:
     liveins: $vgpr0, $vgpr1
 
-    ; GFX9-LABEL: name: legal_s32_to_v2s16
-    ; GFX9: liveins: $vgpr0, $vgpr1
-    ; GFX9-NEXT: {{ $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
-    ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; GFX9PLUS-LABEL: name: legal_s32_to_v2s16
+    ; GFX9PLUS: liveins: $vgpr0, $vgpr1
+    ; GFX9PLUS-NEXT: {{ $}}
+    ; GFX9PLUS-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[COPY1]](s32)
+    ; GFX9PLUS-NEXT: S_NOP 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+    ; PREGFX8-LABEL: name: legal_s32_to_v2s16
+    ; PREGFX8: liveins: $vgpr0, $vgpr1
+    ; PREGFX8-NEXT: {{ $}}
+    ; PREGFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; PREGFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; PREGFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; PREGFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; PREGFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; PREGFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; PREGFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+    ; PREGFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+    ; PREGFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+    ; PREGFX8-NEXT: S_NOP 0, implicit [[BITCAST]](<2 x s16>)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1