Index: include/llvm/CodeGen/GlobalISel/LegalizerHelper.h =================================================================== --- include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -120,6 +120,9 @@ void extractParts(unsigned Reg, LLT Ty, int NumParts, SmallVectorImpl<unsigned> &VRegs); + LegalizeResult fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy); + LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty); MachineRegisterInfo &MRI; Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1268,6 +1268,55 @@ } } +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + + LLT NarrowTy0 = NarrowTy; + LLT NarrowTy1; + unsigned NumParts; + + if (NarrowTy.isScalar()) { + NumParts = DstTy.getNumElements(); + NarrowTy1 = SrcTy.getElementType(); + } else { + // Uneven breakdown not handled. 
+ NumParts = DstTy.getNumElements() / NarrowTy.getNumElements(); + if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) + return UnableToLegalize; + + NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits()); + } + + SmallVector<unsigned, 4> SrcRegs, DstRegs; + extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs); + + for (unsigned I = 0; I < NumParts; ++I) { + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0); + MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode()) + .addDef(DstReg) + .addUse(SrcRegs[I]); + + NewInst->setFlags(MI.getFlags()); + DstRegs.push_back(DstReg); + } + + if (NarrowTy.isVector()) + MIRBuilder.buildConcatVectors(DstReg, DstRegs); + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { @@ -1401,6 +1450,11 @@ MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_FPEXT: + return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy); } } Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -155,7 +155,15 @@ getActionDefinitionsBuilder(G_FPEXT) .legalFor({{S64, S32}, {S32, S16}}) - .lowerFor({{S64, S16}}); // FIXME: Implement + .lowerFor({{S64, S16}}) // FIXME: Implement + .fewerElementsIf( + [](const LegalityQuery &Query) { + return Query.Types[0].isVector(); + }, + [](const LegalityQuery &Query) { + return std::make_pair( + 0, Query.Types[0].getElementType()); + }); getActionDefinitionsBuilder(G_FSUB) // Use actual fsub instruction @@ -175,7 +183,15 @@ .legalFor({{S64, S32}, {S32, S16}, {S64, S16}, {S32, S1}, {S64, S1}, {S16, S1}, // FIXME: Hack - {S128, S32}}); + {S128, S32}}) + 
.fewerElementsIf( + [](const LegalityQuery &Query) { + return Query.Types[0].isVector(); + }, + [](const LegalityQuery &Query) { + return std::make_pair( + 0, Query.Types[0].getElementType()); + }); getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) .legalFor({{S32, S32}, {S64, S32}}); Index: test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir @@ -75,3 +75,107 @@ %1:_(s64) = G_ANYEXT %0 $vgpr0_vgpr1 = COPY %1 ... + +--- +name: test_anyext_v2s16_to_v2s32 +body: | + bb.0.entry: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_anyext_v2s16_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s32>) = G_ANYEXT %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_anyext_v3s16_to_v3s32 +body: | + bb.0.entry: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_anyext_v3s16_to_v3s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x s32>) + %0:_(<3 x s16>) = G_IMPLICIT_DEF + %1:_(<3 x s32>) = G_ANYEXT %0 + $vgpr0_vgpr1_vgpr2 = COPY %1 +... + +--- +name: test_anyext_v4s16_to_v4s32 +body: | + bb.0.entry: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_anyext_v4s16_to_v4s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<4 x s32>) + %0:_(<4 x s16>) = G_IMPLICIT_DEF + %1:_(<4 x s32>) = G_ANYEXT %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... 
+ +--- +name: test_anyext_v2s32_to_v2s64 +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_anyext_v2s32_to_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s64>) = G_ANYEXT %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... + +--- +name: test_anyext_v3s32_to_v3s64 +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_anyext_v3s32_to_v3s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64), [[ANYEXT2]](s64) + ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s64>) = G_ANYEXT %0 + S_NOP 0, implicit %1 + +... 
+ +--- +name: test_anyext_v4s32_to_v4s64 +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_anyext_v4s32_to_v4s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64), [[ANYEXT2]](s64), [[ANYEXT3]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s64>) = G_ANYEXT %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 +... Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir @@ -18,6 +18,81 @@ $vgpr0 = COPY %2 ... +--- +name: test_fpext_v2f16_to_v2f32 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_fpext_v2f16_to_v2f32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; CHECK: %4:_(s32) = nnan G_FPEXT [[UV]](s16) + ; CHECK: %5:_(s32) = nnan G_FPEXT [[UV1]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s32>) = nnan G_FPEXT %0 + $vgpr0_vgpr1 = COPY %1 +... 
+ +--- +name: test_fpext_v2f16_to_v2f32_w_flags +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_fpext_v2f16_to_v2f32_w_flags + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; CHECK: %4:_(s32) = nnan G_FPEXT [[UV]](s16) + ; CHECK: %5:_(s32) = nnan G_FPEXT [[UV1]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s32>) = nnan G_FPEXT %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_fpext_v3f16_to_v3f32 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_fpext_v3f16_to_v3f32 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) + ; CHECK: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) + ; CHECK: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT2]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + %0:_(<3 x s16>) = G_IMPLICIT_DEF + %1:_(<3 x s32>) = G_FPEXT %0 + $vgpr0_vgpr1_vgpr2 = COPY %1 +... 
+ +--- +name: test_fpext_v4f16_to_v4f32 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_fpext_v4f16_to_v4f32 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) + ; CHECK: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) + ; CHECK: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) + ; CHECK: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT2]](s32), [[FPEXT3]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + %0:_(<4 x s16>) = G_IMPLICIT_DEF + %1:_(<4 x s32>) = G_FPEXT %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... + --- name: test_fpext_f32_to_f64 body: | @@ -32,3 +107,61 @@ %1:_(s64) = G_FPEXT %0 $vgpr0_vgpr1 = COPY %1 ... + +--- +name: test_fpext_v2f32_to_v2f64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_fpext_v2f32_to_v2f64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[UV]](s32) + ; CHECK: [[FPEXT1:%[0-9]+]]:_(s64) = G_FPEXT [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FPEXT]](s64), [[FPEXT1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s64>) = G_FPEXT %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... 
+ +--- +name: test_fpext_v3f32_to_v3f64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_fpext_v3f32_to_v3f64 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[UV]](s32) + ; CHECK: [[FPEXT1:%[0-9]+]]:_(s64) = G_FPEXT [[UV1]](s32) + ; CHECK: [[FPEXT2:%[0-9]+]]:_(s64) = G_FPEXT [[UV2]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[FPEXT]](s64), [[FPEXT1]](s64), [[FPEXT2]](s64) + ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s64>) = G_FPEXT %0 + S_NOP 0, implicit %1 + +... + +--- +name: test_fpext_v4f32_to_v4f64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_fpext_v4f32_to_v4f64 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[UV]](s32) + ; CHECK: [[FPEXT1:%[0-9]+]]:_(s64) = G_FPEXT [[UV1]](s32) + ; CHECK: [[FPEXT2:%[0-9]+]]:_(s64) = G_FPEXT [[UV2]](s32) + ; CHECK: [[FPEXT3:%[0-9]+]]:_(s64) = G_FPEXT [[UV3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[FPEXT]](s64), [[FPEXT1]](s64), [[FPEXT2]](s64), [[FPEXT3]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s64>) = G_FPEXT %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 +... 
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir @@ -74,3 +74,118 @@ %2:_(s32) = G_SEXT %1 $vgpr0 = COPY %2 ... + +--- +name: test_sext_v2s16_to_v2s32 +body: | + bb.0.entry: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_sext_v2s16_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16) + ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s32>) = G_SEXT %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_sext_v3s16_to_v3s32 +body: | + bb.0.entry: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_sext_v3s16_to_v3s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16) + ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) + ; CHECK: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32), [[SEXT2]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + %0:_(<3 x s16>) = G_IMPLICIT_DEF + %1:_(<3 x s32>) = G_SEXT %0 + $vgpr0_vgpr1_vgpr2 = COPY %1 +... 
+ +--- +name: test_sext_v4s16_to_v4s32 +body: | + bb.0.entry: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_sext_v4s16_to_v4s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16) + ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) + ; CHECK: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16) + ; CHECK: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32), [[SEXT2]](s32), [[SEXT3]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + %0:_(<4 x s16>) = G_IMPLICIT_DEF + %1:_(<4 x s32>) = G_SEXT %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... + +--- +name: test_sext_v2s32_to_v2s64 +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sext_v2s32_to_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32) + ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s64>) = G_SEXT %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... 
+ +--- +name: test_sext_v3s32_to_v3s64 +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_sext_v3s32_to_v3s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32) + ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32) + ; CHECK: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64), [[SEXT2]](s64) + ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s64>) = G_SEXT %0 + S_NOP 0, implicit %1 + +... + +--- +name: test_sext_v4s32_to_v4s64 +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_sext_v4s32_to_v4s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32) + ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32) + ; CHECK: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32) + ; CHECK: [[SEXT3:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64), [[SEXT2]](s64), [[SEXT3]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s64>) = G_SEXT %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 +... 
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir @@ -79,3 +79,118 @@ %1:_(s64) = G_ZEXT %0 $vgpr0_vgpr1 = COPY %1 ... + +--- +name: test_zext_v2s16_to_v2s32 +body: | + bb.0.entry: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_zext_v2s16_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s32>) = G_ZEXT %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_zext_v3s16_to_v3s32 +body: | + bb.0.entry: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_zext_v3s16_to_v3s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + %0:_(<3 x s16>) = G_IMPLICIT_DEF + %1:_(<3 x s32>) = G_ZEXT %0 + $vgpr0_vgpr1_vgpr2 = COPY %1 +... 
+ +--- +name: test_zext_v4s16_to_v4s32 +body: | + bb.0.entry: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_zext_v4s16_to_v4s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32), [[ZEXT3]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + %0:_(<4 x s16>) = G_IMPLICIT_DEF + %1:_(<4 x s32>) = G_ZEXT %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... + +--- +name: test_zext_v2s32_to_v2s64 +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zext_v2s32_to_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s64>) = G_ZEXT %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... 
+ +--- +name: test_zext_v3s32_to_v3s64 +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_zext_v3s32_to_v3s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64), [[ZEXT2]](s64) + ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s64>) = G_ZEXT %0 + S_NOP 0, implicit %1 + +... + +--- +name: test_zext_v4s32_to_v4s64 +body: | + bb.0.entry: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_zext_v4s32_to_v4s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64), [[ZEXT2]](s64), [[ZEXT3]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s64>) = G_ZEXT %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 +...