Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1335,6 +1335,55 @@
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_ZEXT:
+  case TargetOpcode::G_SEXT:
+  case TargetOpcode::G_ANYEXT:
+  case TargetOpcode::G_FPEXT: {
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+
+    unsigned DstReg = MI.getOperand(0).getReg();
+    unsigned SrcReg = MI.getOperand(1).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+    LLT SrcTy = MRI.getType(SrcReg);
+
+    LLT NarrowTy0 = NarrowTy;
+    LLT NarrowTy1;
+    unsigned NumParts;
+
+    if (NarrowTy.isScalar()) {
+      NumParts = DstTy.getNumElements();
+      NarrowTy1 = SrcTy.getElementType();
+    } else {
+      // Uneven breakdown not handled.
+      NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
+      if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
+        return UnableToLegalize;
+
+      NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
+    }
+
+    SmallVector<unsigned, 8> SrcRegs, DstRegs;
+    extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
+
+    for (unsigned I = 0; I < NumParts; ++I) {
+      unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+      MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
+        .addDef(DstReg)
+        .addUse(SrcRegs[I]);
+
+      NewInst->setFlags(MI.getFlags());
+      DstRegs.push_back(DstReg);
+    }
+
+    if (NarrowTy.isVector())
+      MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+    else
+      MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+    MI.eraseFromParent();
+    return Legalized;
+  }
   }
 }
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -141,6 +141,18 @@
   getActionDefinitionsBuilder(G_FPTRUNC)
     .legalFor({{S32, S64}});
 
+  getActionDefinitionsBuilder(G_FPEXT)
+    .legalFor({{S64, S32}, {S32, S16}})
+    .lowerFor({{S64, S16}}) // FIXME: Implement
+    .fewerElementsIf(
+      [](const LegalityQuery &Query) {
+        return Query.Types[0].isVector();
+      },
+      [](const LegalityQuery &Query) {
+        return std::make_pair(
+          0, Query.Types[0].getElementType());
+      });
+
   // Use actual fsub instruction
   setAction({G_FSUB, S32}, Legal);
 
@@ -151,13 +163,16 @@
   setAction({G_FCMP, 1, S32}, Legal);
   setAction({G_FCMP, 1, S64}, Legal);
 
-  getActionDefinitionsBuilder(G_ANYEXT)
-    .legalFor({{S32, S16}, {S64, S32}, {S64, S16}});
-
-  getActionDefinitionsBuilder(G_SEXT)
-    .legalFor({{S64, S32}});
-  getActionDefinitionsBuilder(G_ZEXT)
-    .legalFor({{S64, S32}});
+  getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
+    .legalFor({{S64, S32}, {S32, S16}, {S64, S16}})
+    .fewerElementsIf(
+      [](const LegalityQuery &Query) {
+        return Query.Types[0].isVector();
+      },
+      [](const LegalityQuery &Query) {
+        return std::make_pair(
+          0, Query.Types[0].getElementType());
+      });
 
   setAction({G_FPTOSI, S32}, Legal);
   setAction({G_FPTOSI, 1, S32}, Legal);
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
@@ -63,3 +63,122 @@
     %2:_(s32) = G_ANYEXT %1
     $vgpr0 = COPY %2
 ...
+
+---
+name: test_anyext_v2s16_to_v2s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_anyext_v2s16_to_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s32>) = G_ANYEXT %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_anyext_v3s16_to_v3s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_anyext_v3s16_to_v3s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x s32>)
+    %0:_(<3 x s16>) = G_IMPLICIT_DEF
+    %1:_(<3 x s32>) = G_ANYEXT %0
+    $vgpr0_vgpr1_vgpr2 = COPY %1
+...
+
+---
+name: test_anyext_v4s16_to_v4s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_anyext_v4s16_to_v4s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<4 x s32>)
+    %0:_(<4 x s16>) = G_IMPLICIT_DEF
+    %1:_(<4 x s32>) = G_ANYEXT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_anyext_s32_to_s64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_anyext_s32_to_s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s64) = G_ANYEXT %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_anyext_v2s32_to_v2s64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_anyext_v2s32_to_v2s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s64>) = G_ANYEXT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_anyext_v3s32_to_v3s64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: test_anyext_v3s32_to_v3s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
+    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64), [[ANYEXT2]](s64)
+    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s64>) = G_ANYEXT %0
+    S_NOP 0, implicit %1
+
+...
+
+---
+name: test_anyext_v4s32_to_v4s64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_anyext_v4s32_to_v4s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
+    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[UV2]](s32)
+    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[UV3]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ANYEXT]](s64), [[ANYEXT1]](s64), [[ANYEXT2]](s64), [[ANYEXT3]](s64)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<4 x s64>) = G_ANYEXT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
+...
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir
@@ -0,0 +1,167 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name: test_fpext_f16_to_f32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fpext_f16_to_f32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+    ; CHECK: $vgpr0 = COPY [[FPEXT]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s32) = G_FPEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: test_fpext_v2f16_to_v2f32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fpext_v2f16_to_v2f32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
+    ; CHECK: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPEXT]](s32), [[FPEXT1]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s32>) = G_FPEXT %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_fpext_v2f16_to_v2f32_w_flags
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fpext_v2f16_to_v2f32_w_flags
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; CHECK: %4:_(s32) = nnan G_FPEXT [[UV]](s16)
+    ; CHECK: %5:_(s32) = nnan G_FPEXT [[UV1]](s16)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s32>) = nnan G_FPEXT %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_fpext_v3f16_to_v3f32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fpext_v3f16_to_v3f32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>)
+    ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
+    ; CHECK: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
+    ; CHECK: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT2]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    %0:_(<3 x s16>) = G_IMPLICIT_DEF
+    %1:_(<3 x s32>) = G_FPEXT %0
+    $vgpr0_vgpr1_vgpr2 = COPY %1
+...
+
+---
+name: test_fpext_v4f16_to_v4f32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fpext_v4f16_to_v4f32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CHECK: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
+    ; CHECK: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
+    ; CHECK: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
+    ; CHECK: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FPEXT]](s32), [[FPEXT1]](s32), [[FPEXT2]](s32), [[FPEXT3]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    %0:_(<4 x s16>) = G_IMPLICIT_DEF
+    %1:_(<4 x s32>) = G_FPEXT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_fpext_f32_to_f64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_fpext_f32_to_f64
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[COPY]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[FPEXT]](s64)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s64) = G_FPEXT %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_fpext_v2f32_to_v2f64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_fpext_v2f32_to_v2f64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[UV]](s32)
+    ; CHECK: [[FPEXT1:%[0-9]+]]:_(s64) = G_FPEXT [[UV1]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FPEXT]](s64), [[FPEXT1]](s64)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s64>) = G_FPEXT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_fpext_v3f32_to_v3f64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: test_fpext_v3f32_to_v3f64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[UV]](s32)
+    ; CHECK: [[FPEXT1:%[0-9]+]]:_(s64) = G_FPEXT [[UV1]](s32)
+    ; CHECK: [[FPEXT2:%[0-9]+]]:_(s64) = G_FPEXT [[UV2]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[FPEXT]](s64), [[FPEXT1]](s64), [[FPEXT2]](s64)
+    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s64>) = G_FPEXT %0
+    S_NOP 0, implicit %1
+
+...
+
+---
+name: test_fpext_v4f32_to_v4f64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_fpext_v4f32_to_v4f64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK: [[FPEXT:%[0-9]+]]:_(s64) = G_FPEXT [[UV]](s32)
+    ; CHECK: [[FPEXT1:%[0-9]+]]:_(s64) = G_FPEXT [[UV1]](s32)
+    ; CHECK: [[FPEXT2:%[0-9]+]]:_(s64) = G_FPEXT [[UV2]](s32)
+    ; CHECK: [[FPEXT3:%[0-9]+]]:_(s64) = G_FPEXT [[UV3]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[FPEXT]](s64), [[FPEXT1]](s64), [[FPEXT2]](s64), [[FPEXT3]](s64)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<4 x s64>) = G_FPEXT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
+...
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir
@@ -74,3 +74,133 @@
     %2:_(s32) = G_SEXT %1
     $vgpr0 = COPY %2
 ...
+
+---
+name: test_sext_v2s16_to_v2s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_sext_v2s16_to_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16)
+    ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s32>) = G_SEXT %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_sext_v3s16_to_v3s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_sext_v3s16_to_v3s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>)
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16)
+    ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
+    ; CHECK: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32), [[SEXT2]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    %0:_(<3 x s16>) = G_IMPLICIT_DEF
+    %1:_(<3 x s32>) = G_SEXT %0
+    $vgpr0_vgpr1_vgpr2 = COPY %1
+...
+
+---
+name: test_sext_v4s16_to_v4s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_sext_v4s16_to_v4s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV]](s16)
+    ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16)
+    ; CHECK: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[UV2]](s16)
+    ; CHECK: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[SEXT]](s32), [[SEXT1]](s32), [[SEXT2]](s32), [[SEXT3]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    %0:_(<4 x s16>) = G_IMPLICIT_DEF
+    %1:_(<4 x s32>) = G_SEXT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_sext_s32_to_s64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_sext_s32_to_s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s64) = G_SEXT %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_sext_v2s32_to_v2s64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_sext_v2s32_to_v2s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32)
+    ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s64>) = G_SEXT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_sext_v3s32_to_v3s64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: test_sext_v3s32_to_v3s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32)
+    ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
+    ; CHECK: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64), [[SEXT2]](s64)
+    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s64>) = G_SEXT %0
+    S_NOP 0, implicit %1
+
+...
+
+---
+name: test_sext_v4s32_to_v4s64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_sext_v4s32_to_v4s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV]](s32)
+    ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV1]](s32)
+    ; CHECK: [[SEXT2:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32)
+    ; CHECK: [[SEXT3:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SEXT]](s64), [[SEXT1]](s64), [[SEXT2]](s64), [[SEXT3]](s64)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<4 x s64>) = G_SEXT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
+...
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir
@@ -69,3 +69,133 @@
     %2:_(s32) = G_ZEXT %1
     $vgpr0 = COPY %2
 ...
+
+---
+name: test_zext_v2s16_to_v2s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_zext_v2s16_to_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s32>) = G_ZEXT %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_zext_v3s16_to_v3s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_zext_v3s16_to_v3s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>)
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+    %0:_(<3 x s16>) = G_IMPLICIT_DEF
+    %1:_(<3 x s32>) = G_ZEXT %0
+    $vgpr0_vgpr1_vgpr2 = COPY %1
+...
+
+---
+name: test_zext_v4s16_to_v4s32
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_zext_v4s16_to_v4s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16)
+    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16)
+    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UV2]](s16)
+    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UV3]](s16)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ZEXT]](s32), [[ZEXT1]](s32), [[ZEXT2]](s32), [[ZEXT3]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+    %0:_(<4 x s16>) = G_IMPLICIT_DEF
+    %1:_(<4 x s32>) = G_ZEXT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_zext_s32_to_s64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_zext_s32_to_s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s64) = G_ZEXT %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_zext_v2s32_to_v2s64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_zext_v2s32_to_v2s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
+    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s64>) = G_ZEXT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_zext_v3s32_to_v3s64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: test_zext_v3s32_to_v3s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
+    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
+    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64), [[ZEXT2]](s64)
+    ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s64>)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(<3 x s64>) = G_ZEXT %0
+    S_NOP 0, implicit %1
+
+...
+
+---
+name: test_zext_v4s32_to_v4s64
+body: |
+  bb.0.entry:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_zext_v4s32_to_v4s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV]](s32)
+    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV1]](s32)
+    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
+    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64), [[ZEXT2]](s64), [[ZEXT3]](s64)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<4 x s64>) = G_ZEXT %0
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
+...
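For reference, the scalarization this patch requests is expressed entirely through the two `fewerElementsIf` rules added in AMDGPULegalizerInfo.cpp; the same rule shape should work for any GlobalISel target. The sketch below is illustrative only and not part of the patch (the enclosing target LegalizerInfo constructor is assumed, not shown); it uses only the builder API exercised above:

  // Scalarize vector extensions by narrowing type index 0 (the result
  // type) to its element type. The new LegalizerHelper code then emits one
  // extension per part and recombines the parts with G_BUILD_VECTOR (scalar
  // parts) or G_CONCAT_VECTORS (vector parts).
  getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT, G_FPEXT})
      .fewerElementsIf(
          [](const LegalityQuery &Query) {
            // Fire only when the result is a vector.
            return Query.Types[0].isVector();
          },
          [](const LegalityQuery &Query) {
            // Narrow type index 0 down to its element type.
            return std::make_pair(0, Query.Types[0].getElementType());
          });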