Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1098,6 +1098,32 @@ } const unsigned WideSize = WideTy.getSizeInBits(); + if (SrcTy != WideTy) { + // Try to unmerge to an intermediate type to avoid creating a wide, illegal + // shift. + + LLT GCDTy = getGCDType(SrcTy, WideTy); + if (GCDTy != DstTy && SrcTy != GCDTy && + GCDTy.getSizeInBits() >= DstTy.getSizeInBits()) { + auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); + const int NumUnmerge = Unmerge->getNumOperands() - 1; + const int PartsPerUnmerge = NumDst / NumUnmerge; + + assert(NumUnmerge > 1 && PartsPerUnmerge > 1); + + for (int I = 0; I != NumUnmerge; ++I) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + + for (int J = 0; J != PartsPerUnmerge; ++J) + MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg()); + MIB.addUse(Unmerge.getReg(I)); + } + + MI.eraseFromParent(); + return Legalized; + } + } + unsigned NewSrcSize = NumDst * WideSize; LLT NewSrcTy = LLT::scalar(NewSrcSize); unsigned SizeDiff = WideSize - DstTy.getSizeInBits(); Index: test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -175,28 +175,20 @@ liveins: $vgpr0 ; CHECK-LABEL: name: test_unmerge_s8_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[TRUNC]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C1]](s64) - ; 
CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[OR]], [[TRUNC1]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; CHECK: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[OR1]], [[TRUNC2]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[SHL2]] - ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR2]](s64) + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ANYEXT]], [[C]](s32) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) - ; CHECK: $vgpr3 = COPY [[ANYEXT3]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[ANYEXT1]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK: $vgpr1 = COPY [[COPY2]](s32) + ; CHECK: $vgpr2 = COPY [[COPY3]](s32) + ; CHECK: $vgpr3 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s8), %2:_(s8), %3:_(s8), %4:_(s8) = G_UNMERGE_VALUES %0 %5:_(s32) = G_ANYEXT %1 @@ -209,6 +201,148 @@ $vgpr3 = COPY %8 ... 
+--- +name: test_unmerge_s8_s48 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_unmerge_s8_s48 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[COPY]](s64) + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[TRUNC]](s48) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ANYEXT]], [[C]](s32) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[ANYEXT1]], [[C]](s32) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ANYEXT2]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK: $vgpr1 = COPY [[COPY2]](s32) + ; CHECK: $vgpr2 = COPY [[COPY3]](s32) + ; CHECK: $vgpr3 = COPY [[COPY4]](s32) + ; CHECK: $vgpr4 = COPY [[COPY5]](s32) + ; CHECK: $vgpr5 = COPY [[COPY6]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s48) = G_TRUNC %0 + %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8), %6:_(s8), %7:_(s8) = G_UNMERGE_VALUES %1 + %8:_(s32) = G_ANYEXT %2 + %9:_(s32) = G_ANYEXT %3 + %10:_(s32) = G_ANYEXT %4 + %11:_(s32) = G_ANYEXT %5 + %12:_(s32) = G_ANYEXT %6 + %13:_(s32) = G_ANYEXT %7 + $vgpr0 = COPY %8 + $vgpr1 = COPY %9 + $vgpr2 = COPY %10 + $vgpr3 = COPY %11 + $vgpr4 = COPY %12 + $vgpr5 = COPY %13 +... 
+ +--- +name: test_unmerge_s8_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_unmerge_s8_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ANYEXT]], [[C]](s32) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[ANYEXT1]], [[C]](s32) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ANYEXT2]], [[C]](s32) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ANYEXT3]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ANYEXT1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ANYEXT2]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ANYEXT3]](s32) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK: $vgpr1 = COPY [[COPY2]](s32) + ; CHECK: $vgpr2 = COPY [[COPY3]](s32) + ; CHECK: $vgpr3 = COPY [[COPY4]](s32) + ; CHECK: $vgpr4 = COPY [[COPY5]](s32) + ; CHECK: $vgpr5 = COPY [[COPY6]](s32) + ; CHECK: $vgpr6 = COPY [[COPY7]](s32) + ; CHECK: $vgpr7 = COPY [[COPY8]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s8), %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %0 + %10:_(s32) = G_ANYEXT %1 + %11:_(s32) = G_ANYEXT %2 + %12:_(s32) = G_ANYEXT %3 + %13:_(s32) = G_ANYEXT %4 + %14:_(s32) = G_ANYEXT %5 + %15:_(s32) = G_ANYEXT %6 + %16:_(s32) = G_ANYEXT %7 + 
%17:_(s32) = G_ANYEXT %8 + $vgpr0 = COPY %10 + $vgpr1 = COPY %11 + $vgpr2 = COPY %12 + $vgpr3 = COPY %13 + $vgpr4 = COPY %14 + $vgpr5 = COPY %15 + $vgpr6 = COPY %16 + $vgpr7 = COPY %17 +... + +--- +name: test_unmerge_s8_p1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: test_unmerge_s8_p1 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](p1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK: $vgpr3 = COPY [[ANYEXT3]](s32) + ; CHECK: $vgpr4 = COPY [[ANYEXT4]](s32) + ; CHECK: $vgpr5 = COPY [[ANYEXT5]](s32) + ; CHECK: $vgpr6 = COPY [[ANYEXT6]](s32) + ; CHECK: $vgpr7 = COPY [[ANYEXT7]](s32) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s8), %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %0 + %10:_(s32) = G_ANYEXT %1 + %11:_(s32) = G_ANYEXT %2 + %12:_(s32) = G_ANYEXT %3 + %13:_(s32) = G_ANYEXT %4 + %14:_(s32) = G_ANYEXT %5 + %15:_(s32) = G_ANYEXT %6 + %16:_(s32) = G_ANYEXT %7 + %17:_(s32) = G_ANYEXT %8 + $vgpr0 = COPY %10 + $vgpr1 = COPY %11 + $vgpr2 = COPY %12 + $vgpr3 = COPY %13 + $vgpr4 = COPY %14 + $vgpr5 = COPY %15 + $vgpr6 = COPY %16 + $vgpr7 = COPY %17 +... + --- name: test_unmerge_s16_s32 body: |