Index: include/llvm/Support/MathExtras.h =================================================================== --- include/llvm/Support/MathExtras.h +++ include/llvm/Support/MathExtras.h @@ -559,15 +559,20 @@ } /// Return the greatest common divisor of the values using Euclid's algorithm. -inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { +template <typename T> +inline T greatestCommonDivisor(T A, T B) { while (B) { - uint64_t T = B; + T Tmp = B; B = A % B; - A = T; + A = Tmp; } return A; } +inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { + return greatestCommonDivisor(A, B); +} + /// This function takes a 64-bit integer and returns the bit equivalent double. inline double BitsToDouble(uint64_t Bits) { double D; Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -796,71 +796,107 @@ Register Src1 = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(Src1); - int NumMerge = DstTy.getSizeInBits() / WideTy.getSizeInBits(); + const int DstSize = DstTy.getSizeInBits(); + const int SrcSize = SrcTy.getSizeInBits(); + const int WideSize = WideTy.getSizeInBits(); + const int NumMerge = (DstSize + WideSize - 1) / WideSize; - // Try to turn this into a merge of merges if we can use the requested type as - // the source. - if (NumMerge > 1) { - int PartsPerMerge = WideTy.getSizeInBits() / SrcTy.getSizeInBits(); - if (WideTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) - return UnableToLegalize; - - int RemainderBits = DstTy.getSizeInBits() % WideTy.getSizeInBits(); - int RemainderParts = RemainderBits / SrcTy.getSizeInBits(); + unsigned NumOps = MI.getNumOperands(); + unsigned NumSrc = MI.getNumOperands() - 1; + unsigned PartSize = DstTy.getSizeInBits() / NumSrc; - SmallVector Parts; - SmallVector SubMerges; + if (WideSize >= DstSize) { + // Directly pack the bits in the target type. 
+ Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0); - for (int I = 0; I != NumMerge; ++I) { - for (int J = 0; J != PartsPerMerge; ++J) - Parts.push_back(MI.getOperand(I * PartsPerMerge + J + 1).getReg()); + for (unsigned I = 2; I != NumOps; ++I) { + const unsigned Offset = (I - 1) * PartSize; - auto SubMerge = MIRBuilder.buildMerge(WideTy, Parts); - SubMerges.push_back(SubMerge.getReg(0)); - Parts.clear(); - } + Register SrcReg = MI.getOperand(I).getReg(); + assert(MRI.getType(SrcReg) == LLT::scalar(PartSize)); - if (RemainderParts == 0) { - MIRBuilder.buildMerge(DstReg, SubMerges); - MI.eraseFromParent(); - return Legalized; - } + auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg); - assert(RemainderParts == 1); + Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg : + MRI.createGenericVirtualRegister(WideTy); - auto AnyExt = MIRBuilder.buildAnyExt( - WideTy, MI.getOperand(MI.getNumOperands() - 1).getReg()); - SubMerges.push_back(AnyExt.getReg(0)); + auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset); + auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt); + MIRBuilder.buildOr(NextResult, ResultReg, Shl); + ResultReg = NextResult; + } - LLT WiderDstTy = LLT::scalar(SubMerges.size() * WideTy.getSizeInBits()); - auto Merge = MIRBuilder.buildMerge(WiderDstTy, SubMerges); - MIRBuilder.buildTrunc(DstReg, Merge); + if (WideSize > DstSize) + MIRBuilder.buildTrunc(DstReg, ResultReg); MI.eraseFromParent(); return Legalized; } - unsigned NumOps = MI.getNumOperands(); - unsigned NumSrc = MI.getNumOperands() - 1; - unsigned PartSize = DstTy.getSizeInBits() / NumSrc; - - Register ResultReg = MIRBuilder.buildZExt(DstTy, Src1).getReg(0); - - for (unsigned I = 2; I != NumOps; ++I) { - const unsigned Offset = (I - 1) * PartSize; - + // Unmerge the original values to the GCD type, and recombine to the next + // multiple greater than the original type. 
+ // + // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6 + // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0 + // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1 + // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2 + // %10:_(s6) = G_MERGE_VALUES %4, %5, %6 + // %11:_(s6) = G_MERGE_VALUES %7, %8, %9 + // %12:_(s12) = G_MERGE_VALUES %10, %11 + // + // Padding with undef if necessary: + // + // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6 + // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0 + // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1 + // %7:_(s2) = G_IMPLICIT_DEF + // %8:_(s6) = G_MERGE_VALUES %3, %4, %5 + // %9:_(s6) = G_MERGE_VALUES %6, %7, %7 + // %10:_(s12) = G_MERGE_VALUES %8, %9 + + const int GCD = greatestCommonDivisor(SrcSize, WideSize); + LLT GCDTy = LLT::scalar(GCD); + + SmallVector Parts; + SmallVector NewMergeRegs; + SmallVector Unmerges; + LLT WideDstTy = LLT::scalar(NumMerge * WideSize); + + // Decompose the original operands if they don't evenly divide. + for (int I = 1, E = MI.getNumOperands(); I != E; ++I) { Register SrcReg = MI.getOperand(I).getReg(); - assert(MRI.getType(SrcReg) == LLT::scalar(PartSize)); + if (GCD == SrcSize) { + Unmerges.push_back(SrcReg); + } else { + auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); + for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J) + Unmerges.push_back(Unmerge.getReg(J)); + } + } - auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg); + // Pad with undef pieces; counts are in GCD-sized parts, not bits. + if (static_cast<int>(Unmerges.size()) != NumMerge * (WideSize / GCD)) { + Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0); + for (int I = Unmerges.size(); I != NumMerge * (WideSize / GCD); ++I) + Unmerges.push_back(UndefReg); + } - Register NextResult = I + 1 == NumOps ? 
DstReg : - MRI.createGenericVirtualRegister(DstTy); + const int PartsPerGCD = WideSize / GCD; - auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset); - auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt); - MIRBuilder.buildOr(NextResult, ResultReg, Shl); - ResultReg = NextResult; + // Build merges of each piece. + ArrayRef Slicer(Unmerges); + for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) { + auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD)); + NewMergeRegs.push_back(Merge.getReg(0)); + } + + // A truncate may be necessary if the requested type doesn't evenly divide the + // original result type. + if (DstTy == WideDstTy) { + MIRBuilder.buildMerge(DstReg, NewMergeRegs); + } else { + auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs); + MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0)); } MI.eraseFromParent(); Index: lib/CodeGen/GlobalISel/MachineIRBuilder.cpp =================================================================== --- lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -568,6 +568,7 @@ // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. SmallVector TmpVec(Ops.begin(), Ops.end()); + assert(TmpVec.size() > 1); return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec); } @@ -577,6 +578,7 @@ // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. SmallVector TmpVec(Res.begin(), Res.end()); + assert(TmpVec.size() > 1); return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op); } @@ -596,6 +598,7 @@ // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. 
SmallVector TmpVec(Res.begin(), Res.end()); + assert(TmpVec.size() > 1); return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op); } Index: test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir @@ -1,5 +1,11 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' -o - %s 2> %t | FileCheck %s +# RUN: FileCheck -check-prefix=ERR %s < %t + +# ERR-NOT: remark: +# ERR: remark: <unknown>:0:0: unable to legalize instruction: %197:_(s136) = G_INSERT %209:_, %206:_(s8), 128 (in function: test_merge_s68_s17_s17_s17_s17) +# ERR-NOT: remark: + --- name: test_merge_p1_s8 @@ -120,8 +126,9 @@ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C4]] ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 @@ -132,16 +139,20 @@ ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]] ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY3]] - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C6]](s32) ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND 
[[COPY4]], [[C4]] - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]] ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[AND3]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[COPY7]] - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[COPY7]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[MV]](s32) ; CHECK: $vgpr0 = COPY [[COPY8]](s32) %0:_(s8) = G_CONSTANT i8 0 %1:_(s8) = G_CONSTANT i8 1 @@ -250,8 +261,9 @@ ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C6]](s32) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C7]] ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 @@ -280,26 +292,39 @@ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY10]], [[COPY11]] - ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) + ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C11]](s32) ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C7]] - ; CHECK: 
[[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C8]] ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[AND7]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C8]] ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[COPY15]] - ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[COPY15]] + ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C7]] - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C8]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[AND9]](s32) + ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C7]] + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C8]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[AND10]](s32) ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[OR3]](s32) ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[SHL4]](s32) ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[COPY19]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[OR4]](s32) - ; CHECK: S_NOP 0, implicit [[TRUNC]](s24) + ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C13]](s32) + ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C7]] + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C8]] + ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[AND12]](s32) + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[OR4]](s32) + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[SHL5]](s32) 
+ ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY22]], [[COPY23]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR5]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s24) = G_TRUNC [[MV]](s32) + ; CHECK: S_NOP 0, implicit [[TRUNC2]](s24) %0:_(s4) = G_CONSTANT i4 0 %1:_(s4) = G_CONSTANT i4 1 %2:_(s4) = G_CONSTANT i4 2 @@ -322,8 +347,9 @@ ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 268435455 + ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C7]](s32) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C8]] ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 @@ -352,35 +378,39 @@ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[OR1]](s32) ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY10]], [[COPY11]] - ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) + ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C8]] - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[AND7]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C9]] ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[COPY15]] - ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 
+ ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[COPY15]] + ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C8]] - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[AND9]](s32) + ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C8]] + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[AND10]](s32) ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[OR3]](s32) ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[SHL4]](s32) ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[COPY19]] - ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C8]] - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[AND11]](s32) + ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C8]] + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]] + ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[AND12]](s32) ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[OR4]](s32) ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[SHL5]](s32) ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY22]], [[COPY23]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s28) = G_TRUNC [[OR5]](s32) - ; CHECK: S_NOP 0, implicit [[TRUNC]](s28) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR5]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s28) = G_TRUNC [[MV]](s32) + ; CHECK: 
S_NOP 0, implicit [[TRUNC2]](s28) %0:_(s4) = G_CONSTANT i4 0 %1:_(s4) = G_CONSTANT i4 1 %2:_(s4) = G_CONSTANT i4 2 @@ -552,6 +582,7 @@ ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C7]](s32) @@ -587,7 +618,17 @@ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[COPY11]] ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[C6]](s32) + ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C12]](s32) + ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C8]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[AND9]](s32) + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C9]] + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[COPY15]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; CHECK: [[TRUNC4:%[0-9]+]]:_(s56) = G_TRUNC [[MV]](s64) ; CHECK: S_NOP 0, implicit [[TRUNC4]](s56) @@ -601,3 +642,163 @@ %7:_(s56) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5, %6 S_NOP 0, implicit %7 ... 
+ +--- +name: test_merge_s68_s17_s17_s17_s17 +body: | + bb.0: + ; CHECK-LABEL: name: test_merge_s68_s17_s17_s17_s17 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC %142(s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC %143(s16) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[C]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s1) = G_TRUNC %144(s16) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s1) = G_TRUNC %145(s16) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s17) = G_TRUNC [[C1]](s32) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s1) = G_TRUNC %146(s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s1) = G_TRUNC %147(s16) + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s17) = G_TRUNC [[C2]](s32) + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s17) = G_TRUNC [[C3]](s32) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s272) = G_ZEXT [[TRUNC2]](s17) + ; CHECK: [[C4:%[0-9]+]]:_(s272) = G_CONSTANT i272 15 + ; CHECK: [[SHL:%[0-9]+]]:_(s272) = G_SHL [[ZEXT]], [[C4]](s272) + ; CHECK: [[OR:%[0-9]+]]:_(s272) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[C5:%[0-9]+]]:_(s272) = G_CONSTANT i272 30 + ; CHECK: [[SHL1:%[0-9]+]]:_(s272) = G_SHL [[OR]], [[C5]](s272) + ; CHECK: [[OR1:%[0-9]+]]:_(s272) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[C6:%[0-9]+]]:_(s272) = G_CONSTANT i272 45 + ; CHECK: [[SHL2:%[0-9]+]]:_(s272) = G_SHL [[OR1]], [[C6]](s272) + ; CHECK: [[OR2:%[0-9]+]]:_(s272) = G_OR [[OR1]], [[SHL2]] + ; CHECK: [[C7:%[0-9]+]]:_(s272) = G_CONSTANT i272 60 + ; CHECK: [[SHL3:%[0-9]+]]:_(s272) = G_SHL [[OR2]], [[C7]](s272) + ; CHECK: [[OR3:%[0-9]+]]:_(s272) = G_OR [[OR2]], [[SHL3]] + ; CHECK: [[C8:%[0-9]+]]:_(s272) = G_CONSTANT i272 75 + ; CHECK: [[SHL4:%[0-9]+]]:_(s272) = G_SHL [[OR3]], [[C8]](s272) + ; CHECK: [[OR4:%[0-9]+]]:_(s272) = G_OR [[OR3]], [[SHL4]] + ; CHECK: [[C9:%[0-9]+]]:_(s272) = G_CONSTANT i272 90 + ; CHECK: [[SHL5:%[0-9]+]]:_(s272) = G_SHL [[OR4]], [[C9]](s272) + ; CHECK: 
[[OR5:%[0-9]+]]:_(s272) = G_OR [[OR4]], [[SHL5]] + ; CHECK: [[C10:%[0-9]+]]:_(s272) = G_CONSTANT i272 105 + ; CHECK: [[SHL6:%[0-9]+]]:_(s272) = G_SHL [[OR5]], [[C10]](s272) + ; CHECK: [[OR6:%[0-9]+]]:_(s272) = G_OR [[OR5]], [[SHL6]] + ; CHECK: [[C11:%[0-9]+]]:_(s272) = G_CONSTANT i272 120 + ; CHECK: [[SHL7:%[0-9]+]]:_(s272) = G_SHL [[OR6]], [[C11]](s272) + ; CHECK: [[OR7:%[0-9]+]]:_(s272) = G_OR [[OR6]], [[SHL7]] + ; CHECK: [[C12:%[0-9]+]]:_(s272) = G_CONSTANT i272 135 + ; CHECK: [[SHL8:%[0-9]+]]:_(s272) = G_SHL [[OR7]], [[C12]](s272) + ; CHECK: [[OR8:%[0-9]+]]:_(s272) = G_OR [[OR7]], [[SHL8]] + ; CHECK: [[C13:%[0-9]+]]:_(s272) = G_CONSTANT i272 150 + ; CHECK: [[SHL9:%[0-9]+]]:_(s272) = G_SHL [[OR8]], [[C13]](s272) + ; CHECK: [[OR9:%[0-9]+]]:_(s272) = G_OR [[OR8]], [[SHL9]] + ; CHECK: [[C14:%[0-9]+]]:_(s272) = G_CONSTANT i272 165 + ; CHECK: [[SHL10:%[0-9]+]]:_(s272) = G_SHL [[OR9]], [[C14]](s272) + ; CHECK: [[OR10:%[0-9]+]]:_(s272) = G_OR [[OR9]], [[SHL10]] + ; CHECK: [[C15:%[0-9]+]]:_(s272) = G_CONSTANT i272 180 + ; CHECK: [[SHL11:%[0-9]+]]:_(s272) = G_SHL [[OR10]], [[C15]](s272) + ; CHECK: [[OR11:%[0-9]+]]:_(s272) = G_OR [[OR10]], [[SHL11]] + ; CHECK: [[C16:%[0-9]+]]:_(s272) = G_CONSTANT i272 195 + ; CHECK: [[SHL12:%[0-9]+]]:_(s272) = G_SHL [[OR11]], [[C16]](s272) + ; CHECK: [[OR12:%[0-9]+]]:_(s272) = G_OR [[OR11]], [[SHL12]] + ; CHECK: [[C17:%[0-9]+]]:_(s272) = G_CONSTANT i272 210 + ; CHECK: [[SHL13:%[0-9]+]]:_(s272) = G_SHL [[OR12]], [[C17]](s272) + ; CHECK: [[OR13:%[0-9]+]]:_(s272) = G_OR [[OR12]], [[SHL13]] + ; CHECK: [[C18:%[0-9]+]]:_(s272) = G_CONSTANT i272 225 + ; CHECK: [[SHL14:%[0-9]+]]:_(s272) = G_SHL [[OR13]], [[C18]](s272) + ; CHECK: [[OR14:%[0-9]+]]:_(s272) = G_OR [[OR13]], [[SHL14]] + ; CHECK: [[C19:%[0-9]+]]:_(s272) = G_CONSTANT i272 240 + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s32) = G_TRUNC [[C19]](s272) + ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 + ; CHECK: [[UV:%[0-9]+]]:_(s136), [[UV1:%[0-9]+]]:_(s136) = G_UNMERGE_VALUES [[OR14]](s272) + ; 
CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC10]], [[C20]] + ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C20]], [[TRUNC10]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC10]](s32), [[C20]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC10]](s32), [[C]] + ; CHECK: [[SHL15:%[0-9]+]]:_(s136) = G_SHL [[UV1]], [[TRUNC10]](s32) + ; CHECK: [[SHL16:%[0-9]+]]:_(s136) = G_SHL [[UV1]], [[TRUNC10]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s136) = G_LSHR [[UV]], [[SUB1]](s32) + ; CHECK: [[OR15:%[0-9]+]]:_(s136) = G_OR [[SHL16]], [[LSHR]] + ; CHECK: [[C21:%[0-9]+]]:_(s136) = G_CONSTANT i136 0 + ; CHECK: [[SHL17:%[0-9]+]]:_(s136) = G_SHL [[UV]], [[SUB]](s32) + ; CHECK: [[SELECT:%[0-9]+]]:_(s136) = G_SELECT [[ICMP]](s1), [[SHL15]], [[C21]] + ; CHECK: [[SELECT1:%[0-9]+]]:_(s136) = G_SELECT [[ICMP]](s1), [[OR15]], [[SHL17]] + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[UV1]](s136), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[UV1]](s136), 64 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s8) = G_EXTRACT [[UV1]](s136), 128 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[SELECT1]](s136), 0 + ; CHECK: [[EXTRACT4:%[0-9]+]]:_(s64) = G_EXTRACT [[SELECT1]](s136), 64 + ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s8) = G_EXTRACT [[SELECT1]](s136), 128 + ; CHECK: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[EXTRACT]], [[EXTRACT3]] + ; CHECK: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[EXTRACT1]], [[EXTRACT4]] + ; CHECK: [[SELECT4:%[0-9]+]]:_(s8) = G_SELECT [[ICMP1]](s1), [[EXTRACT2]], [[EXTRACT5]] + ; CHECK: [[DEF:%[0-9]+]]:_(s136) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(s136) = G_INSERT [[DEF]], [[SELECT2]](s64), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(s136) = G_INSERT [[INSERT]], [[SELECT3]](s64), 64 + ; CHECK: [[INSERT2:%[0-9]+]]:_(s136) = G_INSERT [[INSERT1]], [[SELECT4]](s8), 128 + ; CHECK: [[MV:%[0-9]+]]:_(s272) = G_MERGE_VALUES [[SELECT]](s136), [[INSERT2]](s136) + ; CHECK: [[EXTRACT6:%[0-9]+]]:_(s64) = G_EXTRACT [[OR14]](s272), 0 + ; 
CHECK: [[EXTRACT7:%[0-9]+]]:_(s64) = G_EXTRACT [[OR14]](s272), 64 + ; CHECK: [[EXTRACT8:%[0-9]+]]:_(s64) = G_EXTRACT [[OR14]](s272), 128 + ; CHECK: [[EXTRACT9:%[0-9]+]]:_(s64) = G_EXTRACT [[OR14]](s272), 192 + ; CHECK: [[EXTRACT10:%[0-9]+]]:_(s16) = G_EXTRACT [[OR14]](s272), 256 + ; CHECK: [[EXTRACT11:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s272), 0 + ; CHECK: [[EXTRACT12:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s272), 64 + ; CHECK: [[EXTRACT13:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s272), 128 + ; CHECK: [[EXTRACT14:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s272), 192 + ; CHECK: [[EXTRACT15:%[0-9]+]]:_(s16) = G_EXTRACT [[MV]](s272), 256 + ; CHECK: [[OR16:%[0-9]+]]:_(s64) = G_OR [[EXTRACT6]], [[EXTRACT11]] + ; CHECK: [[OR17:%[0-9]+]]:_(s64) = G_OR [[EXTRACT7]], [[EXTRACT12]] + ; CHECK: [[OR18:%[0-9]+]]:_(s64) = G_OR [[EXTRACT8]], [[EXTRACT13]] + ; CHECK: [[OR19:%[0-9]+]]:_(s64) = G_OR [[EXTRACT9]], [[EXTRACT14]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT10]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT15]](s16) + ; CHECK: [[OR20:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[OR20]](s32) + ; CHECK: [[DEF1:%[0-9]+]]:_(s512) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s272) = G_TRUNC [[DEF1]](s512) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s512) = G_ANYEXT [[TRUNC12]](s272) + ; CHECK: [[INSERT3:%[0-9]+]]:_(s512) = G_INSERT [[ANYEXT2]], [[OR16]](s64), 0 + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s272) = G_TRUNC [[INSERT3]](s512) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s512) = G_ANYEXT [[TRUNC13]](s272) + ; CHECK: [[INSERT4:%[0-9]+]]:_(s512) = G_INSERT [[ANYEXT3]], [[OR17]](s64), 64 + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s272) = G_TRUNC [[INSERT4]](s512) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s512) = G_ANYEXT [[TRUNC14]](s272) + ; CHECK: [[INSERT5:%[0-9]+]]:_(s512) = G_INSERT [[ANYEXT4]], [[OR18]](s64), 128 + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s272) = G_TRUNC [[INSERT5]](s512) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s512) = G_ANYEXT 
[[TRUNC15]](s272) + ; CHECK: [[INSERT6:%[0-9]+]]:_(s512) = G_INSERT [[ANYEXT5]], [[OR19]](s64), 192 + ; CHECK: [[TRUNC16:%[0-9]+]]:_(s272) = G_TRUNC [[INSERT6]](s512) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s512) = G_ANYEXT [[TRUNC16]](s272) + ; CHECK: [[INSERT7:%[0-9]+]]:_(s512) = G_INSERT [[ANYEXT6]], [[TRUNC11]](s16), 256 + ; CHECK: [[TRUNC17:%[0-9]+]]:_(s272) = G_TRUNC [[INSERT7]](s512) + ; CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[TRUNC17]](s272) + ; CHECK: [[TRUNC18:%[0-9]+]]:_(s1) = G_TRUNC [[UV2]](s16) + ; CHECK: [[UV19:%[0-9]+]]:_(s1), [[UV20:%[0-9]+]]:_(s1), [[UV21:%[0-9]+]]:_(s1), [[UV22:%[0-9]+]]:_(s1), [[UV23:%[0-9]+]]:_(s1), [[UV24:%[0-9]+]]:_(s1), [[UV25:%[0-9]+]]:_(s1), [[UV26:%[0-9]+]]:_(s1), [[UV27:%[0-9]+]]:_(s1), [[UV28:%[0-9]+]]:_(s1), [[UV29:%[0-9]+]]:_(s1), [[UV30:%[0-9]+]]:_(s1), [[UV31:%[0-9]+]]:_(s1), [[UV32:%[0-9]+]]:_(s1), [[UV33:%[0-9]+]]:_(s1), [[UV34:%[0-9]+]]:_(s1), [[UV35:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[TRUNC5]](s17) + ; CHECK: [[TRUNC19:%[0-9]+]]:_(s1) = G_TRUNC [[UV3]](s16) + ; CHECK: [[UV36:%[0-9]+]]:_(s1), [[UV37:%[0-9]+]]:_(s1), [[UV38:%[0-9]+]]:_(s1), [[UV39:%[0-9]+]]:_(s1), [[UV40:%[0-9]+]]:_(s1), [[UV41:%[0-9]+]]:_(s1), [[UV42:%[0-9]+]]:_(s1), [[UV43:%[0-9]+]]:_(s1), [[UV44:%[0-9]+]]:_(s1), [[UV45:%[0-9]+]]:_(s1), [[UV46:%[0-9]+]]:_(s1), [[UV47:%[0-9]+]]:_(s1), [[UV48:%[0-9]+]]:_(s1), [[UV49:%[0-9]+]]:_(s1), [[UV50:%[0-9]+]]:_(s1), [[UV51:%[0-9]+]]:_(s1), [[UV52:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[TRUNC8]](s17) + ; CHECK: [[TRUNC20:%[0-9]+]]:_(s1) = G_TRUNC [[UV4]](s16) + ; CHECK: [[UV53:%[0-9]+]]:_(s1), [[UV54:%[0-9]+]]:_(s1), 
[[UV55:%[0-9]+]]:_(s1), [[UV56:%[0-9]+]]:_(s1), [[UV57:%[0-9]+]]:_(s1), [[UV58:%[0-9]+]]:_(s1), [[UV59:%[0-9]+]]:_(s1), [[UV60:%[0-9]+]]:_(s1), [[UV61:%[0-9]+]]:_(s1), [[UV62:%[0-9]+]]:_(s1), [[UV63:%[0-9]+]]:_(s1), [[UV64:%[0-9]+]]:_(s1), [[UV65:%[0-9]+]]:_(s1), [[UV66:%[0-9]+]]:_(s1), [[UV67:%[0-9]+]]:_(s1), [[UV68:%[0-9]+]]:_(s1), [[UV69:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[TRUNC9]](s17) + ; CHECK: [[TRUNC21:%[0-9]+]]:_(s1) = G_TRUNC [[UV5]](s16) + ; CHECK: [[DEF2:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF + ; CHECK: [[TRUNC22:%[0-9]+]]:_(s1) = G_TRUNC [[UV6]](s16) + ; CHECK: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC18]](s1), [[TRUNC19]](s1), [[TRUNC20]](s1), [[TRUNC21]](s1), [[TRUNC22]](s1), %14(s1), %15(s1), %16(s1), %17(s1), %18(s1), %19(s1), [[TRUNC]](s1), [[TRUNC1]](s1), [[TRUNC3]](s1), [[TRUNC4]](s1), [[TRUNC6]](s1), [[TRUNC7]](s1), [[UV19]](s1), [[UV20]](s1), [[UV21]](s1), [[UV22]](s1), [[UV23]](s1), [[UV24]](s1), [[UV25]](s1), [[UV26]](s1), [[UV27]](s1), [[UV28]](s1), [[UV29]](s1), [[UV30]](s1), [[UV31]](s1), [[UV32]](s1), [[UV33]](s1) + ; CHECK: [[TRUNC23:%[0-9]+]]:_(s1) = G_TRUNC [[UV7]](s16) + ; CHECK: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV34]](s1), [[UV35]](s1), [[UV36]](s1), [[UV37]](s1), [[UV38]](s1), [[UV39]](s1), [[UV40]](s1), [[UV41]](s1), [[UV42]](s1), [[UV43]](s1), [[UV44]](s1), [[UV45]](s1), [[UV46]](s1), [[UV47]](s1), [[UV48]](s1), [[UV49]](s1), [[UV50]](s1), [[UV51]](s1), [[UV52]](s1), [[UV53]](s1), [[UV54]](s1), [[UV55]](s1), [[UV56]](s1), [[UV57]](s1), [[UV58]](s1), [[UV59]](s1), [[UV60]](s1), [[UV61]](s1), [[UV62]](s1), [[UV63]](s1), [[UV64]](s1), [[UV65]](s1) + ; CHECK: [[TRUNC24:%[0-9]+]]:_(s1) = G_TRUNC [[UV8]](s16) + ; CHECK: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV66]](s1), [[UV67]](s1), [[UV68]](s1), [[UV69]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), 
[[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1) + ; CHECK: [[TRUNC25:%[0-9]+]]:_(s1) = G_TRUNC [[UV9]](s16) + ; CHECK: [[MV4:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV1]](s32), [[MV2]](s32), [[MV3]](s32) + ; CHECK: [[TRUNC26:%[0-9]+]]:_(s1) = G_TRUNC [[UV10]](s16) + ; CHECK: [[TRUNC27:%[0-9]+]]:_(s68) = G_TRUNC [[MV4]](s96) + ; CHECK: [[TRUNC28:%[0-9]+]]:_(s1) = G_TRUNC [[UV11]](s16) + ; CHECK: S_NOP 0, implicit [[TRUNC27]](s68) + ; CHECK: [[TRUNC29:%[0-9]+]]:_(s1) = G_TRUNC [[UV12]](s16) + %0:_(s17) = G_CONSTANT i17 0 + %1:_(s17) = G_CONSTANT i17 1 + %2:_(s17) = G_CONSTANT i17 2 + %3:_(s17) = G_CONSTANT i17 3 + %4:_(s68) = G_MERGE_VALUES %0, %1, %2, %3 + S_NOP 0, implicit %4 +... Index: unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp =================================================================== --- unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -902,8 +902,12 @@ if (!TM) return; + const LLT S32 = LLT::scalar(32); const LLT S24 = LLT::scalar(24); + const LLT S21 = LLT::scalar(21); + const LLT S16 = LLT::scalar(16); const LLT S9 = LLT::scalar(9); + const LLT S8 = LLT::scalar(8); const LLT S3 = LLT::scalar(3); DefineLegalizerInfo(A, { @@ -919,13 +923,80 @@ // 24 = 3 3 3 3 3 3 3 3 // => 9 // - // This can do 2 merges for the first parts, but has 2 leftover operands. - SmallVector MergeOps; + // This can do 3 merges, but need an extra implicit_def. 
+ SmallVector Merge0Ops; for (int I = 0; I != 8; ++I) - MergeOps.push_back(B.buildConstant(S3, I).getReg(0)); + Merge0Ops.push_back(B.buildConstant(S3, I).getReg(0)); - auto Merge = B.buildMerge(S24, MergeOps); - EXPECT_EQ(LegalizerHelper::LegalizeResult::UnableToLegalize, - Helper.lower(*Merge, 1, S9)); + auto Merge0 = B.buildMerge(S24, Merge0Ops); + + // 21 = 3 3 3 3 3 3 3 + // => 9, 2 extra implicit_def needed + // + SmallVector Merge1Ops; + for (int I = 0; I != 7; ++I) + Merge1Ops.push_back(B.buildConstant(S3, I).getReg(0)); + + auto Merge1 = B.buildMerge(S21, Merge1Ops); + + SmallVector Merge2Ops; + for (int I = 0; I != 2; ++I) + Merge2Ops.push_back(B.buildConstant(S8, I).getReg(0)); + + auto Merge2 = B.buildMerge(S16, Merge2Ops); + + + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.widenScalar(*Merge0, 1, S9)); + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.widenScalar(*Merge1, 1, S9)); + + // Request a source size greater than the original destination size. 
+ EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.widenScalar(*Merge2, 1, S32)); + + auto CheckStr = R"( + CHECK: [[K0:%[0-9]+]]:_(s3) = G_CONSTANT i3 0 + CHECK-NEXT: [[K1:%[0-9]+]]:_(s3) = G_CONSTANT i3 1 + CHECK-NEXT: [[K2:%[0-9]+]]:_(s3) = G_CONSTANT i3 2 + CHECK-NEXT: [[K3:%[0-9]+]]:_(s3) = G_CONSTANT i3 3 + CHECK-NEXT: [[K4:%[0-9]+]]:_(s3) = G_CONSTANT i3 -4 + CHECK-NEXT: [[K5:%[0-9]+]]:_(s3) = G_CONSTANT i3 -3 + CHECK-NEXT: [[K6:%[0-9]+]]:_(s3) = G_CONSTANT i3 -2 + CHECK-NEXT: [[K7:%[0-9]+]]:_(s3) = G_CONSTANT i3 -1 + CHECK-NEXT: [[IMPDEF0:%[0-9]+]]:_(s3) = G_IMPLICIT_DEF + CHECK-NEXT: [[MERGE0:%[0-9]+]]:_(s9) = G_MERGE_VALUES [[K0]]:_(s3), [[K1]]:_(s3), [[K2]]:_(s3) + CHECK-NEXT: [[MERGE1:%[0-9]+]]:_(s9) = G_MERGE_VALUES [[K3]]:_(s3), [[K4]]:_(s3), [[K5]]:_(s3) + CHECK-NEXT: [[MERGE2:%[0-9]+]]:_(s9) = G_MERGE_VALUES [[K6]]:_(s3), [[K7]]:_(s3), [[IMPDEF0]]:_(s3) + CHECK-NEXT: [[MERGE3:%[0-9]+]]:_(s27) = G_MERGE_VALUES [[MERGE0]]:_(s9), [[MERGE1]]:_(s9), [[MERGE2]]:_(s9) + CHECK-NEXT: (s24) = G_TRUNC [[MERGE3]]:_(s27) + + + CHECK: [[K8:%[0-9]+]]:_(s3) = G_CONSTANT i3 0 + CHECK-NEXT: [[K9:%[0-9]+]]:_(s3) = G_CONSTANT i3 1 + CHECK-NEXT: [[K10:%[0-9]+]]:_(s3) = G_CONSTANT i3 2 + CHECK-NEXT: [[K11:%[0-9]+]]:_(s3) = G_CONSTANT i3 3 + CHECK-NEXT: [[K12:%[0-9]+]]:_(s3) = G_CONSTANT i3 -4 + CHECK-NEXT: [[K13:%[0-9]+]]:_(s3) = G_CONSTANT i3 -3 + CHECK-NEXT: [[K14:%[0-9]+]]:_(s3) = G_CONSTANT i3 -2 + CHECK-NEXT: [[IMPDEF1:%[0-9]+]]:_(s3) = G_IMPLICIT_DEF + CHECK-NEXT: [[MERGE4:%[0-9]+]]:_(s9) = G_MERGE_VALUES [[K8]]:_(s3), [[K9]]:_(s3), [[K10]]:_(s3) + CHECK-NEXT: [[MERGE5:%[0-9]+]]:_(s9) = G_MERGE_VALUES [[K11]]:_(s3), [[K12]]:_(s3), [[K13]]:_(s3) + CHECK-NEXT: [[MERGE6:%[0-9]+]]:_(s9) = G_MERGE_VALUES [[K14]]:_(s3), [[IMPDEF1]]:_(s3), [[IMPDEF1]]:_(s3) + CHECK-NEXT: [[MERGE7:%[0-9]+]]:_(s27) = G_MERGE_VALUES [[MERGE4]]:_(s9), [[MERGE5]]:_(s9), [[MERGE6]]:_(s9) + CHECK-NEXT: (s21) = G_TRUNC [[MERGE7]]:_(s27) + + + CHECK: [[K15:%[0-9]+]]:_(s8) = 
G_CONSTANT i8 0 + CHECK-NEXT: [[K16:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 + CHECK-NEXT: [[ZEXT_K15:[0-9]+]]:_(s32) = G_ZEXT [[K15]]:_(s8) + CHECK-NEXT: [[ZEXT_K16:[0-9]+]]:_(s32) = G_ZEXT [[K16]]:_(s8) + [[K16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT_K16]]:_, [[K16]]:_(s32) + [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT_K16]]:_, [[SHL]]:_ + (s16) = G_TRUNC [[OR]]:_(s32) + )"; + + EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; } } // namespace