Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -459,18 +459,35 @@
         DstRegs.push_back(MI.getOperand(DefIdx).getReg());
 
       if (ConvertOp) {
-        SmallVector<Register, 2> TmpRegs;
-        // This is a vector that is being scalarized and casted. Extract to
-        // the element type, and do the conversion on the scalars.
-        LLT MergeEltTy =
-            MRI.getType(MergeI->getOperand(0).getReg()).getElementType();
-        for (unsigned j = 0; j < NumMergeRegs; ++j)
-          TmpRegs.push_back(MRI.createGenericVirtualRegister(MergeEltTy));
+        LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
+
+        // This is a vector that is being split and casted. Extract to the
+        // element type, and do the conversion on the scalars (or smaller
+        // vectors).
+        LLT MergeEltTy = MergeSrcTy.divide(NewNumDefs);
+
+        // Handle split to smaller vectors, with conversions.
+        // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
+        // %3(<8 x s16>) = G_SEXT %2
+        // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %3
+        //
+        // =>
+        //
+        // %8(<2 x s8>), %9(<2 x s8>) = G_UNMERGE_VALUES %0
+        // %10(<2 x s8>), %11(<2 x s8>) = G_UNMERGE_VALUES %1
+        // %4(<2 x s16>) = G_SEXT %8
+        // %5(<2 x s16>) = G_SEXT %9
+        // %6(<2 x s16>) = G_SEXT %10
+        // %7(<2 x s16>) = G_SEXT %11
+
+        SmallVector<Register, 2> TmpRegs(NewNumDefs);
+        for (unsigned k = 0; k < NewNumDefs; ++k)
+          TmpRegs[k] = MRI.createGenericVirtualRegister(MergeEltTy);
 
         Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg());
 
-        for (unsigned j = 0; j < NumMergeRegs; ++j)
-          Builder.buildInstr(ConvertOp, {DstRegs[j]}, {TmpRegs[j]});
+        for (unsigned k = 0; k < NewNumDefs; ++k)
+          Builder.buildInstr(ConvertOp, {DstRegs[k]}, {TmpRegs[k]});
       } else {
         Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
       }
Index: llvm/include/llvm/Support/LowLevelTypeImpl.h
===================================================================
--- llvm/include/llvm/Support/LowLevelTypeImpl.h
+++ llvm/include/llvm/Support/LowLevelTypeImpl.h
@@ -143,6 +143,16 @@
     return LLT::scalarOrVector(NewNumElts, getScalarType());
   }
 
+  /// Return a type that is \p Factor times smaller. Reduces the number of
+  /// elements if this is a vector, or the bitwidth for scalar/pointers. Does
+  /// not attempt to handle cases that aren't evenly divisible.
+ LLT divide(int Factor) const { + assert(Factor != 1); + if (isVector()) + return scalarOrVector(getNumElements() / Factor, getElementType()); + return scalar(getSizeInBits() / Factor); + } + bool isByteSized() const { return (getSizeInBits() & 7) == 0; } unsigned getScalarSizeInBits() const { Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1317,12 +1317,6 @@ return 0; } -static LLT divideLLT(LLT Ty, int Factor) { - if (Ty.isVector()) - return LLT::vector(Ty.getNumElements() / Factor, Ty.getElementType()); - return LLT::scalar(Ty.getSizeInBits() / Factor); -} - bool AMDGPURegisterBankInfo::applyMappingSBufferLoad( const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); @@ -1347,7 +1341,7 @@ int NumLoads = 1; if (LoadSize == 256 || LoadSize == 512) { NumLoads = LoadSize / 128; - Ty = divideLLT(Ty, NumLoads); + Ty = Ty.divide(NumLoads); } // Use the alignment to ensure that the required offsets will fit into the Index: llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir @@ -574,3 +574,329 @@ %4:_(<3 x s16>), %5:_(<3 x s16>), %6:_(<3 x s16>), %7:_(<3 x s16>) = G_UNMERGE_VALUES %3 S_ENDPGM 0, implicit %4, implicit %5, implicit %6, implicit %7 ... + +--- +name: unmerge_v2s16_from_v4s16_sext_v4s8_concat_vectors_v2s8 +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: unmerge_v2s16_from_v4s16_sext_v4s8_concat_vectors_v2s8 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32) + ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32) + ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY12]], 8 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32) + ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY13]], 8 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]] + ; CHECK: 
[[COPY15:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG3]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s8) = G_TRUNC %0 + %5:_(s8) = G_TRUNC %1 + %6:_(s8) = G_TRUNC %2 + %7:_(s8) = G_TRUNC %3 + %8:_(<2 x s8>) = G_BUILD_VECTOR %4, %5 + %9:_(<2 x s8>) = G_BUILD_VECTOR %6, %7 + %10:_(<4 x s8>) = G_CONCAT_VECTORS %8, %9 + %11:_(<4 x s16>) = G_SEXT %10 + %12:_(<2 x s16>), %13:_(<2 x s16>) = G_UNMERGE_VALUES %11 + S_ENDPGM 0, implicit %12, implicit %13 +... + +--- +name: unmerge_v2s16_from_v8s16_sext_v8s8_concat_vectors_v4s8 +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + + ; CHECK-LABEL: name: unmerge_v2s16_from_v8s16_sext_v8s8_concat_vectors_v4s8 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY12]], 8 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) + ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY13]], 8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C]] + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) + ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY16]], 8 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32) + ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY17]], 8 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C]] + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG3]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32) + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32) + ; CHECK: 
[[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32) + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32) + ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY24]], 8 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY21]](s32) + ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY25]], 8 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG4]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C]] + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG5]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY22]](s32) + ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY28]], 8 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY23]](s32) + ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY29]], 8 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG6]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C]] + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG7]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>), implicit [[BITCAST2]](<2 x s16>), implicit [[BITCAST3]](<2 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s32) = COPY $vgpr5 + %6:_(s32) = COPY $vgpr6 + %7:_(s32) = COPY $vgpr7 + %8:_(s8) = G_TRUNC %0 + %9:_(s8) = G_TRUNC %1 + %10:_(s8) = G_TRUNC %2 + %11:_(s8) = G_TRUNC %3 + %12:_(s8) = G_TRUNC %4 + %13:_(s8) = G_TRUNC %5 + %14:_(s8) = G_TRUNC %6 + %15:_(s8) = G_TRUNC %7 + %16:_(<4 x s8>) = G_BUILD_VECTOR %8, %9, %10, %11 + %17:_(<4 x s8>) = G_BUILD_VECTOR %12, %13, %14, %15 + %18:_(<8 x s8>) = G_CONCAT_VECTORS %16, %17 + %19:_(<8 x s16>) = G_SEXT %18 + %20:_(<2 x s16>), %21:_(<2 x s16>), %22:_(<2 x s16>), %23:_(<2 x s16>) = G_UNMERGE_VALUES %19 + S_ENDPGM 0, implicit %20, implicit %21, implicit %22, implicit %23 +... 
+ +--- +name: unmerge_v2s16_from_v16s16_sext_v16s8_concat_vectors_v8s8 +tracksRegLiveness: true +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + + ; CHECK-LABEL: name: unmerge_v2s16_from_v16s16_sext_v16s8_concat_vectors_v8s8 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32) + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32) + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32) + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[COPY16]](s32) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY24]], 8 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[COPY17]](s32) + ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY25]], 8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY26]], [[C]] + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY18]](s32) + ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY28]], 8 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY19]](s32) + ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY29]], 8 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY30]], [[C]] + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG3]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY20]](s32) + ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY32]], 8 + ; CHECK: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY21]](s32) + ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY33]], 8 + 
; CHECK: [[COPY34:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG4]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY34]], [[C]] + ; CHECK: [[COPY35:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG5]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY35]], [[C]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK: [[COPY36:%[0-9]+]]:_(s32) = COPY [[COPY22]](s32) + ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY36]], 8 + ; CHECK: [[COPY37:%[0-9]+]]:_(s32) = COPY [[COPY23]](s32) + ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY37]], 8 + ; CHECK: [[COPY38:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG6]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY38]], [[C]] + ; CHECK: [[COPY39:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG7]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY39]], [[C]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK: [[COPY40:%[0-9]+]]:_(s32) = COPY [[COPY8]](s32) + ; CHECK: [[COPY41:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32) + ; CHECK: [[COPY42:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32) + ; CHECK: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32) + ; CHECK: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32) + ; CHECK: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32) + ; CHECK: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY14]](s32) + ; CHECK: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY15]](s32) + ; CHECK: [[COPY48:%[0-9]+]]:_(s32) = COPY [[COPY40]](s32) + ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY48]], 8 + ; CHECK: [[COPY49:%[0-9]+]]:_(s32) = COPY [[COPY41]](s32) + ; CHECK: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY49]], 8 + ; CHECK: [[COPY50:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG8]](s32) + ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY50]], [[C]] + ; CHECK: [[COPY51:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG9]](s32) + ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY51]], [[C]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK: [[COPY52:%[0-9]+]]:_(s32) = COPY [[COPY42]](s32) + ; CHECK: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY52]], 8 + ; CHECK: [[COPY53:%[0-9]+]]:_(s32) = COPY [[COPY43]](s32) + ; CHECK: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY53]], 8 + ; CHECK: [[COPY54:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG10]](s32) + ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY54]], [[C]] + ; CHECK: [[COPY55:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG11]](s32) + ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY55]], [[C]] + ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK: [[COPY56:%[0-9]+]]:_(s32) = COPY [[COPY44]](s32) + ; CHECK: [[SEXT_INREG12:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY56]], 8 + ; CHECK: [[COPY57:%[0-9]+]]:_(s32) = COPY [[COPY45]](s32) + ; CHECK: [[SEXT_INREG13:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY57]], 8 + ; CHECK: [[COPY58:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG12]](s32) + ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY58]], [[C]] + ; CHECK: [[COPY59:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG13]](s32) + ; CHECK: [[AND13:%[0-9]+]]:_(s32) = 
G_AND [[COPY59]], [[C]] + ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] + ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK: [[COPY60:%[0-9]+]]:_(s32) = COPY [[COPY46]](s32) + ; CHECK: [[SEXT_INREG14:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY60]], 8 + ; CHECK: [[COPY61:%[0-9]+]]:_(s32) = COPY [[COPY47]](s32) + ; CHECK: [[SEXT_INREG15:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY61]], 8 + ; CHECK: [[COPY62:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG14]](s32) + ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY62]], [[C]] + ; CHECK: [[COPY63:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG15]](s32) + ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY63]], [[C]] + ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C1]](s32) + ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]] + ; CHECK: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s16>), implicit [[BITCAST1]](<2 x s16>), implicit [[BITCAST2]](<2 x s16>), implicit [[BITCAST3]](<2 x s16>), implicit [[BITCAST4]](<2 x s16>), implicit [[BITCAST5]](<2 x s16>), implicit [[BITCAST6]](<2 x s16>), implicit [[BITCAST7]](<2 x s16>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s32) = COPY $vgpr5 + %6:_(s32) = COPY $vgpr6 + %7:_(s32) = COPY $vgpr7 + %8:_(s32) = COPY $vgpr8 + %9:_(s32) = COPY $vgpr9 + %10:_(s32) = COPY $vgpr10 + %11:_(s32) = COPY $vgpr11 + %12:_(s32) = COPY $vgpr12 + %13:_(s32) = COPY $vgpr13 + %14:_(s32) = COPY $vgpr14 + %15:_(s32) = COPY $vgpr15 + %16:_(s8) = G_TRUNC %0 + %17:_(s8) = G_TRUNC %1 + %18:_(s8) = G_TRUNC %2 + %19:_(s8) = G_TRUNC %3 + %20:_(s8) = G_TRUNC %4 + %21:_(s8) = G_TRUNC %5 + %22:_(s8) = G_TRUNC %6 + %23:_(s8) = G_TRUNC %7 + %24:_(s8) = G_TRUNC %8 + %25:_(s8) = G_TRUNC %9 + %26:_(s8) = G_TRUNC %10 + %27:_(s8) = G_TRUNC %11 + %28:_(s8) = G_TRUNC %12 + %29:_(s8) = G_TRUNC %13 + %30:_(s8) = G_TRUNC %14 + %31:_(s8) = G_TRUNC %15 + %32:_(<8 x s8>) = G_BUILD_VECTOR %16, %17, %18, %19, %20, %21, %22, %23 + %33:_(<8 x s8>) = G_BUILD_VECTOR %24, %25, %26, %27, %28, %29, %30, %31 + %34:_(<16 x s8>) = G_CONCAT_VECTORS %32, %33 + %35:_(<16 x s16>) = G_SEXT %34 + %36:_(<2 x s16>), %37:_(<2 x s16>), %38:_(<2 x s16>), %39:_(<2 x s16>), %40:_(<2 x s16>), %41:_(<2 x s16>), %42:_(<2 x s16>), %43:_(<2 x s16>) = G_UNMERGE_VALUES %35 + S_ENDPGM 0, implicit %36, implicit %37, implicit %38, implicit %39, implicit %40, implicit %41, implicit %42, implicit %43 +... Index: llvm/unittests/CodeGen/LowLevelTypeTest.cpp =================================================================== --- llvm/unittests/CodeGen/LowLevelTypeTest.cpp +++ llvm/unittests/CodeGen/LowLevelTypeTest.cpp @@ -238,4 +238,24 @@ ASSERT_FALSE(Ty.isVector()); } +TEST(LowLevelTypeTest, Divide) { + // Test basic scalar->scalar cases. + EXPECT_EQ(LLT::scalar(16), LLT::scalar(32).divide(2)); + EXPECT_EQ(LLT::scalar(8), LLT::scalar(32).divide(4)); + EXPECT_EQ(LLT::scalar(8), LLT::scalar(32).divide(4)); + + // Test pointer->scalar + EXPECT_EQ(LLT::scalar(32), LLT::pointer(0, 64).divide(2)); + + // Test dividing vectors. 
+ EXPECT_EQ(LLT::scalar(32), LLT::vector(2, 32).divide(2)); + EXPECT_EQ(LLT::vector(2, 32), LLT::vector(4, 32).divide(2)); + + // Test vector of pointers + EXPECT_EQ(LLT::pointer(1, 64), + LLT::vector(4, LLT::pointer(1, 64)).divide(4)); + EXPECT_EQ(LLT::vector(2, LLT::pointer(1, 64)), + LLT::vector(4, LLT::pointer(1, 64)).divide(2)); +} + }
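
Not part of the patch: a minimal standalone sketch of the divide() semantics the
combiner now relies on, mirroring the unit tests above. The main() wrapper and
the specific type values are illustrative assumptions only; the LLT calls are
the ones exercised in the patch.

  // Illustration of LLT::divide, assuming the interface added in
  // llvm/include/llvm/Support/LowLevelTypeImpl.h above.
  #include "llvm/Support/LowLevelTypeImpl.h"
  #include <cassert>

  int main() {
    using llvm::LLT;

    // Vectors divide on element count: splitting a <4 x s8> concat source
    // across two unmerge results gives <2 x s8> pieces, as in the example in
    // the combiner comment.
    assert(LLT::vector(4, 8).divide(2) == LLT::vector(2, 8));

    // Dividing down to a single element yields the scalar type, courtesy of
    // scalarOrVector.
    assert(LLT::vector(2, 32).divide(2) == LLT::scalar(32));

    // Scalars and pointers divide on bit width instead of element count.
    assert(LLT::scalar(32).divide(4) == LLT::scalar(8));
    assert(LLT::pointer(0, 64).divide(2) == LLT::scalar(32));
    return 0;
  }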