Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -707,6 +707,38 @@
   switch (MI.getOpcode()) {
   default:
     return UnableToLegalize;
+  case TargetOpcode::G_EXTRACT: {
+    // Only widening of type index 1 (the type of the source operand) is
+    // handled here.
+    if (TypeIdx != 1)
+      return UnableToLegalize;
+
+    unsigned SrcReg = MI.getOperand(1).getReg();
+    LLT SrcTy = MRI.getType(SrcReg);
+    if (!SrcTy.isVector())
+      return UnableToLegalize;
+
+    // The result must be exactly one element of the source vector.
+    unsigned DstReg = MI.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+    if (DstTy != SrcTy.getElementType())
+      return UnableToLegalize;
+
+    // The extract must start on an element boundary.
+    unsigned Offset = MI.getOperand(2).getImm();
+    if (Offset % SrcTy.getScalarSizeInBits() != 0)
+      return UnableToLegalize;
+
+    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+
+    // Rescale the bit offset through the element index. Dividing the total
+    // sizes first truncates when the widening ratio is not an integer
+    // (e.g. s3 -> s16) and would produce a misaligned offset.
+    MI.getOperand(2).setImm((Offset / SrcTy.getScalarSizeInBits()) *
+                            WideTy.getScalarSizeInBits());
+    widenScalarDst(MI, WideTy.getScalarType(), 0);
+    return Legalized;
+  }
   case TargetOpcode::G_UADDO:
   case TargetOpcode::G_USUBO: {
     if (TypeIdx == 1)
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -380,7 +380,14 @@
         const LLT &Ty1 = Query.Types[1];
         return (Ty0.getSizeInBits() % 16 == 0) &&
                (Ty1.getSizeInBits() % 16 == 0);
-      });
+      })
+    // Widen type index 1 when its scalar size is below 16 bits.
+    .widenScalarIf(
+      [=](const LegalityQuery &Query) {
+        const LLT &Ty1 = Query.Types[1];
+        return (Ty1.getScalarSizeInBits() < 16);
+      },
+      LegalizeMutations::widenScalarToNextPow2(1, 16));
 
   getActionDefinitionsBuilder(G_BUILD_VECTOR)
     .legalForCartesianProduct(AllS32Vectors, {S32})
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir
@@ -103,3
+103,135 @@
     %1:_(s32) = G_EXTRACT %0, 64
     S_ENDPGM implicit %1
 ...
+
+---
+name: extract_s8_v4s8_offset0
+legalized: true
+# Element 0 of <4 x s8>: source is expected as <4 x s16>, offset stays 0.
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s8_v4s8_offset0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<4 x s16>), 0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<4 x s8>) = G_IMPLICIT_DEF
+    %1:_(s8) = G_EXTRACT %0, 0
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: extract_s8_v4s8_offset8
+legalized: true
+# Element 1 of <4 x s8>: bit offset 8 is expected to become 16 (s16 elements).
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s8_v4s8_offset8
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<4 x s16>), 16
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<4 x s8>) = G_IMPLICIT_DEF
+    %1:_(s8) = G_EXTRACT %0, 8
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: extract_s8_v4s8_offset16
+legalized: true
+# Element 2 of <4 x s8>: bit offset 16 is expected to become 32 (s16 elements).
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s8_v4s8_offset16
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<4 x s16>), 32
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<4 x s8>) = G_IMPLICIT_DEF
+    %1:_(s8) = G_EXTRACT %0, 16
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: extract_s8_v4s8_offset24
+legalized: true
+# Element 3 of <4 x s8>: bit offset 24 is expected to become 48 (s16 elements).
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s8_v4s8_offset24
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<4 x s16>), 48
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<4 x s8>) = G_IMPLICIT_DEF
+    %1:_(s8) = G_EXTRACT %0, 24
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: extract_s8_v3s8_offset16
+legalized: true
+# Element 2 of <3 x s8>: bit offset 16 is expected to become 32 (s16 elements).
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s8_v3s8_offset16
+    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<3 x s16>), 32
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<3 x s8>) = G_IMPLICIT_DEF
+    %1:_(s8) = G_EXTRACT %0, 16
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: extract_s8_v5s1_offset4
+legalized: true
+# Last element (bit offset 4) of <5 x s1>: expected offset 64 in <5 x s16>.
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_s8_v5s1_offset4
+    ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<5 x s16>), 64
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<5 x s1>) = G_IMPLICIT_DEF
+    %1:_(s1) = G_EXTRACT %0, 4
+    %2:_(s32) = G_ANYEXT %1
+    $vgpr0 = COPY %2
+...
+
+---
+name: extract_v2s16_v4s16_offset32
+legalized: true
+# <2 x s16> subvector of <4 x s16>: expected to come through unchanged.
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_v2s16_v4s16_offset32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 32
+    ; CHECK: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+    %0:_(<4 x s16>) = G_IMPLICIT_DEF
+    %1:_(<2 x s16>) = G_EXTRACT %0, 32
+    $vgpr0 = COPY %1
+...
+
+---
+name: extract_v2s16_v6s16_offset32
+legalized: true
+# <2 x s16> subvector of <6 x s16>: expected to come through unchanged.
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: extract_v2s16_v6s16_offset32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<6 x s16>), 32
+    ; CHECK: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
+    %0:_(<6 x s16>) = G_IMPLICIT_DEF
+    %1:_(<2 x s16>) = G_EXTRACT %0, 32
+    $vgpr0 = COPY %1
+...