Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -736,6 +736,17 @@
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+  case TargetOpcode::G_INSERT_VECTOR_ELT: { // Shared path: the two opcodes differ only in which operand is narrowed.
+    if (TypeIdx != 2) // Any other type index is rejected by this case.
+      return UnableToLegalize;
+
+    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3; // Operand 2 for extract, 3 for insert; in the tests added by this patch that is the index operand.
+    Observer.changingInstr(MI); // Notify the observer before MI is modified in place...
+    narrowScalarSrc(MI, NarrowTy, OpIdx); // Helper is defined outside this hunk; the tests in this patch expect a G_TRUNC of the s64 index to s32.
+    Observer.changedInstr(MI); // ...and again once the modification is complete.
+    return Legalized;
+  }
   }
 }
 
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
@@ -646,3 +646,23 @@
     %2:_(s128) = G_EXTRACT_VECTOR_ELT %0, %1
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
 ...
+
+---
+name: extract_vector_elt_v2i32_varidx_i64 # Extract from <2 x s32> with a variable s64 index; the CHECK lines expect the index truncated to s32.
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: extract_vector_elt_v2i32_varidx_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[TRUNC]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
+
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -2,12 +2,12 @@
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
 
 ---
-name: insert_vector_elt_0_v2i32
+name: insert_vector_elt_0_v2s32
 
 body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2
-    ; CHECK-LABEL: name: insert_vector_elt_0_v2i32
+    ; CHECK-LABEL: name: insert_vector_elt_0_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -19,3 +19,45 @@
     %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $vgpr0_vgpr1 = COPY %3
 ...
+
+---
+name: insert_vector_elt_v2s32_varidx_i64 # Insert into <2 x s32> with a variable s64 index; the CHECK lines expect the index truncated to s32.
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3_vgpr4
+
+    ; CHECK-LABEL: name: insert_vector_elt_v2s32_varidx_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s64) = COPY $vgpr3_vgpr4
+    %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: insert_vector_elt_v16s32_varidx_i64 # Insert into <16 x s32> with a variable s64 index; the result is consumed by the implicit operand of S_ENDPGM instead of a register copy.
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16, $vgpr17_vgpr18
+
+    ; CHECK-LABEL: name: insert_vector_elt_v16s32_varidx_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr16
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr17_vgpr18
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<16 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[IVEC]](<16 x s32>)
+    %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    %1:_(s32) = COPY $vgpr16
+    %2:_(s64) = COPY $vgpr17_vgpr18
+    %3:_(<16 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    S_ENDPGM 0, implicit %3
+...