Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1649,8 +1649,12 @@
   // TODO: Should move some of this into LegalizerHelper.
 
   // TODO: Promote dynamic indexing of s16 to s32
-  // TODO: Dynamic s64 indexing is only legal for SGPR.
-  Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
+
+  // FIXME: Artifact combiner probably should have replaced the truncated
+  // constant before this, so we shouldn't need
+  // getConstantVRegValWithLookThrough.
+  Optional<ValueAndVReg> IdxVal = getConstantVRegValWithLookThrough(
+    MI.getOperand(2).getReg(), MRI);
   if (!IdxVal) // Dynamic case will be selected to register indexing.
     return true;
 
@@ -1663,8 +1667,8 @@
 
   B.setInstr(MI);
 
-  if (IdxVal.getValue() < VecTy.getNumElements())
-    B.buildExtract(Dst, Vec, IdxVal.getValue() * EltTy.getSizeInBits());
+  if (IdxVal->Value < VecTy.getNumElements())
+    B.buildExtract(Dst, Vec, IdxVal->Value * EltTy.getSizeInBits());
   else
     B.buildUndef(Dst);
 
@@ -1678,8 +1682,12 @@
   // TODO: Should move some of this into LegalizerHelper.
 
   // TODO: Promote dynamic indexing of s16 to s32
-  // TODO: Dynamic s64 indexing is only legal for SGPR.
-  Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
+
+  // FIXME: Artifact combiner probably should have replaced the truncated
+  // constant before this, so we shouldn't need
+  // getConstantVRegValWithLookThrough.
+  Optional<ValueAndVReg> IdxVal = getConstantVRegValWithLookThrough(
+    MI.getOperand(3).getReg(), MRI);
   if (!IdxVal) // Dynamic case will be selected to register indexing.
     return true;
 
@@ -1693,8 +1701,8 @@
 
   B.setInstr(MI);
 
-  if (IdxVal.getValue() < VecTy.getNumElements())
-    B.buildInsert(Dst, Vec, Ins, IdxVal.getValue() * EltTy.getSizeInBits());
+  if (IdxVal->Value < VecTy.getNumElements())
+    B.buildInsert(Dst, Vec, Ins, IdxVal->Value * EltTy.getSizeInBits());
   else
     B.buildUndef(Dst);
 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
@@ -275,9 +275,8 @@
     ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1
     ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1)
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[SEXT]](s32)
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32)
     ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(<2 x s1>) = G_IMPLICIT_DEF
     %1:_(s1) = G_CONSTANT i1 false
@@ -676,3 +675,22 @@
     %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
     $vgpr0_vgpr1 = COPY %2
 ...
+
+# Make sure we look through casts looking for a constant index.
+---
+name: extract_vector_elt_look_through_trunc_0_v4i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK-LABEL: name: extract_vector_elt_look_through_trunc_0_v4i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0
+    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(s64) = G_CONSTANT i64 0
+    %2:_(s32) = G_TRUNC %1
+    %3:_(s32) = G_EXTRACT_VECTOR_ELT %0, %2
+    $vgpr0 = COPY %3
+...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -127,10 +127,8 @@
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8
-    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[SEXT_INREG]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s8) = G_CONSTANT i8 0
@@ -161,3 +159,24 @@
     %5:_(<2 x s32>) = G_ANYEXT %4
     $vgpr0_vgpr1 = COPY %5
 ...
+
+---
+name: insert_vector_elt_v4s32_s32_look_through_trunc_0
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+
+    ; CHECK-LABEL: name: insert_vector_elt_v4s32_s32_look_through_trunc_0
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(s32) = COPY $vgpr4
+    %2:_(s64) = G_CONSTANT i64 0
+    %3:_(s32) = G_TRUNC %2
+    %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %3
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
+...
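For context on what the look-through buys: the new tests feed G_EXTRACT_VECTOR_ELT and G_INSERT_VECTOR_ELT an index that is a G_TRUNC of a wider G_CONSTANT, and the legalizer now still folds it to a constant. Below is a minimal standalone C++ sketch of that def-chain walk. The types Opcode, Def, and DefMap and the function lookThroughForConstant are invented stand-ins for illustration; this is not LLVM's getConstantVRegValWithLookThrough, which also looks through copies and the extend opcodes.

// Minimal model of finding a constant index behind a G_TRUNC.
// Illustrative sketch only; not LLVM's implementation.
#include <cstdint>
#include <optional>
#include <unordered_map>

enum class Opcode { GConstant, GTrunc, Other };

struct Def {
  Opcode Op;
  int64_t Imm = 0;   // Meaningful only when Op == Opcode::GConstant.
  unsigned Src = 0;  // Source vreg, meaningful only when Op == Opcode::GTrunc.
};

// Maps a virtual register to its defining instruction; a hypothetical
// stand-in for MachineRegisterInfo::getVRegDef.
using DefMap = std::unordered_map<unsigned, Def>;

// Walk the def chain through truncs until something other than a trunc is
// found; report the constant if that is what defines the index.
std::optional<int64_t> lookThroughForConstant(unsigned VReg, const DefMap &MRI) {
  auto It = MRI.find(VReg);
  while (It != MRI.end() && It->second.Op == Opcode::GTrunc)
    It = MRI.find(It->second.Src);
  if (It == MRI.end() || It->second.Op != Opcode::GConstant)
    return std::nullopt; // Dynamic index: caller falls back to register indexing.
  return It->second.Imm;
}

int main() {
  // Models the new tests: %1:s64 = G_CONSTANT i64 0 ; %2:s32 = G_TRUNC %1
  DefMap MRI;
  MRI[1] = {Opcode::GConstant, 0, 0};
  MRI[2] = {Opcode::GTrunc, 0, 1};
  // With the look-through, the index %2 still folds to the constant 0.
  return lookThroughForConstant(2, MRI) == std::optional<int64_t>(0) ? 0 : 1;
}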