Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -53,6 +53,8 @@
                             MachineIRBuilder &MIRBuilder) const;
   bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
                                 MachineIRBuilder &MIRBuilder) const;
+  bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
+                               MachineIRBuilder &MIRBuilder) const;
 
   Register getLiveInRegister(MachineRegisterInfo &MRI,
                              Register Reg, LLT Ty) const;
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -793,7 +793,7 @@
   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
     return legalizeExtractVectorElt(MI, MRI, MIRBuilder);
   case TargetOpcode::G_INSERT_VECTOR_ELT:
-    return true; // TODO
+    return legalizeInsertVectorElt(MI, MRI, MIRBuilder);
   default:
     return false;
   }
@@ -1154,6 +1154,42 @@
   return true;
 }
 
+/// Legalize G_INSERT_VECTOR_ELT. An insert at a constant in-bounds index is
+/// rewritten to G_INSERT at the matching bit offset; a constant out-of-bounds
+/// index produces G_IMPLICIT_DEF. Dynamic indices are left untouched.
+bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B) const {
+  // TODO: Should move some of this into LegalizerHelper.
+
+  // TODO: Promote dynamic indexing of s16 to s32
+  // TODO: Dynamic s64 indexing is only legal for SGPR.
+  Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
+  if (!IdxVal) // Dynamic case will be selected to register indexing.
+    return true;
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Vec = MI.getOperand(1).getReg();
+  Register Ins = MI.getOperand(2).getReg();
+
+  LLT VecTy = MRI.getType(Vec);
+  LLT EltTy = VecTy.getElementType();
+  assert(EltTy == MRI.getType(Ins));
+
+  B.setInstr(MI);
+
+  // Compare as unsigned so that a negative constant index is treated as out of
+  // bounds rather than producing a negative G_INSERT offset.
+  const uint64_t Idx = static_cast<uint64_t>(IdxVal.getValue());
+  if (Idx < VecTy.getNumElements())
+    B.buildInsert(Dst, Vec, Ins, Idx * EltTy.getSizeInBits());
+  else
+    B.buildUndef(Dst);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 // Return the use branch instruction, otherwise null if the usage is invalid.
 static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
                                        MachineRegisterInfo &MRI) {
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -10,9 +10,8 @@
     ; CHECK-LABEL: name: insert_vector_elt_0_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[C]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s32) = G_CONSTANT i32 0
@@ -20,6 +19,42 @@
     $vgpr0_vgpr1 = COPY %3
 ...
 
+---
+name: insert_vector_elt_1_v2s32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+    ; CHECK-LABEL: name: insert_vector_elt_1_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
+    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: insert_vector_elt_2_v2s32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+    ; CHECK-LABEL: name: insert_vector_elt_2_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s32) = G_CONSTANT i32 2
+    %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1 = COPY %3
+...
+
 ---
 name: insert_vector_elt_v2s32_varidx_i64
 