Index: llvm/lib/Target/Mips/MipsInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/Mips/MipsInstructionSelector.cpp
+++ llvm/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -48,6 +48,8 @@
   getRegClassForTypeOnBank(Register Reg, MachineRegisterInfo &MRI) const;
   unsigned selectLoadStoreOpCode(MachineInstr &I,
                                  MachineRegisterInfo &MRI) const;
+  bool selectInsertVectorEltImmIndex(MachineInstr &I,
+                                     MachineRegisterInfo &MRI) const;
 
   const MipsTargetMachine &TM;
   const MipsSubtarget &STI;
@@ -244,6 +246,82 @@
   return Opc;
 }
 
+/// Select a G_INSERT_VECTOR_ELT whose index is defined by a G_CONSTANT.
+///
+/// The opcode is chosen from the vector's scalar size: INSERT_B/INSERT_H/
+/// INSERT_W when isRegInGprb() holds for the element, INSERT_FW_PSEUDO/
+/// INSERT_FD_PSEUDO when isRegInFprb() holds for it.
+///
+/// \returns false with \p I left in place if the element bank/size pair is not
+/// handled or the constant index is out of range; otherwise \p I is replaced
+/// and the result of constraining the new instruction's operands is returned.
+bool MipsInstructionSelector::selectInsertVectorEltImmIndex(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  MachineBasicBlock &MBB = *I.getParent();
+  Register Dst = I.getOperand(0).getReg();
+  Register Src = I.getOperand(1).getReg();
+  Register Elt = I.getOperand(2).getReg();
+  unsigned VectorElSize = MRI.getType(Src).getScalarSizeInBits();
+  unsigned Opcode;
+
+  Register IdxReg = I.getOperand(3).getReg();
+  MachineInstr *IdxI = MRI.getVRegDef(IdxReg);
+  assert(IdxI->getOpcode() == TargetOpcode::G_CONSTANT &&
+         "Expected the index to be defined by a G_CONSTANT.");
+  APInt Idx = IdxI->getOperand(1).getCImm()->getValue();
+  // The insert instructions take the lane as an immediate, so refuse a
+  // constant index that does not name an existing lane of the vector.
+  if (Idx.uge(MRI.getType(Src).getNumElements()))
+    return false;
+
+  if (isRegInGprb(Elt, MRI)) {
+    switch (VectorElSize) {
+    case 8:
+      Opcode = Mips::INSERT_B;
+      break;
+    case 16:
+      Opcode = Mips::INSERT_H;
+      break;
+    case 32:
+      Opcode = Mips::INSERT_W;
+      break;
+    default:
+      return false;
+    }
+    MachineInstr *MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode))
+                           .addDef(Dst)
+                           .addUse(Src)
+                           .addUse(Elt)
+                           .addImm(Idx.getLimitedValue());
+    I.eraseFromParent();
+    return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
+  }
+
+  if (isRegInFprb(Elt, MRI)) {
+    switch (VectorElSize) {
+    case 32:
+      Opcode = Mips::INSERT_FW_PSEUDO;
+      break;
+    case 64:
+      Opcode = Mips::INSERT_FD_PSEUDO;
+      break;
+    default:
+      return false;
+    }
+    // Unlike INSERT_B/H/W above, the lane immediate is added ahead of the
+    // element register here.
+    MachineInstr *MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode))
+                           .addDef(Dst)
+                           .addUse(Src)
+                           .addImm(Idx.getLimitedValue())
+                           .addUse(Elt);
+    I.eraseFromParent();
+    return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
+  }
+
+  return false;
+}
+
 bool MipsInstructionSelector::select(MachineInstr &I) {
 
   MachineBasicBlock &MBB = *I.getParent();
@@ -432,6 +510,17 @@
              .addMemOperand(*I.memoperands_begin());
     break;
   }
+  case G_INSERT_VECTOR_ELT: {
+    assert(STI.hasMSA() && "Vector instructions require target with MSA.");
+    Register IdxReg = I.getOperand(3).getReg();
+    MachineInstr *IdxI = MRI.getVRegDef(IdxReg);
+
+    // Only an index defined directly by a G_CONSTANT is selected here.
+    if (IdxI->getOpcode() == G_CONSTANT)
+      return selectInsertVectorEltImmIndex(I, MRI);
+
+    return false;
+  }
   case G_UDIV:
   case G_UREM:
   case G_SDIV:
Index: llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -99,6 +99,16 @@
   getActionDefinitionsBuilder(G_IMPLICIT_DEF)
       .legalFor({s32, s64});
 
+  // Legal only when the subtarget has MSA and type indices 0, 1 and 2 match
+  // the lists below; otherwise type index 1 is widened to at least s32.
+  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
+      .legalIf([=, &ST](const LegalityQuery &Query) {
+        return ST.hasMSA() &&
+               CheckTyN(0, Query, {v16s8, v8s16, v4s32, v2s64}) &&
+               CheckTyN(1, Query, {s32, s64}) && CheckTyN(2, Query, {s32});
+      })
+      .minScalar(1, s32);
+
   getActionDefinitionsBuilder(G_UNMERGE_VALUES)
       .legalFor({{s32, s64}});
 
Index: llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -162,6 +162,7 @@
   case TargetOpcode::G_PHI:
   case TargetOpcode::G_SELECT:
   case TargetOpcode::G_IMPLICIT_DEF:
+  case TargetOpcode::G_INSERT_VECTOR_ELT:
     return true;
   default:
     return false;
@@ -229,6 +230,8 @@
 
   if (MI->getOpcode() == TargetOpcode::G_STORE)
     addUseDef(MI->getOperand(0).getReg(), MRI);
+  if (MI->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
+    addUseDef(MI->getOperand(2).getReg(), MRI);
 
   if (MI->getOpcode() == TargetOpcode::G_PHI) {
     addDefUses(MI->getOperand(0).getReg(), MRI);
@@ -527,6 +530,28 @@
       OperandsMapping = getGprbOrCustomMapping(Op0Size, MappingID);
 
     break;
+  case G_INSERT_VECTOR_ELT: {
+    const LLT Op2Ty = MRI.getType(MI.getOperand(2).getReg());
+    unsigned Op2Size = Op2Ty.getSizeInBits();
+
+    if (!Op2Ty.isPointer())
+      InstTy = TI.determineInstType(&MI);
+
+    const RegisterBankInfo::ValueMapping *MSABank = getMSAMapping(MF);
+    if (InstTy == InstType::FloatingPoint ||
+        (Op2Size == 64 && InstTy == InstType::Ambiguous))
+      OperandsMapping =
+          getOperandsMapping({MSABank, MSABank, getFprbMapping(Op2Size),
+                              &Mips::ValueMappings[Mips::GPRIdx]});
+    else
+      // Ambiguous 32 bit vector insert will be mapped to gprb to match the
+      // way other ambiguous 32 bit operands are mapped.
+      OperandsMapping = getOperandsMapping(
+          {MSABank, MSABank, getGprbOrCustomMapping(Op2Size, MappingID),
+           &Mips::ValueMappings[Mips::GPRIdx]});
+
+    break;
+  }
   case G_UNMERGE_VALUES:
     OperandsMapping = getOperandsMapping({&Mips::ValueMappings[Mips::GPRIdx],
                                           &Mips::ValueMappings[Mips::GPRIdx],
Index: llvm/test/CodeGen/Mips/GlobalISel/instruction-select/insert_vector_elt_imm_index.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Mips/GlobalISel/instruction-select/insert_vector_elt_imm_index.mir
@@ -0,0 +1,148 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600
+--- |
+
+  define void @insert_i8(i8 %val, <16 x i8>* %V) { entry: ret void }
+  define void @insert_i16(i16 %val, <8 x i16>* %V) { entry: ret void }
+  define void @insert_i32(i32 %val, <4 x i32>* %V) { entry: ret void }
+  define void @insert_float_fprb(float %val, <4 x float>* %V) { entry: ret void }
+  define void @insert_double(double %val, <2 x double>* %V) { entry: ret void }
+
+...
+--- +name: insert_i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i8 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_B:%[0-9]+]]:msa128b = INSERT_B [[LD_B]], [[COPY]], 7 + ; P5600: ST_B [[INSERT_B]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %2:gprb(s32) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %5:gprb(s32) = G_CONSTANT i32 7 + %3:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:gprb(s32) = COPY %2(s32) + %4:fprb(<16 x s8>) = G_INSERT_VECTOR_ELT %3, %6(s32), %5(s32) + G_STORE %4(<16 x s8>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i16 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_H:%[0-9]+]]:msa128h = INSERT_H [[LD_H]], [[COPY]], 5 + ; P5600: ST_H [[INSERT_H]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %2:gprb(s32) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %5:gprb(s32) = G_CONSTANT i32 5 + %3:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:gprb(s32) = COPY %2(s32) + %4:fprb(<8 x s16>) = G_INSERT_VECTOR_ELT %3, %6(s32), %5(s32) + G_STORE %4(<8 x s16>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i32 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_W:%[0-9]+]]:msa128w = INSERT_W [[LD_W]], [[COPY]], 2 + ; P5600: ST_W [[INSERT_W]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %0:gprb(s32) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %4:gprb(s32) = G_CONSTANT i32 2 + %2:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %3:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %4(s32) + G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_float_fprb +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a1, $f12 + + ; P5600-LABEL: name: insert_float_fprb + ; P5600: liveins: $a1, $f12 + ; P5600: [[COPY:%[0-9]+]]:fgr32 = COPY $f12 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128w = INSERT_FW_PSEUDO [[LD_W]], 3, [[COPY]] + ; P5600: ST_W [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %0:fprb(s32) = COPY $f12 + %1:gprb(p0) = COPY $a1 + %4:gprb(s32) = G_CONSTANT i32 3 + %2:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %3:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %4(s32) + G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_double +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a2, $d12_64 + + ; P5600-LABEL: name: insert_double + ; P5600: liveins: $a2, $d12_64 + ; P5600: [[COPY:%[0-9]+]]:fgr64 = COPY $d12_64 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a2 + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128d = INSERT_FD_PSEUDO [[LD_D]], 1, [[COPY]] + ; P5600: ST_D [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %0:fprb(s64) = COPY $d12_64 + %1:gprb(p0) = COPY $a2 + %4:gprb(s32) = G_CONSTANT i32 1 + %2:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %3:fprb(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %4(s32) + G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... Index: llvm/test/CodeGen/Mips/GlobalISel/legalizer/insert_vector_elt_imm_index.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/legalizer/insert_vector_elt_imm_index.mir @@ -0,0 +1,203 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @insert_i8(i8 %val, <16 x i8>* %V) { entry: ret void } + define void @insert_i16(i16 %val, <8 x i16>* %V) { entry: ret void } + define void @insert_i32(i32 %val, <4 x i32>* %V) { entry: ret void } + define void @insert_i64_fprb(i64* %pval, <2 x i64>* %V) { entry: ret void } + define void @insert_float_fprb(float %val, <4 x float>* %V) { entry: ret void } + define void @insert_float_gprb(float* %pval, <4 x float>* %V) { entry: ret void } + define void @insert_double(double %val, <2 x double>* %V) { entry: ret void } + +... 
+--- +name: insert_i8 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i8 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; P5600: [[IVEC:%[0-9]+]]:_(<16 x s8>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY2]](s32), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<16 x s8>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %2:_(s32) = COPY $a0 + %0:_(s8) = G_TRUNC %2(s32) + %1:_(p0) = COPY $a1 + %5:_(s32) = G_CONSTANT i32 7 + %3:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:_(<16 x s8>) = G_INSERT_VECTOR_ELT %3, %0(s8), %5(s32) + G_STORE %4(<16 x s8>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_i16 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i16 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; P5600: [[IVEC:%[0-9]+]]:_(<8 x s16>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY2]](s32), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<8 x s16>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %2:_(s32) = COPY $a0 + %0:_(s16) = G_TRUNC %2(s32) + %1:_(p0) = COPY $a1 + %5:_(s32) = G_CONSTANT i32 5 + %3:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:_(<8 x s16>) = G_INSERT_VECTOR_ELT %3, %0(s16), %5(s32) + G_STORE %4(<8 x s16>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_i32 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i32 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY]](s32), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(s32) = COPY $a0 + %1:_(p0) = COPY $a1 + %4:_(s32) = G_CONSTANT i32 2 + %2:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %4(s32) + G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_i64_fprb +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i64_fprb + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; P5600: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.pval) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[LOAD1]], [[LOAD]](s64), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %5:_(s32) = G_CONSTANT i32 0 + %2:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.pval) + %3:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:_(<2 x s64>) = G_INSERT_VECTOR_ELT %3, %2(s64), %5(s32) + G_STORE %4(<2 x s64>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_float_fprb +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a1, $f12 + + ; P5600-LABEL: name: insert_float_fprb + ; P5600: liveins: $a1, $f12 + ; P5600: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY]](s32), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(s32) = COPY $f12 + %1:_(p0) = COPY $a1 + %4:_(s32) = G_CONSTANT i32 3 + %2:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %4(s32) + G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_float_gprb +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_float_gprb + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; P5600: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.pval) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[LOAD1]], [[LOAD]](s32), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %5:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.pval) + %3:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %3, %2(s32), %5(s32) + G_STORE %4(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_double +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a2, $d12_64 + + ; P5600-LABEL: name: insert_double + ; P5600: liveins: $a2, $d12_64 + ; P5600: [[COPY:%[0-9]+]]:_(s64) = COPY $d12_64 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a2 + ; P5600: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY]](s64), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(s64) = COPY $d12_64 + %1:_(p0) = COPY $a2 + %4:_(s32) = G_CONSTANT i32 1 + %2:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %3:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %4(s32) + G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/insert_vector_elt_imm_index.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/insert_vector_elt_imm_index.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=P5600 + +define void @insert_i8(i8 %val, <16 x i8>* %V) { +; P5600-LABEL: insert_i8: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.b $w0, 0($5) +; P5600-NEXT: insert.b $w0[7], $4 +; P5600-NEXT: st.b $w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <16 x i8>, <16 x i8>* %V, align 16 + %vecins = insertelement <16 x i8> %0, i8 %val, i32 7 + store <16 x i8> %vecins, <16 x i8>* %V, align 16 + ret void +} + +define void @insert_i16(i16 %val, <8 x i16>* %V) { +; P5600-LABEL: insert_i16: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.h $w0, 0($5) +; P5600-NEXT: insert.h $w0[5], $4 +; P5600-NEXT: st.h 
$w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <8 x i16>, <8 x i16>* %V, align 16 + %vecins = insertelement <8 x i16> %0, i16 %val, i32 5 + store <8 x i16> %vecins, <8 x i16>* %V, align 16 + ret void +} + +define void @insert_i32(i32 %val, <4 x i32>* %V) { +; P5600-LABEL: insert_i32: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.w $w0, 0($5) +; P5600-NEXT: insert.w $w0[2], $4 +; P5600-NEXT: st.w $w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <4 x i32>, <4 x i32>* %V, align 16 + %vecins = insertelement <4 x i32> %0, i32 %val, i32 2 + store <4 x i32> %vecins, <4 x i32>* %V, align 16 + ret void +} + +define void @insert_i64_fprb(i64* %pval, <2 x i64>* %V) { +; P5600-LABEL: insert_i64_fprb: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ldc1 $f0, 0($4) +; P5600-NEXT: ld.d $w1, 0($5) +; P5600-NEXT: mov.d $f2, $f0 +; P5600-NEXT: insve.d $w1[0], $w2[0] +; P5600-NEXT: st.d $w1, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load i64, i64* %pval, align 8 + %1 = load <2 x i64>, <2 x i64>* %V, align 16 + %vecins = insertelement <2 x i64> %1, i64 %0, i32 0 + store <2 x i64> %vecins, <2 x i64>* %V, align 16 + ret void +} + +define void @insert_float_fprb(float %val, <4 x float>* %V) { +; P5600-LABEL: insert_float_fprb: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.w $w0, 0($5) +; P5600-NEXT: mov.s $f1, $f12 +; P5600-NEXT: insve.w $w0[3], $w1[0] +; P5600-NEXT: st.w $w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <4 x float>, <4 x float>* %V, align 16 + %vecins = insertelement <4 x float> %0, float %val, i32 3 + store <4 x float> %vecins, <4 x float>* %V, align 16 + ret void +} + +define void @insert_float_gprb(float* %pval, <4 x float>* %V) { +; P5600-LABEL: insert_float_gprb: +; P5600: # %bb.0: # %entry +; P5600-NEXT: lw $1, 0($4) +; P5600-NEXT: ld.w $w0, 0($5) +; P5600-NEXT: insert.w $w0[1], $1 +; P5600-NEXT: st.w $w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load float, 
float* %pval, align 4 + %1 = load <4 x float>, <4 x float>* %V, align 16 + %vecins = insertelement <4 x float> %1, float %0, i32 1 + store <4 x float> %vecins, <4 x float>* %V, align 16 + ret void +} + +define void @insert_double(double %val, <2 x double>* %V) { +; P5600-LABEL: insert_double: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.d $w0, 0($6) +; P5600-NEXT: mov.d $f1, $f12 +; P5600-NEXT: insve.d $w0[1], $w1[0] +; P5600-NEXT: st.d $w0, 0($6) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <2 x double>, <2 x double>* %V, align 16 + %vecins = insertelement <2 x double> %0, double %val, i32 1 + store <2 x double> %vecins, <2 x double>* %V, align 16 + ret void +} Index: llvm/test/CodeGen/Mips/GlobalISel/regbankselect/insert_vector_elt_imm_index.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/regbankselect/insert_vector_elt_imm_index.mir @@ -0,0 +1,210 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @insert_i8(i8 %val, <16 x i8>* %V) { entry: ret void } + define void @insert_i16(i16 %val, <8 x i16>* %V) { entry: ret void } + define void @insert_i32(i32 %val, <4 x i32>* %V) { entry: ret void } + define void @insert_i64_fprb(i64* %pval, <2 x i64>* %V) { entry: ret void } + define void @insert_float_fprb(float %val, <4 x float>* %V) { entry: ret void } + define void @insert_float_gprb(float* %pval, <4 x float>* %V) { entry: ret void } + define void @insert_double(double %val, <2 x double>* %V) { entry: ret void } + +... 
+--- +name: insert_i8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i8 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 7 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<16 x s8>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY2]](s32), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<16 x s8>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %2:_(s32) = COPY $a0 + %1:_(p0) = COPY $a1 + %5:_(s32) = G_CONSTANT i32 7 + %3:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:_(s32) = COPY %2(s32) + %4:_(<16 x s8>) = G_INSERT_VECTOR_ELT %3, %6(s32), %5(s32) + G_STORE %4(<16 x s8>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_i16 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i16 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 5 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<8 x s16>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY2]](s32), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<8 x s16>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %2:_(s32) = COPY $a0 + %1:_(p0) = COPY $a1 + %5:_(s32) = G_CONSTANT i32 5 + %3:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:_(s32) = COPY %2(s32) + %4:_(<8 x s16>) = G_INSERT_VECTOR_ELT %3, %6(s32), %5(s32) + G_STORE %4(<8 x s16>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_i32 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i32 + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 2 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY]](s32), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(s32) = COPY $a0 + %1:_(p0) = COPY $a1 + %4:_(s32) = G_CONSTANT i32 2 + %2:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %4(s32) + G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_i64_fprb +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_i64_fprb + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 0 + ; P5600: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.pval) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<2 x s64>) = G_INSERT_VECTOR_ELT [[LOAD1]], [[LOAD]](s64), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %5:_(s32) = G_CONSTANT i32 0 + %2:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.pval) + %3:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:_(<2 x s64>) = G_INSERT_VECTOR_ELT %3, %2(s64), %5(s32) + G_STORE %4(<2 x s64>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_float_fprb +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a1, $f12 + + ; P5600-LABEL: name: insert_float_fprb + ; P5600: liveins: $a1, $f12 + ; P5600: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 3 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY]](s32), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(s32) = COPY $f12 + %1:_(p0) = COPY $a1 + %4:_(s32) = G_CONSTANT i32 3 + %2:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %4(s32) + G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_float_gprb +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; P5600-LABEL: name: insert_float_gprb + ; P5600: liveins: $a0, $a1 + ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 + ; P5600: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.pval) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT [[LOAD1]], [[LOAD]](s32), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %5:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.pval) + %3:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %3, %2(s32), %5(s32) + G_STORE %4(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_double +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a2, $d12_64 + + ; P5600-LABEL: name: insert_double + ; P5600: liveins: $a2, $d12_64 + ; P5600: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d12_64 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a2 + ; P5600: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<2 x s64>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY]](s64), [[C]](s32) + ; P5600: G_STORE [[IVEC]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(s64) = COPY $d12_64 + %1:_(p0) = COPY $a2 + %4:_(s32) = G_CONSTANT i32 1 + %2:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %3:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %4(s32) + G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.V) + RetRA + +...