Index: llvm/lib/Target/Mips/MipsInstructionSelector.cpp =================================================================== --- llvm/lib/Target/Mips/MipsInstructionSelector.cpp +++ llvm/lib/Target/Mips/MipsInstructionSelector.cpp @@ -50,6 +50,9 @@ MachineRegisterInfo &MRI) const; bool selectInsertVectorEltImmIndex(MachineInstr &I, MachineRegisterInfo &MRI) const; + unsigned + selectInsertVectorEltVariableIndexOpcode(MachineInstr &I, + MachineRegisterInfo &MRI) const; const MipsTargetMachine &TM; const MipsSubtarget &STI; @@ -304,6 +307,38 @@ return false; } +/// Returns the INSERT_*_VIDX_PSEUDO opcode for I, or I.getOpcode() on failure. +unsigned MipsInstructionSelector::selectInsertVectorEltVariableIndexOpcode( + MachineInstr &I, MachineRegisterInfo &MRI) const { + Register Src = I.getOperand(1).getReg(); + Register Elt = I.getOperand(2).getReg(); + unsigned VectorElSize = MRI.getType(Src).getScalarSizeInBits(); + unsigned Opc = I.getOpcode(); + if (isRegInGprb(Elt, MRI)) + switch (VectorElSize) { + case 8: + return Mips::INSERT_B_VIDX_PSEUDO; + case 16: + return Mips::INSERT_H_VIDX_PSEUDO; + case 32: + return Mips::INSERT_W_VIDX_PSEUDO; + default: + return Opc; + } + + if (isRegInFprb(Elt, MRI)) + switch (VectorElSize) { + case 32: + return Mips::INSERT_FW_VIDX_PSEUDO; + case 64: + return Mips::INSERT_FD_VIDX_PSEUDO; + default: + return Opc; + } + + return Opc; +} + bool MipsInstructionSelector::select(MachineInstr &I) { MachineBasicBlock &MBB = *I.getParent(); @@ -500,7 +535,18 @@ if (IdxI->getOpcode() == G_CONSTANT) return selectInsertVectorEltImmIndex(I, MRI); - return false; + // The insert index is not an immediate, so select the pseudo that + // takes the index in a virtual register.
+ const unsigned Opcode = selectInsertVectorEltVariableIndexOpcode(I, MRI); + if (Opcode == I.getOpcode()) + return false; + + MI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(Opcode)) + .addDef(I.getOperand(0).getReg()) + .addUse(I.getOperand(1).getReg()) + .addUse(IdxReg) + .addUse(I.getOperand(2).getReg()); + break; } case G_UDIV: case G_UREM: Index: llvm/test/CodeGen/Mips/GlobalISel/instruction-select/insert_vector_elt_variable_index.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/instruction-select/insert_vector_elt_variable_index.mir @@ -0,0 +1,153 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @insert_i8(i8 %val, <16 x i8>* %V, i32 %index) { entry: ret void } + define void @insert_i16(i16 %val, <8 x i16>* %V, i32 %index) { entry: ret void } + define void @insert_i32(i32 %val, <4 x i32>* %V, i32 %index) { entry: ret void } + define void @insert_float_fprb(float %val, <4 x float>* %V, i32 %index) { entry: ret void } + define void @insert_double(double %val, <2 x double>* %V, i32 %index) { entry: ret void } + +... 
+--- +name: insert_i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: insert_i8 + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 + ; P5600: [[LD_B:%[0-9]+]]:msa128b = LD_B [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128b = INSERT_B_VIDX_PSEUDO [[LD_B]], [[COPY2]], [[COPY]] + ; P5600: ST_B [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %3:gprb(s32) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %2:gprb(s32) = COPY $a2 + %4:fprb(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:gprb(s32) = COPY %3(s32) + %5:fprb(<16 x s8>) = G_INSERT_VECTOR_ELT %4, %6(s32), %2(s32) + G_STORE %5(<16 x s8>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: insert_i16 + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 + ; P5600: [[LD_H:%[0-9]+]]:msa128h = LD_H [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128h = INSERT_H_VIDX_PSEUDO [[LD_H]], [[COPY2]], [[COPY]] + ; P5600: ST_H [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %3:gprb(s32) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %2:gprb(s32) = COPY $a2 + %4:fprb(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:gprb(s32) = COPY %3(s32) + %5:fprb(<8 x s16>) = G_INSERT_VECTOR_ELT %4, %6(s32), %2(s32) + G_STORE %5(<8 x s16>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: insert_i32 + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128w = INSERT_W_VIDX_PSEUDO [[LD_W]], [[COPY2]], [[COPY]] + ; P5600: ST_W [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %0:gprb(s32) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %2:gprb(s32) = COPY $a2 + %3:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %2(s32) + G_STORE %4(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_float_fprb +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a1, $a2, $f12 + + ; P5600-LABEL: name: insert_float_fprb + ; P5600: liveins: $a1, $a2, $f12 + ; P5600: [[COPY:%[0-9]+]]:fgr32 = COPY $f12 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a2 + ; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128w = INSERT_FW_VIDX_PSEUDO [[LD_W]], [[COPY2]], [[COPY]] + ; P5600: ST_W [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %0:fprb(s32) = COPY $f12 + %1:gprb(p0) = COPY $a1 + %2:gprb(s32) = COPY $a2 + %3:fprb(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %2(s32) + G_STORE %4(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_double +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a2, $a3, $d12_64 + + ; P5600-LABEL: name: insert_double + ; P5600: liveins: $a2, $a3, $d12_64 + ; P5600: [[COPY:%[0-9]+]]:fgr64 = COPY $d12_64 + ; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a2 + ; P5600: [[COPY2:%[0-9]+]]:gpr32 = COPY $a3 + ; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY1]], 0 :: (load 16 from %ir.V) + ; P5600: [[INSERT_:%[0-9]+]]:msa128d = INSERT_FD_VIDX_PSEUDO [[LD_D]], [[COPY2]], [[COPY]] + ; P5600: ST_D [[INSERT_]], [[COPY1]], 0 :: (store 16 into %ir.V) + ; P5600: RetRA + %0:fprb(s64) = COPY $d12_64 + %1:gprb(p0) = COPY $a2 + %2:gprb(s32) = COPY $a3 + %3:fprb(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:fprb(<2 x s64>) = G_INSERT_VECTOR_ELT %3, %0(s64), %2(s32) + G_STORE %4(<2 x s64>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... Index: llvm/test/CodeGen/Mips/GlobalISel/legalizer/insert_vector_elt_variable_index.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/legalizer/insert_vector_elt_variable_index.mir @@ -0,0 +1,204 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @insert_i8(i8 %val, <16 x i8>* %V, i32 %index) { entry: ret void } + define void @insert_i16(i16 %val, <8 x i16>* %V, i32 %index) { entry: ret void } + define void @insert_i32(i32 %val, <4 x i32>* %V, i32 %index) { entry: ret void } + define void @insert_i64_fprb(i64* %pval, <2 x i64>* %V, i64* %Ret, i32 %index) { entry: ret void } + define void @insert_float_fprb(float %val, <4 x float>* %V, i32 %index) { entry: ret void } + define void @insert_float_gprb(float* %pval, <4 x float>* %V, float* %Ret, i32 %index) { entry: ret void } + define 
void @insert_double(double %val, <2 x double>* %V, i32 %index) { entry: ret void } + +... +--- +name: insert_i8 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: insert_i8 + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; P5600: [[LOAD:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; P5600: [[IVEC:%[0-9]+]]:_(<16 x s8>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY3]](s32), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<16 x s8>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %3:_(s32) = COPY $a0 + %0:_(s8) = G_TRUNC %3(s32) + %1:_(p0) = COPY $a1 + %2:_(s32) = COPY $a2 + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %5:_(<16 x s8>) = G_INSERT_VECTOR_ELT %4, %0(s8), %2(s32) + G_STORE %5(<16 x s8>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_i16 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: insert_i16 + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; P5600: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; P5600: [[IVEC:%[0-9]+]]:_(<8 x s16>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY3]](s32), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<8 x s16>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %3:_(s32) = COPY $a0 + %0:_(s16) = G_TRUNC %3(s32) + %1:_(p0) = COPY $a1 + %2:_(s32) = COPY $a2 + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %5:_(<8 x s16>) = G_INSERT_VECTOR_ELT %4, %0(s16), %2(s32) + G_STORE %5(<8 x s16>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_i32 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: insert_i32 + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY]](s32), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(s32) = COPY $a0 + %1:_(p0) = COPY $a1 + %2:_(s32) = COPY $a2 + %3:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %2(s32) + G_STORE %4(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_i64_fprb +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2, $a3 + + ; P5600-LABEL: name: insert_i64_fprb + ; P5600: liveins: $a0, $a1, $a2, $a3 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 + ; P5600: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.pval) + ; P5600: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[LOAD1]], [[LOAD]](s64), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %3:_(s32) = COPY $a3 + %4:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.pval) + %5:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:_(<2 x s64>) = G_INSERT_VECTOR_ELT %5, %4(s64), %3(s32) + G_STORE %6(<2 x s64>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_float_fprb +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a1, $a2, $f12 + + ; P5600-LABEL: name: insert_float_fprb + ; P5600: liveins: $a1, $a2, $f12 + ; P5600: [[COPY:%[0-9]+]]:_(s32) = COPY $f12 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY]](s32), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(s32) = COPY $f12 + %1:_(p0) = COPY $a1 + %2:_(s32) = COPY $a2 + %3:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %2(s32) + G_STORE %4(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_float_gprb +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2, $a3 + + ; P5600-LABEL: name: insert_float_gprb + ; P5600: liveins: $a0, $a1, $a2, $a3 + ; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 + ; P5600: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.pval) + ; P5600: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[LOAD1]], [[LOAD]](s32), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %3:_(s32) = COPY $a3 + %4:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.pval) + %5:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:_(<4 x s32>) = G_INSERT_VECTOR_ELT %5, %4(s32), %3(s32) + G_STORE %6(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_double +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a2, $a3, $d12_64 + + + ; P5600-LABEL: name: insert_double + ; P5600: liveins: $a2, $a3, $d12_64 + ; P5600: [[COPY:%[0-9]+]]:_(s64) = COPY $d12_64 + ; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a2 + ; P5600: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3 + ; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY]](s64), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(s64) = COPY $d12_64 + %1:_(p0) = COPY $a2 + %2:_(s32) = COPY $a3 + %3:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:_(<2 x s64>) = G_INSERT_VECTOR_ELT %3, %0(s64), %2(s32) + G_STORE %4(<2 x s64>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/insert_vector_elt_variable_index.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/insert_vector_elt_variable_index.ll @@ -0,0 +1,141 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=P5600 + +define void @insert_i8(i8 %val, <16 x i8>* %V, i32 %index) { +; P5600-LABEL: insert_i8: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.b $w0, 0($5) +; P5600-NEXT: sld.b $w0, $w0[$6] +; P5600-NEXT: insert.b $w0[0], $4 +; P5600-NEXT: neg $1, $6 +; P5600-NEXT: sld.b $w0, $w0[$1] +; P5600-NEXT: st.b $w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <16 x i8>, <16 x i8>* %V, align 16 + %vecins = insertelement <16 x i8> %0, i8 %val, i32 %index + store <16 x i8> %vecins, <16 x i8>* %V, align 16 + ret void +} + +define void @insert_i16(i16 %val, <8 x i16>* %V, i32 %index) { +; 
P5600-LABEL: insert_i16: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.h $w0, 0($5) +; P5600-NEXT: sll $1, $6, 1 +; P5600-NEXT: sld.b $w0, $w0[$1] +; P5600-NEXT: insert.h $w0[0], $4 +; P5600-NEXT: neg $1, $1 +; P5600-NEXT: sld.b $w0, $w0[$1] +; P5600-NEXT: st.h $w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <8 x i16>, <8 x i16>* %V, align 16 + %vecins = insertelement <8 x i16> %0, i16 %val, i32 %index + store <8 x i16> %vecins, <8 x i16>* %V, align 16 + ret void +} + +define void @insert_i32(i32 %val, <4 x i32>* %V, i32 %index) { +; P5600-LABEL: insert_i32: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.w $w0, 0($5) +; P5600-NEXT: sll $1, $6, 2 +; P5600-NEXT: sld.b $w0, $w0[$1] +; P5600-NEXT: insert.w $w0[0], $4 +; P5600-NEXT: neg $1, $1 +; P5600-NEXT: sld.b $w0, $w0[$1] +; P5600-NEXT: st.w $w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <4 x i32>, <4 x i32>* %V, align 16 + %vecins = insertelement <4 x i32> %0, i32 %val, i32 %index + store <4 x i32> %vecins, <4 x i32>* %V, align 16 + ret void +} + +define void @insert_i64_fprb(i64* %pval, <2 x i64>* %V, i64* %Ret, i32 %index) { +; P5600-LABEL: insert_i64_fprb: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ldc1 $f0, 0($4) +; P5600-NEXT: ld.d $w1, 0($5) +; P5600-NEXT: mov.d $f2, $f0 +; P5600-NEXT: sll $1, $7, 3 +; P5600-NEXT: sld.b $w1, $w1[$1] +; P5600-NEXT: insve.d $w1[0], $w2[0] +; P5600-NEXT: neg $1, $1 +; P5600-NEXT: sld.b $w1, $w1[$1] +; P5600-NEXT: st.d $w1, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load i64, i64* %pval, align 8 + %1 = load <2 x i64>, <2 x i64>* %V, align 16 + %vecins = insertelement <2 x i64> %1, i64 %0, i32 %index + store <2 x i64> %vecins, <2 x i64>* %V, align 16 + ret void +} + +define void @insert_float_fprb(float %val, <4 x float>* %V, i32 %index) { +; P5600-LABEL: insert_float_fprb: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.w $w0, 0($5) +; P5600-NEXT: mov.s $f1, $f12 +; P5600-NEXT: sll $1, $6, 2 +; P5600-NEXT: sld.b 
$w0, $w0[$1] +; P5600-NEXT: insve.w $w0[0], $w1[0] +; P5600-NEXT: neg $1, $1 +; P5600-NEXT: sld.b $w0, $w0[$1] +; P5600-NEXT: st.w $w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <4 x float>, <4 x float>* %V, align 16 + %vecins = insertelement <4 x float> %0, float %val, i32 %index + store <4 x float> %vecins, <4 x float>* %V, align 16 + ret void +} + +define void @insert_float_gprb(float* %pval, <4 x float>* %V, float* %Ret, i32 %index) { +; P5600-LABEL: insert_float_gprb: +; P5600: # %bb.0: # %entry +; P5600-NEXT: lw $1, 0($4) +; P5600-NEXT: ld.w $w0, 0($5) +; P5600-NEXT: sll $2, $7, 2 +; P5600-NEXT: sld.b $w0, $w0[$2] +; P5600-NEXT: insert.w $w0[0], $1 +; P5600-NEXT: neg $1, $2 +; P5600-NEXT: sld.b $w0, $w0[$1] +; P5600-NEXT: st.w $w0, 0($5) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load float, float* %pval, align 4 + %1 = load <4 x float>, <4 x float>* %V, align 16 + %vecins = insertelement <4 x float> %1, float %0, i32 %index + store <4 x float> %vecins, <4 x float>* %V, align 16 + ret void +} + +define void @insert_double(double %val, <2 x double>* %V, i32 %index) { +; P5600-LABEL: insert_double: +; P5600: # %bb.0: # %entry +; P5600-NEXT: ld.d $w0, 0($6) +; P5600-NEXT: mov.d $f1, $f12 +; P5600-NEXT: sll $1, $7, 3 +; P5600-NEXT: sld.b $w0, $w0[$1] +; P5600-NEXT: insve.d $w0[0], $w1[0] +; P5600-NEXT: neg $1, $1 +; P5600-NEXT: sld.b $w0, $w0[$1] +; P5600-NEXT: st.d $w0, 0($6) +; P5600-NEXT: jr $ra +; P5600-NEXT: nop +entry: + %0 = load <2 x double>, <2 x double>* %V, align 16 + %vecins = insertelement <2 x double> %0, double %val, i32 %index + store <2 x double> %vecins, <2 x double>* %V, align 16 + ret void +} Index: llvm/test/CodeGen/Mips/GlobalISel/regbankselect/insert_vector_elt_variable_index.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Mips/GlobalISel/regbankselect/insert_vector_elt_variable_index.mir @@ -0,0 +1,210 @@ +# NOTE: Assertions have been 
autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600 +--- | + + define void @insert_i8(i8 %val, <16 x i8>* %V, i32 %index) { entry: ret void } + define void @insert_i16(i16 %val, <8 x i16>* %V, i32 %index) { entry: ret void } + define void @insert_i32(i32 %val, <4 x i32>* %V, i32 %index) { entry: ret void } + define void @insert_i64_fprb(i64* %pval, <2 x i64>* %V, i64* %Ret, i32 %index) { entry: ret void } + define void @insert_float_fprb(float %val, <4 x float>* %V, i32 %index) { entry: ret void } + define void @insert_float_gprb(float* %pval, <4 x float>* %V, float* %Ret, i32 %index) { entry: ret void } + define void @insert_double(double %val, <2 x double>* %V, i32 %index) { entry: ret void } + +... +--- +name: insert_i8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: insert_i8 + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<16 x s8>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[COPY3:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<16 x s8>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY3]](s32), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<16 x s8>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %3:_(s32) = COPY $a0 + %1:_(p0) = COPY $a1 + %2:_(s32) = COPY $a2 + %4:_(<16 x s8>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:_(s32) = COPY %3(s32) + %5:_(<16 x s8>) = G_INSERT_VECTOR_ELT %4, %6(s32), %2(s32) + G_STORE %5(<16 x s8>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_i16 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: insert_i16 + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<8 x s16>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[COPY3:%[0-9]+]]:gprb(s32) = COPY [[COPY]](s32) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<8 x s16>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY3]](s32), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<8 x s16>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %3:_(s32) = COPY $a0 + %1:_(p0) = COPY $a1 + %2:_(s32) = COPY $a2 + %4:_(<8 x s16>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:_(s32) = COPY %3(s32) + %5:_(<8 x s16>) = G_INSERT_VECTOR_ELT %4, %6(s32), %2(s32) + G_STORE %5(<8 x s16>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_i32 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2 + + ; P5600-LABEL: name: insert_i32 + ; P5600: liveins: $a0, $a1, $a2 + ; P5600: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY]](s32), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(s32) = COPY $a0 + %1:_(p0) = COPY $a1 + %2:_(s32) = COPY $a2 + %3:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %2(s32) + G_STORE %4(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_i64_fprb +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2, $a3 + + ; P5600-LABEL: name: insert_i64_fprb + ; P5600: liveins: $a0, $a1, $a2, $a3 + ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a3 + ; P5600: [[LOAD:%[0-9]+]]:fprb(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.pval) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<2 x s64>) = G_INSERT_VECTOR_ELT [[LOAD1]], [[LOAD]](s64), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %3:_(s32) = COPY $a3 + %4:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.pval) + %5:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:_(<2 x s64>) = G_INSERT_VECTOR_ELT %5, %4(s64), %3(s32) + G_STORE %6(<2 x s64>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_float_fprb +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a1, $a2, $f12 + + ; P5600-LABEL: name: insert_float_fprb + ; P5600: liveins: $a1, $a2, $f12 + ; P5600: [[COPY:%[0-9]+]]:fprb(s32) = COPY $f12 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a2 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY]](s32), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(s32) = COPY $f12 + %1:_(p0) = COPY $a1 + %2:_(s32) = COPY $a2 + %3:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %3, %0(s32), %2(s32) + G_STORE %4(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... 
+--- +name: insert_float_gprb +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2, $a3 + + ; P5600-LABEL: name: insert_float_gprb + ; P5600: liveins: $a0, $a1, $a2, $a3 + ; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; P5600: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a3 + ; P5600: [[LOAD:%[0-9]+]]:gprb(s32) = G_LOAD [[COPY]](p0) :: (load 4 from %ir.pval) + ; P5600: [[LOAD1:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<4 x s32>) = G_INSERT_VECTOR_ELT [[LOAD1]], [[LOAD]](s32), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %3:_(s32) = COPY $a3 + %4:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.pval) + %5:_(<4 x s32>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %6:_(<4 x s32>) = G_INSERT_VECTOR_ELT %5, %4(s32), %3(s32) + G_STORE %6(<4 x s32>), %1(p0) :: (store 16 into %ir.V) + RetRA + +... +--- +name: insert_double +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a2, $a3, $d12_64 + + ; P5600-LABEL: name: insert_double + ; P5600: liveins: $a2, $a3, $d12_64 + ; P5600: [[COPY:%[0-9]+]]:fprb(s64) = COPY $d12_64 + ; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a2 + ; P5600: [[COPY2:%[0-9]+]]:gprb(s32) = COPY $a3 + ; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.V) + ; P5600: [[IVEC:%[0-9]+]]:fprb(<2 x s64>) = G_INSERT_VECTOR_ELT [[LOAD]], [[COPY]](s64), [[COPY2]](s32) + ; P5600: G_STORE [[IVEC]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.V) + ; P5600: RetRA + %0:_(s64) = COPY $d12_64 + %1:_(p0) = COPY $a2 + %2:_(s32) = COPY $a3 + %3:_(<2 x s64>) = G_LOAD %1(p0) :: (load 16 from %ir.V) + %4:_(<2 x s64>) = G_INSERT_VECTOR_ELT %3, %0(s64), %2(s32) + G_STORE %4(<2 x s64>), %1(p0) :: (store 16 into %ir.V) + RetRA + +...