Index: llvm/lib/Target/Mips/MipsInstrInfo.h
===================================================================
--- llvm/lib/Target/Mips/MipsInstrInfo.h
+++ llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -161,6 +161,14 @@
   ArrayRef<std::pair<unsigned, const char *>>
   getSerializableDirectMachineOperandTargetFlags() const override;
 
+  bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
+                                         AAResults *AA) const override;
+
+  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                     unsigned DestReg, unsigned SubIdx,
+                     const MachineInstr &Orig,
+                     const TargetRegisterInfo &TRI) const override;
+
 protected:
   bool isZeroImm(const MachineOperand &op) const;
 
Index: llvm/lib/Target/Mips/MipsInstrInfo.cpp
===================================================================
--- llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -841,3 +841,142 @@
   };
   return makeArrayRef(Flags);
 }
+
+bool MipsInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
+                                                      AAResults *AA) const {
+  switch (MI.getOpcode()) {
+  default:
+    // This function should only be called for opcodes with the
+    // ReMaterializable flag set.
+    llvm_unreachable("Unknown rematerializable operation!");
+
+  // class MSA_I10_LDI_DESC_BASE
+  case Mips::LDI_B:
+  case Mips::LDI_H:
+  case Mips::LDI_W:
+  case Mips::LDI_D:
+    return true;
+
+  // class LoadUpper
+  case Mips::LUi:
+  case Mips::LUi64:
+  case Mips::LUi_MM:
+    return true;
+
+  // class LoadImmMM16
+  case Mips::LI16_MM:
+  case Mips::LI16_MMR6:
+    return true;
+
+  // class LUI_MMR6_DESC
+  case Mips::LUI_MMR6:
+    return true;
+
+  // class ArithLogicI
+  case Mips::ADDIU_MMR6:
+  case Mips::ADDi:
+  case Mips::ADDi_MM:
+  case Mips::ADDiu:
+  case Mips::ADDiu_MM:
+  case Mips::ANDI_MMR6:
+  case Mips::ANDi:
+  case Mips::ANDi64:
+  case Mips::ANDi_MM:
+  case Mips::DADDi:
+  case Mips::DADDiu:
+  case Mips::ORI_MMR6:
+  case Mips::ORi:
+  case Mips::ORi64:
+  case Mips::ORi_MM:
+  case Mips::XORI_MMR6:
+  case Mips::XORi:
+  case Mips::XORi64:
+  case Mips::XORi_MM:
+    return MI.getOperand(1).getReg() == Mips::ZERO;
+
+  // class ArithLogicR
+  case Mips::ADD:
+  case Mips::ADDU_MMR6:
+  case Mips::ADD_MM:
+  case Mips::ADD_MMR6:
+  case Mips::ADDu:
+  case Mips::ADDu_MM:
+  case Mips::AND:
+  case Mips::AND64:
+  case Mips::AND_MM:
+  case Mips::AND_MMR6:
+  case Mips::BADDu:
+  case Mips::DADD:
+  case Mips::DADDu:
+  case Mips::DSUB:
+  case Mips::DSUBu:
+  case Mips::OR:
+  case Mips::OR64:
+  case Mips::OR_MM:
+  case Mips::OR_MMR6:
+  case Mips::SUB:
+  case Mips::SUBU_MMR6:
+  case Mips::SUB_MM:
+  case Mips::SUB_MMR6:
+  case Mips::SUBu:
+  case Mips::SUBu_MM:
+  case Mips::XOR:
+  case Mips::XOR64:
+  case Mips::XOR_MM:
+  case Mips::XOR_MMR6:
+    return (MI.getOperand(1).getReg() == Mips::ZERO) &&
+           (MI.getOperand(2).getReg() == Mips::ZERO);
+  case Mips::DMUL:
+  case Mips::MUHU_MMR6:
+  case Mips::MUH_MMR6:
+  case Mips::MUL:
+  case Mips::MULU_MMR6:
+  case Mips::MUL_MM:
+  case Mips::MUL_MMR6:
+  case Mips::V3MULU:
+  case Mips::VMM0:
+  case Mips::VMULU:
+    return false;
+
+  // class ArithLogic16Defs
+  case Mips::AddiuRxRxImm16:
+  case Mips::AddiuRxRxImmX16:
+  case Mips::AndRxRxRy16:
+  case Mips::OrRxRxRy16:
+    return MI.getOperand(1).getReg() == Mips::ZERO;
+  case Mips::AdduRxRyRz16:
+  case Mips::SubuRxRyRz16:
+  case Mips::XorRxRxRy16:
+    return false;
+
+  // class MoveMM16
+  case Mips::MOVE16_MM:
+  case Mips::MOVE16_MMR6:
+    return MI.getOperand(1).getReg() == Mips::ZERO;
+
+  // class MovePMM16
+  case Mips::MOVEP_MM:
+  case Mips::MOVEP_MMR6:
+    return MI.getOperand(2).getReg() == Mips::ZERO &&
+           MI.getOperand(3).getReg() == Mips::ZERO;
+
+  // class DIVMOD_MMR6_DESC_BASE
+  case Mips::DIVU_MMR6:
+  case Mips::DIV_MMR6:
+  case Mips::MODU_MMR6:
+  case Mips::MOD_MMR6:
+    return false;
+  }
+}
+
+void MipsInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I,
+                                  unsigned DestReg, unsigned SubIdx,
+                                  const MachineInstr &Orig,
+                                  const TargetRegisterInfo &TRI) const {
+  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
+  // Rewrite the cloned def before inserting; avoids std::prev(I), which is
+  // not valid when I == MBB.begin().
+  MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
+  MBB.insert(I, MI);
+}
Index: llvm/lib/Target/Mips/MipsMSAInstrInfo.td
===================================================================
--- llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -1287,6 +1287,7 @@
   // LDI is matched using custom matching code in MipsSEISelDAGToDAG.cpp
   list<dag> Pattern = [];
   bit hasSideEffects = 0;
+  bit isReMaterializable = 1;
   InstrItinClass Itinerary = itin;
 }
 
Index: llvm/test/CodeGen/Mips/msa/remat-ldi.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Mips/msa/remat-ldi.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -march=mipsel -mcpu=mips32r6 -mattr=+fp64,+msa %s -o - | FileCheck %s
+
+; Test that checks if spill for ldi can be avoided and instruction will be
+; rematerialized.
+
+declare dso_local void @foo()
+
+define dso_local void @test_ldi_b() {
+; CHECK-LABEL: test_ldi_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addiu $sp, $sp, -24
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 31, -4
+; CHECK-NEXT:    ldi.b $w0, 1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    jal foo
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    ldi.b $w0, 1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    addiu $sp, $sp, 24
+entry:
+  tail call void asm sideeffect "", "f,~{memory},~{$1}"(<16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  tail call void @foo()
+  tail call void asm sideeffect "", "f,~{memory},~{$1}"(<16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  ret void
+}
+
+define dso_local void @test_ldi_h() {
+; CHECK-LABEL: test_ldi_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addiu $sp, $sp, -24
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 31, -4
+; CHECK-NEXT:    ldi.h $w0, 2
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    jal foo
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    ldi.h $w0, 2
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    addiu $sp, $sp, 24
+entry:
+  tail call void asm sideeffect "", "f,~{memory},~{$1}"(<8 x i16> <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>)
+  tail call void @foo()
+  tail call void asm sideeffect "", "f,~{memory},~{$1}"(<8 x i16> <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>)
+  ret void
+}
+
+define dso_local void @test_ldi_w() {
+; CHECK-LABEL: test_ldi_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addiu $sp, $sp, -24
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 31, -4
+; CHECK-NEXT:    ldi.w $w0, 3
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    jal foo
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    ldi.w $w0, 3
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    addiu $sp, $sp, 24
+entry:
+  tail call void asm sideeffect "", "f,~{memory},~{$1}"(<4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+  tail call void @foo()
+  tail call void asm sideeffect "", "f,~{memory},~{$1}"(<4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+  ret void
+}
+
+define dso_local void @test_ldi_d() {
+; CHECK-LABEL: test_ldi_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addiu $sp, $sp, -24
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 31, -4
+; CHECK-NEXT:    ldi.d $w0, 4
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    jal foo
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    ldi.d $w0, 4
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    addiu $sp, $sp, 24
+entry:
+  tail call void asm sideeffect "", "f,~{memory},~{$1}"(<2 x i64> <i64 4, i64 4>)
+  tail call void @foo()
+  tail call void asm sideeffect "", "f,~{memory},~{$1}"(<2 x i64> <i64 4, i64 4>)
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"wchar_size", i32 4}