Index: lib/CodeGen/TargetInstrInfo.cpp =================================================================== --- lib/CodeGen/TargetInstrInfo.cpp +++ lib/CodeGen/TargetInstrInfo.cpp @@ -434,7 +434,9 @@ else MIB.addOperand(MO); } - return NewMI; + + MachineBasicBlock &MBB = *MI->getParent(); + return MBB.insert(MachineBasicBlock::iterator(MI), NewMI); } /// foldMemoryOperand - Attempt to fold a load or store of the specified stack @@ -484,9 +486,7 @@ Flags, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); NewMI->addMemOperand(MF, MMO); - - // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI. - return MBB->insert(MI, NewMI); + return NewMI; } // Straight COPY may fold as load/store. @@ -538,8 +538,6 @@ if (!NewMI) return nullptr; - NewMI = MBB.insert(MI, NewMI); - // Copy the memoperands from the load to the folded instruction. if (MI->memoperands_empty()) { NewMI->setMemRefs(LoadMI->memoperands_begin(), Index: lib/Target/Mips/Mips16InstrInfo.h =================================================================== --- lib/Target/Mips/Mips16InstrInfo.h +++ lib/Target/Mips/Mips16InstrInfo.h @@ -62,6 +62,11 @@ const TargetRegisterInfo *TRI, int64_t Offset) const override; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const override; + bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; unsigned getOppositeBranchOpc(unsigned Opc) const override; Index: lib/Target/Mips/Mips16InstrInfo.cpp =================================================================== --- lib/Target/Mips/Mips16InstrInfo.cpp +++ lib/Target/Mips/Mips16InstrInfo.cpp @@ -126,6 +126,13 @@ .addMemOperand(MMO); } +MachineInstr *Mips16InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const { + return nullptr; +} + bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { MachineBasicBlock &MBB = *MI->getParent();
switch(MI->getDesc().getOpcode()) { Index: lib/Target/Mips/MipsInstrInfo.h =================================================================== --- lib/Target/Mips/MipsInstrInfo.h +++ lib/Target/Mips/MipsInstrInfo.h @@ -103,6 +103,11 @@ loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0); } + virtual MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const = 0; + virtual void storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, Index: lib/Target/Mips/MipsSEInstrInfo.h =================================================================== --- lib/Target/Mips/MipsSEInstrInfo.h +++ lib/Target/Mips/MipsSEInstrInfo.h @@ -62,6 +62,11 @@ const TargetRegisterInfo *TRI, int64_t Offset) const override; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const override; + bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; unsigned getOppositeBranchOpc(unsigned Opc) const override; Index: lib/Target/Mips/MipsSEInstrInfo.cpp =================================================================== --- lib/Target/Mips/MipsSEInstrInfo.cpp +++ lib/Target/Mips/MipsSEInstrInfo.cpp @@ -260,6 +260,32 @@ .addMemOperand(MMO); } +MachineInstr *MipsSEInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + ArrayRef<unsigned> Ops, + int FrameIndex) const { + unsigned Opc = MI->getOpcode(); + if ((Opc != Mips::ADDiu) && (Opc != Mips::DADDiu)) + return nullptr; + + unsigned Reg = MI->getOperand(1).getReg(); + if ((Reg != Mips::ZERO) && (Reg != Mips::ZERO_64)) + return nullptr; + + int64_t Imm = MI->getOperand(2).getImm(); + if (Imm != 0) + return nullptr; + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterClass *RC = MRI.getRegClass(MI->getOperand(0).getReg()); + + storeRegToStack(*MI->getParent(), MI, Reg, false, FrameIndex, RC, +
MRI.getTargetRegisterInfo(), 0); + + return --MachineBasicBlock::iterator(MI); +} + + bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { MachineBasicBlock &MBB = *MI->getParent(); bool isMicroMips = Subtarget.inMicroMipsMode(); Index: lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.cpp +++ lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -750,6 +750,7 @@ MachineInstr *MI, ArrayRef<unsigned> Ops, int FrameIndex) const { + MachineBasicBlock &MBB = *MI->getParent(); const MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Size = MFI->getObjectSize(FrameIndex); unsigned Opcode = MI->getOpcode(); @@ -759,7 +760,7 @@ isInt<8>(MI->getOperand(2).getImm()) && !MI->getOperand(3).getReg()) { // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST - return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::AGSI)) + return BuildMI(MBB, MI, MI->getDebugLoc(), get(SystemZ::AGSI)) .addFrameIndex(FrameIndex).addImm(0) .addImm(MI->getOperand(2).getImm()); } @@ -780,7 +781,7 @@ isInt<8>(MI->getOperand(2).getImm())) { // A(G)HI %reg, CONST -> A(G)SI %mem, CONST Opcode = (Opcode == SystemZ::AHI ? SystemZ::ASI : SystemZ::AGSI); - return BuildMI(MF, MI->getDebugLoc(), get(Opcode)) + return BuildMI(MBB, MI, MI->getDebugLoc(), get(Opcode)) .addFrameIndex(FrameIndex).addImm(0) .addImm(MI->getOperand(2).getImm()); } @@ -792,7 +793,7 @@ // source register instead. if (OpNum == 0) { unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD; - return BuildMI(MF, MI->getDebugLoc(), get(StoreOpcode)) + return BuildMI(MBB, MI, MI->getDebugLoc(), get(StoreOpcode)) .addOperand(MI->getOperand(1)).addFrameIndex(FrameIndex) .addImm(0).addReg(0); } @@ -801,7 +802,7 @@ if (OpNum == 1) { unsigned LoadOpcode = Op0IsGPR ?
SystemZ::LG : SystemZ::LD; unsigned Dest = MI->getOperand(0).getReg(); - return BuildMI(MF, MI->getDebugLoc(), get(LoadOpcode), Dest) + return BuildMI(MBB, MI, MI->getDebugLoc(), get(LoadOpcode), Dest) .addFrameIndex(FrameIndex).addImm(0).addReg(0); } } @@ -824,14 +825,14 @@ if (MMO->getSize() == Size && !MMO->isVolatile()) { // Handle conversion of loads. if (isSimpleBD12Move(MI, SystemZII::SimpleBDXLoad)) { - return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) + return BuildMI(MBB, MI, MI->getDebugLoc(), get(SystemZ::MVC)) .addFrameIndex(FrameIndex).addImm(0).addImm(Size) .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) .addMemOperand(MMO); } // Handle conversion of stores. if (isSimpleBD12Move(MI, SystemZII::SimpleBDXStore)) { - return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::MVC)) + return BuildMI(MBB, MI, MI->getDebugLoc(), get(SystemZ::MVC)) .addOperand(MI->getOperand(1)).addImm(MI->getOperand(2).getImm()) .addImm(Size).addFrameIndex(FrameIndex).addImm(0) .addMemOperand(MMO); @@ -850,7 +851,8 @@ assert(AccessBytes != 0 && "Size of access should be known"); assert(AccessBytes <= Size && "Access outside the frame index"); uint64_t Offset = Size - AccessBytes; - MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(MemOpcode)); + MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), + get(MemOpcode)); for (unsigned I = 0; I < OpNum; ++I) MIB.addOperand(MI->getOperand(I)); MIB.addFrameIndex(FrameIndex).addImm(Offset); Index: lib/Target/X86/X86FastISel.cpp =================================================================== --- lib/Target/X86/X86FastISel.cpp +++ lib/Target/X86/X86FastISel.cpp @@ -3518,7 +3518,6 @@ return false; Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI)); - FuncInfo.MBB->insert(FuncInfo.InsertPt, Result); MI->eraseFromParent(); return true; } Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp 
+++ lib/Target/X86/X86InstrInfo.cpp @@ -4769,6 +4769,7 @@ ArrayRef<MachineOperand> MOs, unsigned Size, unsigned Align, bool AllowCommute) const { + MachineBasicBlock &MBB = *MI->getParent(); const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr; bool isCallRegIndirect = Subtarget.callRegIndirect(); @@ -4804,7 +4805,7 @@ if (MI->getOpcode() == X86::MOV32r0) { NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); if (NewMI) - return NewMI; + return MBB.insert(MachineBasicBlock::iterator(MI), NewMI); } OpcodeTablePtr = &RegOp2MemOpTable0; @@ -4861,7 +4862,8 @@ else NewMI->getOperand(0).setSubReg(X86::sub_32bit); } - return NewMI; + + return MBB.insert(MachineBasicBlock::iterator(MI), NewMI); } } Index: test/CodeGen/Mips/fold-zero-copy.ll =================================================================== --- /dev/null +++ test/CodeGen/Mips/fold-zero-copy.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s +; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s +; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s +; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s + +; NOTE: This test was generated with csmith and reduced with bugpoint. +; We don't test for 64-bit targets because that requires performing the same +; time consuming procedure (csmith + bugpoint) again. Despite that, the code +; path for the checks of this test is the same for both 32-bit and 64-bit +; targets, so we are essentially checking both targets with just one test.
+ +@g_51 = external global [8 x [9 x [3 x i32]]], align 4 +@g_295 = external global i8, align 4 +@g_604 = external unnamed_addr global [6 x i16], align 2 +@g_1754 = external unnamed_addr global i64, align 8 +@g_1790 = external unnamed_addr global i32, align 4 +@g_721 = external global i32**, align 4 +@g_1478 = external global [7 x [2 x [5 x i8*]]], align 4 + +declare fastcc void @foo() + +define void @func_1() { +; CHECK-LABEL: func_1: + + ; CHECK: sw $zero, {{[0-9]+}}($sp) + ; CHECK-NOT: addiu $[[T0:[0-9]+]], $zero, 0 + ; CHECK-NOT: sw $[[T0]], {{[0-9]+}}($sp) + +for.cond.preheader: + br label %for.body + +for.body: + br i1 undef, label %for.end, label %for.body + +for.end: + %0 = phi i64 [ 1, %for.end ], [ 3, %for.body ] + %1 = load volatile i8*, i8** getelementptr inbounds ([7 x [2 x [5 x i8*]]], [7 x [2 x [5 x i8*]]]* @g_1478, i32 0, i32 3, i32 0, i32 4), align 4 + store i64 0, i64* @g_1754, align 8 + %cmp475 = icmp slt i64 %0, 0 + %conv484 = zext i1 %cmp475 to i32 + %2 = load i16, i16* getelementptr inbounds ([6 x i16], [6 x i16]* @g_604, i32 0, i32 5), align 2 + %conv4855 = zext i16 %2 to i32 + %and = and i32 %conv484, %conv4855 + %conv486 = trunc i32 %and to i16 + store i16 %conv486, i16* getelementptr inbounds ([6 x i16], [6 x i16]* @g_604, i32 0, i32 5), align 2 + store i8 0, i8* @g_295, align 4 + tail call fastcc void @foo() + store i32 1, i32* @g_1790, align 4 + %3 = load volatile i32**, i32*** @g_721, align 4 + store i32 1, i32* getelementptr inbounds ([8 x [9 x [3 x i32]]], [8 x [9 x [3 x i32]]]* @g_51, i32 0, i32 0, i32 5, i32 1), align 4 + br label %for.end +}