Index: llvm/lib/CodeGen/MachineOutliner.cpp =================================================================== --- llvm/lib/CodeGen/MachineOutliner.cpp +++ llvm/lib/CodeGen/MachineOutliner.cpp @@ -56,6 +56,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineOutliner.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineFunction.h" @@ -1245,31 +1246,53 @@ // make sure that the ranges we yank things out of aren't wrong. if (MBB.getParent()->getProperties().hasProperty( MachineFunctionProperties::Property::TracksLiveness)) { - // Helper lambda for adding implicit def operands to the call + // The following code is to add implicit def operands to the call // instruction. It also updates call site information for moved // code. - auto CopyDefsAndUpdateCalls = [&CallInst](MachineInstr &MI) { - for (MachineOperand &MOP : MI.operands()) { - // Skip over anything that isn't a register. - if (!MOP.isReg()) - continue; - - // If it's a def, add it to the call instruction. - if (MOP.isDef()) - CallInst->addOperand(MachineOperand::CreateReg( - MOP.getReg(), true, /* isDef = true */ - true /* isImp = true */)); - } - if (MI.isCandidateForCallSiteEntry()) - MI.getMF()->eraseCallSiteInfo(&MI); - }; + SmallSet<Register, 2> UseRegs, DefRegs; // Copy over the defs in the outlined range. // First inst in outlined range <-- Anything that's defined in this // ... .. range has to be added as an // implicit Last inst in outlined range <-- def to the call // instruction. Also remove call site information for outlined block - // of code. - std::for_each(CallInst, std::next(EndIt), CopyDefsAndUpdateCalls); + // of code. The exposed uses need to be copied in the outlined range. 
+ for (MachineBasicBlock::reverse_iterator Iter = EndIt.getReverse(), + Last = std::next(CallInst.getReverse()); + Iter != Last; Iter++) { + MachineInstr *MI = &*Iter; + for (MachineOperand &MOP : MI->operands()) { + // Skip over anything that isn't a register. + if (!MOP.isReg()) + continue; + + if (MOP.isDef()) { + // Introduce DefRegs set to skip the redundant register. + DefRegs.insert(MOP.getReg()); + if (UseRegs.count(MOP.getReg())) + // Since the register is modeled as defined, + // it does not need to be kept in the use register set. + UseRegs.erase(MOP.getReg()); + } else if (!MOP.isUndef()) { + // Any register which is not undefined should + // be put in the use register set. + UseRegs.insert(MOP.getReg()); + } + } + if (MI->isCandidateForCallSiteEntry()) + MI->getMF()->eraseCallSiteInfo(MI); + } + + for (const Register &I : DefRegs) + // If it's a def, add it to the call instruction. + CallInst->addOperand(MachineOperand::CreateReg( + I, true, /* isDef = true */ + true /* isImp = true */)); + + for (const Register &I : UseRegs) + // If it's an exposed use, add it to the call instruction. + CallInst->addOperand( + MachineOperand::CreateReg(I, false, /* isDef = false */ + true /* isImp = true */)); } // Erase from the point after where the call was inserted up to, and Index: llvm/test/CodeGen/AArch64/machine-outliner-side-effect.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/machine-outliner-side-effect.mir @@ -0,0 +1,122 @@ +# RUN: llc -mtriple=aarch64--- -run-pass=prologepilog -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s + +# The test checks whether the compiler updates the side effect of function @OUTLINED_FUNCTION_0 by adding the use of register x20. 
+ +--- | + target triple = "aarch64-apple-darwin" + + %0 = type { %1*, i64 } + %1 = type { i64 } + %2 = type opaque + %3 = type <{ %4 }> + %4 = type <{ %5 }> + %5 = type <{ %2* }> + %6 = type <{ %0, %7, %3 }> + %7 = type <{ %8, %8, %8, %8 }> + %8 = type <{ %9 }> + %9 = type <{ double }> + + declare %2* @blam.5(%2* returned) local_unnamed_addr + + declare i64 @widget.7(%2*, i64, i64, i64) local_unnamed_addr + + declare swiftcc i64 @spam(%2*) local_unnamed_addr + + define i64 @baz.14() { + bb: + %tmp37 = call swiftcc %6* @barney.16() + %tmp57 = getelementptr inbounds %6, %6* %tmp37, i64 0, i32 2 + %tmp621 = bitcast %3* %tmp57 to %2** + %tmp63 = load %2*, %2** %tmp621, align 8 + %tmp64 = ptrtoint %2* %tmp63 to i64 + %tmp67 = icmp sgt %2* %tmp63, inttoptr (i64 -1 to %2*) + %tmp68 = bitcast %2* %tmp63 to %2* + %tmp69 = and i64 %tmp64, 1152921504606846968 + %tmp70 = inttoptr i64 %tmp69 to %2* + %tmp71 = select i1 %tmp67, %2* %tmp70, %2* %tmp68 + %tmp72 = call %2* @blam.5(%2* returned %tmp63) + %tmp73 = call swiftcc i64 @spam(%2* %tmp71) + %tmp74 = call swiftcc i64 @spam(%2* %tmp71) + %tmp75 = call swiftcc i64 @spam(%2* %tmp71) + %tmp76 = call i64 @widget.7(%2* %tmp72, i64 %tmp73, i64 %tmp74, i64 %tmp75) + ret i64 %tmp76 + } + + declare hidden swiftcc %6* @barney.16() local_unnamed_addr + +... 
+--- +name: baz.14 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.bb: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @barney.16, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + renamable $x19 = LDRXui killed renamable $x0, 6 :: (load 8 from %ir.tmp621) + renamable $x8 = ANDXri renamable $x19, 8056 + dead $xzr = SUBSXri renamable $x19, 0, 0, implicit-def $nzcv + renamable $x20 = CSELXr killed renamable $x8, renamable $x19, 10, implicit killed $nzcv + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY renamable $x19 + BL @blam.5, csr_aarch64_aapcs_thisreturn, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY renamable $x20 + BL @spam, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + renamable $x21 = COPY $x0 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY renamable $x20 + BL @spam, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def 
$x0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + renamable $x22 = COPY $x0 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY killed renamable $x20 + BL @spam, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + renamable $x3 = COPY $x0 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY killed renamable $x19 + $x1 = COPY killed renamable $x21 + $x2 = COPY killed renamable $x22 + BL @widget.7, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit $x3, implicit-def $sp, implicit-def $x0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + RET_ReallyLR implicit $x0 + +... + +# CHECK: BL @OUTLINED_FUNCTION_0, {{.*}}, implicit $x20, {{.*}}