Index: llvm/trunk/lib/Target/X86/X86.td =================================================================== --- llvm/trunk/lib/Target/X86/X86.td +++ llvm/trunk/lib/Target/X86/X86.td @@ -181,6 +181,11 @@ def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", "Pad short functions">; +// TODO: This feature ought to be renamed. +// What it really refers to are CPUs where instructions that cause MSROM +// lookups are expensive, so alternative sequences should be preferred. +// The best examples of this are the memory forms of CALL and PUSH +// instructions, which should be avoided in favor of a MOV + register CALL/PUSH. def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", "CallRegIndirect", "true", "Call register indirect">; Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp @@ -332,6 +332,9 @@ { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD }, { X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE }, { X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE }, + { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD }, + { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD }, + { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD }, { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE }, { X86::SETAr, X86::SETAm, TB_FOLDED_STORE }, { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE }, @@ -4878,10 +4881,14 @@ bool isCallRegIndirect = Subtarget.callRegIndirect(); bool isTwoAddrFold = false; - // For CPUs that favor the register form of a call, - // do not fold loads into calls. - if (isCallRegIndirect && - (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) + // For CPUs that favor the register form of a call or push, + // do not fold loads into calls or pushes, unless optimizing for size + // aggressively. 
+ if (isCallRegIndirect && + !MF.getFunction()->hasFnAttribute(Attribute::MinSize) && + (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r || + MI->getOpcode() == X86::PUSH16r || MI->getOpcode() == X86::PUSH32r || + MI->getOpcode() == X86::PUSH64r)) return nullptr; unsigned NumOps = MI->getDesc().getNumOperands(); Index: llvm/trunk/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td @@ -1022,12 +1022,8 @@ IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>; def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[], IIC_PUSH_REG>, OpSize16; -def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[], - IIC_PUSH_MEM>, OpSize16; def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[], IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>; -def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[], - IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>; def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm), "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16; @@ -1041,6 +1037,14 @@ "push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32, Requires<[Not64BitMode]>; } // mayStore, SchedRW + +let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in { +def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[], + IIC_PUSH_MEM>, OpSize16; +def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[], + IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>; +} // mayLoad, mayStore, SchedRW + } let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0, @@ -1073,9 +1077,11 @@ IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>; def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>; +} // mayStore, SchedRW +let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in { def 
PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [], IIC_PUSH_MEM>, OpSize32, Requires<[In64BitMode]>; -} // mayStore, SchedRW +} // mayLoad, mayStore, SchedRW } let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1, Index: llvm/trunk/test/CodeGen/X86/fold-push.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fold-push.ll +++ llvm/trunk/test/CodeGen/X86/fold-push.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL +; RUN: llc < %s -mtriple=i686-windows -mattr=call-reg-indirect | FileCheck %s -check-prefix=CHECK -check-prefix=SLM + +declare void @foo(i32 %r) + +define void @test(i32 %a, i32 %b) optsize { +; CHECK-LABEL: test: +; CHECK: movl [[EAX:%e..]], (%esp) +; CHECK-NEXT: pushl [[EAX]] +; CHECK-NEXT: calll +; CHECK-NEXT: addl $4, %esp +; CHECK: nop +; NORMAL: pushl (%esp) +; SLM: movl (%esp), [[RELOAD:%e..]] +; SLM-NEXT: pushl [[RELOAD]] +; CHECK: calll +; CHECK-NEXT: addl $4, %esp + %c = add i32 %a, %b + call void @foo(i32 %c) + call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"() + call void @foo(i32 %c) + ret void +} + +define void @test_min(i32 %a, i32 %b) minsize { +; CHECK-LABEL: test_min: +; CHECK: movl [[EAX:%e..]], (%esp) +; CHECK-NEXT: pushl [[EAX]] +; CHECK-NEXT: calll +; CHECK-NEXT: addl $4, %esp +; CHECK: nop +; CHECK: pushl (%esp) +; CHECK: calll +; CHECK-NEXT: addl $4, %esp + %c = add i32 %a, %b + call void @foo(i32 %c) + call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"() + call void @foo(i32 %c) + ret void +}