Index: llvm/trunk/lib/Target/X86/X86.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86.td
+++ llvm/trunk/lib/Target/X86/X86.td
@@ -288,6 +288,13 @@
           "ermsb", "HasERMSB", "true",
           "REP MOVS/STOS are fast">;
 
+// Sandy Bridge and newer processors have many instructions that can be
+// fused with conditional branches and pass through the CPU as a single
+// operation.
+def FeatureMacroFusion
+    : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
+                 "Various instructions can be fused with conditional branches">;
+
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
 //===----------------------------------------------------------------------===//
@@ -372,7 +379,8 @@
   FeatureFXSR,
   FeatureCMPXCHG16B,
   FeatureSlowBTMem,
-  FeatureLAHFSAHF
+  FeatureLAHFSAHF,
+  FeatureMacroFusion
 ]>;
 def : ProcessorModel<"penryn", SandyBridgeModel, [
   FeatureX87,
@@ -382,7 +390,8 @@
   FeatureFXSR,
   FeatureCMPXCHG16B,
   FeatureSlowBTMem,
-  FeatureLAHFSAHF
+  FeatureLAHFSAHF,
+  FeatureMacroFusion
 ]>;
 
 // Atom CPUs.
@@ -468,7 +477,8 @@
   FeatureCMPXCHG16B,
   FeatureSlowBTMem,
   FeaturePOPCNT,
-  FeatureLAHFSAHF
+  FeatureLAHFSAHF,
+  FeatureMacroFusion
 ]>;
 def : NehalemProc<"nehalem">;
 def : NehalemProc<"corei7">;
@@ -485,7 +495,8 @@
   FeaturePOPCNT,
   FeatureAES,
   FeaturePCLMUL,
-  FeatureLAHFSAHF
+  FeatureLAHFSAHF,
+  FeatureMacroFusion
 ]>;
 def : WestmereProc<"westmere">;
 
@@ -516,7 +527,8 @@
   FeatureLAHFSAHF,
   FeatureSlow3OpsLEA,
   FeatureFastScalarFSQRT,
-  FeatureFastSHLDRotate
+  FeatureFastSHLDRotate,
+  FeatureMacroFusion
 ]>;
 class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
                                                SNBFeatures.Value, []>;
@@ -755,7 +768,8 @@
   FeatureLWP,
   FeatureFMA,
   FeatureSlowSHLD,
-  FeatureLAHFSAHF
+  FeatureLAHFSAHF,
+  FeatureMacroFusion
 ]>;
 
 // Steamroller
@@ -782,7 +796,8 @@
   FeatureXSAVEOPT,
   FeatureSlowSHLD,
   FeatureFSGSBase,
-  FeatureLAHFSAHF
+  FeatureLAHFSAHF,
+  FeatureMacroFusion
 ]>;
 
 // Excavator
@@ -810,7 +825,8 @@
   FeatureSlowSHLD,
   FeatureFSGSBase,
   FeatureLAHFSAHF,
-  FeatureMWAITX
+  FeatureMWAITX,
+  FeatureMacroFusion
 ]>;
 
 // Znver1
@@ -830,6 +846,7 @@
   FeatureFastLZCNT,
   FeatureLAHFSAHF,
   FeatureLZCNT,
+  FeatureMacroFusion,
   FeatureMMX,
   FeatureMOVBE,
   FeatureMWAITX,
@@ -873,7 +890,8 @@
   Feature64Bit,
   FeatureSlow3OpsLEA,
   FeatureSlowBTMem,
-  FeatureSlowIncDec
+  FeatureSlowIncDec,
+  FeatureMacroFusion
 ]>;
 
 //===----------------------------------------------------------------------===//
Index: llvm/trunk/lib/Target/X86/X86MacroFusion.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86MacroFusion.cpp
+++ llvm/trunk/lib/Target/X86/X86MacroFusion.cpp
@@ -27,10 +27,8 @@
                                    const MachineInstr *FirstMI,
                                    const MachineInstr &SecondMI) {
   const X86Subtarget &ST = static_cast<const X86Subtarget &>(TSI);
-  // Check if this processor supports macro-fusion. Since this is a minor
-  // heuristic, we haven't specifically reserved a feature. hasAVX is a decent
-  // proxy for SandyBridge+.
-  if (!ST.hasAVX())
+  // Check if this processor supports macro-fusion.
+  if (!ST.hasMacroFusion())
     return false;
 
   enum {
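
The X86MacroFusion.cpp hunk above is the behavioral core of the patch: the scheduler's adjacency hook now asks the subtarget for an explicit macro-fusion bit instead of treating AVX support as a proxy for "Sandy Bridge or newer". As a rough illustration of the decision that hook makes — a standalone sketch with invented names, not LLVM's actual interface — the logic reduces to "keep a flag-producing instruction glued to the conditional branch that consumes its flags, but only when the CPU reports the feature":

    // Standalone sketch, not LLVM's real API. Opcode names, the Inst type,
    // and the HasMacroFusion parameter are all illustrative only.
    #include <cassert>

    enum class Op { Cmp, Test, Alu, IncDec, CondBranch, Other };

    struct Inst {
      Op Opcode;
    };

    // Return true when First should stay directly in front of Second so the
    // CPU front end can decode the pair as a single macro-op.
    bool shouldScheduleAdjacent(bool HasMacroFusion, const Inst &First,
                                const Inst &Second) {
      // Mirrors the ST.hasMacroFusion() gate in the hunk above.
      if (!HasMacroFusion)
        return false;
      // Only a conditional branch can be the second half of a fused pair.
      if (Second.Opcode != Op::CondBranch)
        return false;
      switch (First.Opcode) {
      case Op::Cmp:    // CMP/TEST feeding a branch: the classic candidate
      case Op::Test:
      case Op::Alu:    // newer cores also fuse some ALU ops (e.g. ADD, SUB)
      case Op::IncDec:
        return true;
      default:
        return false;
      }
    }

    int main() {
      Inst Cmp{Op::Cmp}, Jcc{Op::CondBranch}, Mov{Op::Other};
      assert(shouldScheduleAdjacent(true, Cmp, Jcc));   // fusion candidate
      assert(!shouldScheduleAdjacent(false, Cmp, Jcc)); // feature bit off
      assert(!shouldScheduleAdjacent(true, Mov, Jcc));  // MOV never fuses
      return 0;
    }
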
Index: llvm/trunk/lib/Target/X86/X86Subtarget.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h
@@ -238,6 +238,9 @@
   /// True if SHLD based rotate is fast.
   bool HasFastSHLDRotate;
 
+  /// True if the processor supports macrofusion.
+  bool HasMacroFusion;
+
   /// True if the processor has enhanced REP MOVSB/STOSB.
   bool HasERMSB;
 
@@ -488,6 +491,7 @@
   bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
   bool hasFastLZCNT() const { return HasFastLZCNT; }
   bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
+  bool hasMacroFusion() const { return HasMacroFusion; }
   bool hasERMSB() const { return HasERMSB; }
   bool hasSlowDivide32() const { return HasSlowDivide32; }
   bool hasSlowDivide64() const { return HasSlowDivide64; }
Index: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp
@@ -347,6 +347,7 @@
   HasFastVectorFSQRT = false;
   HasFastLZCNT = false;
   HasFastSHLDRotate = false;
+  HasMacroFusion = false;
   HasERMSB = false;
   HasSlowDivide32 = false;
   HasSlowDivide64 = false;
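
With the plumbing in place (TableGen feature, subtarget bool, accessor), the test updates below show the observable effect: whether the machine scheduler keeps a compare or test immediately in front of its conditional branch now follows the CPU's feature bit rather than AVX availability, so unrelated instructions such as register-zeroing idioms can move between the pair when fusion is not in play. For orientation — plain C++, no LLVM APIs, function name invented — this is the kind of source pattern that produces CMP/TEST + Jcc fusion candidates on x86:

    // Illustration only: both the loop back edge and the early exit compile
    // to a flag-setting compare feeding a conditional branch. When the
    // scheduler keeps each pair adjacent (what the new feature bit gates),
    // fusion-capable front ends decode the pair as one macro-op.
    int sumBelow(const int *Data, int N, int Limit) {
      int Sum = 0;
      for (int I = 0; I < N; ++I) { // back edge: cmp then jl
        if (Data[I] >= Limit)       // early exit: cmp then jge
          break;
        Sum += Data[I];
      }
      return Sum;
    }
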
Index: llvm/trunk/test/CodeGen/X86/avx-select.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx-select.ll
+++ llvm/trunk/test/CodeGen/X86/avx-select.ll
@@ -16,8 +16,8 @@
 ;
 ; X64-LABEL: select00:
 ; X64:       # BB#0:
-; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X64-NEXT:    cmpl $255, %edi
+; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X64-NEXT:    je .LBB0_2
 ; X64-NEXT:  # BB#1:
 ; X64-NEXT:    vmovaps %ymm0, %ymm1
@@ -44,8 +44,8 @@
 ;
 ; X64-LABEL: select01:
 ; X64:       # BB#0:
-; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X64-NEXT:    cmpl $255, %edi
+; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X64-NEXT:    je .LBB1_2
 ; X64-NEXT:  # BB#1:
 ; X64-NEXT:    vmovaps %ymm0, %ymm1
Index: llvm/trunk/test/CodeGen/X86/avx-splat.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx-splat.ll
+++ llvm/trunk/test/CodeGen/X86/avx-splat.ll
@@ -60,8 +60,8 @@
 ; CHECK-LABEL: funcE:
 ; CHECK:       # BB#0: # %for_exit499
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    # implicit-def: %YMM0
 ; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    # implicit-def: %YMM0
 ; CHECK-NEXT:    jne .LBB4_2
 ; CHECK-NEXT:  # BB#1: # %load.i1247
 ; CHECK-NEXT:    pushq %rbp
Index: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
@@ -692,8 +692,8 @@
 ;
 ; AVX512BW-LABEL: test8:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512BW-NEXT:    cmpl %esi, %edi
+; AVX512BW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512BW-NEXT:    jg LBB17_1
 ; AVX512BW-NEXT:  ## BB#2:
 ; AVX512BW-NEXT:    vpcmpltud %zmm2, %zmm1, %k0
@@ -708,8 +708,8 @@
 ;
 ; AVX512DQ-LABEL: test8:
 ; AVX512DQ:       ## BB#0:
-; AVX512DQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512DQ-NEXT:    cmpl %esi, %edi
+; AVX512DQ-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512DQ-NEXT:    jg LBB17_1
 ; AVX512DQ-NEXT:  ## BB#2:
 ; AVX512DQ-NEXT:    vpcmpltud %zmm2, %zmm1, %k0
Index: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
@@ -1678,8 +1678,8 @@
 ; VEX-NEXT:    vaddss %xmm0, %xmm0, %xmm0
 ; VEX-NEXT:  .LBB39_6:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; VEX-NEXT:    testq %rax, %rax
+; VEX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; VEX-NEXT:    js .LBB39_8
 ; VEX-NEXT:  # BB#7:
 ; VEX-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm1
@@ -1914,8 +1914,8 @@
 ; VEX-NEXT:    vaddss %xmm0, %xmm0, %xmm0
 ; VEX-NEXT:  .LBB41_6:
 ; VEX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; VEX-NEXT:    testq %rax, %rax
+; VEX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; VEX-NEXT:    js .LBB41_8
 ; VEX-NEXT:  # BB#7:
 ; VEX-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm1
Index: llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll
+++ llvm/trunk/test/CodeGen/X86/x86-cmov-converter.ll
@@ -296,9 +296,9 @@
 ; CHECK-LABEL: Transform
 ; CHECK-NOT: cmov
 ; CHECK:        divl [[a:%[0-9a-z]*]]
-; CHECK:        cmpl [[a]], %eax
 ; CHECK:        movl $11, [[s1:%[0-9a-z]*]]
 ; CHECK:        movl [[a]], [[s2:%[0-9a-z]*]]
+; CHECK:        cmpl [[a]], %edx
 ; CHECK:        ja [[SinkBB:.*]]
 ; CHECK: [[FalseBB:.*]]:
 ; CHECK:        movl $22, [[s1]]
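
A closing note on the design: a dedicated feature bit corrects the old hasAVX() heuristic in both directions. Pre-AVX processors that do fuse compare-and-branch pairs (Core 2 through Westmere in the X86.td hunks) now opt in explicitly, Sandy Bridge and later inherit the bit through SNBFeatures, the AMD bdver and znver1 definitions carry it, and the generic "x86-64" CPU keeps it so default-tuned code continues to schedule for fusion. Since the feature string is "macrofusion", the behavior should also be toggleable per run with -mattr=+macrofusion or -mattr=-macrofusion, which is a convenient way to reproduce scheduling differences like the ones in the test diffs above.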