Diff 192626

llvm/trunk/lib/Target/X86/X86.td

Show First 20 Lines • Show All 338 Lines • ▼ Show 20 Lines
// Development Manual. This feature essentially means that REP MOVSB will copy		// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making		// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REPMOVS{W,D,Q}.		// it at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB		def FeatureERMSB
: SubtargetFeature<		: SubtargetFeature<
"ermsb", "HasERMSB", "true",		"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;		"REP MOVS/STOS are fast">;

		// Bulldozer and newer processors can merge CMP/TEST (but not other
		// instructions) with conditional branches.
		def FeatureBranchFusion
		: SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
		"CMP/TEST can be fused with conditional branches">;

// Sandy Bridge and newer processors have many instructions that can be		// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single		// fused with conditional branches and pass through the CPU as a single
// operation.		// operation.
def FeatureMacroFusion		def FeatureMacroFusion
: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",		: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
"Various instructions can be fused with conditional branches">;		"Various instructions can be fused with conditional branches">;

// Gather is available since Haswell (AVX2 set). So technically, we can		// Gather is available since Haswell (AVX2 set). So technically, we can
▲ Show 20 Lines • Show All 450 Lines • ▼ Show 20 Lines	list<SubtargetFeature> BdVer1InheritableFeatures = [FeatureX87,
FeatureNOPL,		FeatureNOPL,
FeatureLZCNT,		FeatureLZCNT,
FeaturePOPCNT,		FeaturePOPCNT,
FeatureXSAVE,		FeatureXSAVE,
FeatureLWP,		FeatureLWP,
FeatureSlowSHLD,		FeatureSlowSHLD,
FeatureLAHFSAHF,		FeatureLAHFSAHF,
FeatureFast11ByteNOP,		FeatureFast11ByteNOP,
FeatureMacroFusion];		FeatureBranchFusion];
list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;		list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;

// PileDriver		// PileDriver
list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,		list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
FeatureBMI,		FeatureBMI,
FeatureTBM,		FeatureTBM,
FeatureFMA,		FeatureFMA,
FeatureFastBEXTR];		FeatureFastBEXTR];
Show All 33 Lines	list<SubtargetFeature> ZNFeatures = [FeatureADX,
FeatureFSGSBase,		FeatureFSGSBase,
FeatureFXSR,		FeatureFXSR,
FeatureNOPL,		FeatureNOPL,
FeatureFastLZCNT,		FeatureFastLZCNT,
FeatureLAHFSAHF,		FeatureLAHFSAHF,
FeatureLZCNT,		FeatureLZCNT,
FeatureFastBEXTR,		FeatureFastBEXTR,
FeatureFast15ByteNOP,		FeatureFast15ByteNOP,
FeatureMacroFusion,		FeatureBranchFusion,
FeatureMMX,		FeatureMMX,
FeatureMOVBE,		FeatureMOVBE,
FeatureMWAITX,		FeatureMWAITX,
FeaturePCLMUL,		FeaturePCLMUL,
FeaturePOPCNT,		FeaturePOPCNT,
FeaturePRFCHW,		FeaturePRFCHW,
FeatureRDRAND,		FeatureRDRAND,
FeatureRDSEED,		FeatureRDSEED,
▲ Show 20 Lines • Show All 340 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86MacroFusion.cpp

Show All 12 Lines

#include "X86MacroFusion.h"		#include "X86MacroFusion.h"
#include "X86Subtarget.h"		#include "X86Subtarget.h"
#include "llvm/CodeGen/MacroFusion.h"		#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/TargetInstrInfo.h"		#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;		using namespace llvm;

/// Check if the instr pair, FirstMI and SecondMI, should be fused		namespace {
/// together. Given SecondMI, when FirstMI is unspecified, then check if
/// SecondMI may be part of a fused pair at all.
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
const TargetSubtargetInfo &TSI,
const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
const X86Subtarget &ST = static_cast<const X86Subtarget&>(TSI);
// Check if this processor supports macro-fusion.
if (!ST.hasMacroFusion())
return false;

enum {		// The classification for the first instruction.
FuseTest,		enum class FirstInstrKind { Test, Cmp, And, ALU, IncDec, Invalid };
FuseCmp,
FuseInc
} FuseKind;

unsigned FirstOpcode = FirstMI
? FirstMI->getOpcode()
: static_cast<unsigned>(X86::INSTRUCTION_LIST_END);
unsigned SecondOpcode = SecondMI.getOpcode();

switch (SecondOpcode) {		// The classification for the second instruction (jump).
default:		enum class JumpKind {
return false;		// JE, JL, JG and variants.
case X86::JE_1:		ELG,
case X86::JNE_1:		// JA, JB and variants.
case X86::JL_1:		AB,
case X86::JLE_1:		// JS, JP, JO and variants.
case X86::JG_1:		SPO,
case X86::JGE_1:		// Not a fusable jump.
FuseKind = FuseInc;		Invalid,
break;		};
case X86::JB_1:
case X86::JBE_1:
case X86::JA_1:
case X86::JAE_1:
FuseKind = FuseCmp;
break;
case X86::JS_1:
case X86::JNS_1:
case X86::JP_1:
case X86::JNP_1:
case X86::JO_1:
case X86::JNO_1:
FuseKind = FuseTest;
break;
}

switch (FirstOpcode) {		} // namespace

		static FirstInstrKind classifyFirst(const MachineInstr &MI) {
		switch (MI.getOpcode()) {
default:		default:
return false;		return FirstInstrKind::Invalid;
case X86::TEST8rr:		case X86::TEST8rr:
case X86::TEST16rr:		case X86::TEST16rr:
case X86::TEST32rr:		case X86::TEST32rr:
case X86::TEST64rr:		case X86::TEST64rr:
case X86::TEST8ri:		case X86::TEST8ri:
case X86::TEST16ri:		case X86::TEST16ri:
case X86::TEST32ri:		case X86::TEST32ri:
case X86::TEST64ri32:		case X86::TEST64ri32:
case X86::TEST8mr:		case X86::TEST8mr:
case X86::TEST16mr:		case X86::TEST16mr:
case X86::TEST32mr:		case X86::TEST32mr:
case X86::TEST64mr:		case X86::TEST64mr:
		return FirstInstrKind::Test;
case X86::AND16ri:		case X86::AND16ri:
case X86::AND16ri8:		case X86::AND16ri8:
case X86::AND16rm:		case X86::AND16rm:
case X86::AND16rr:		case X86::AND16rr:
case X86::AND32ri:		case X86::AND32ri:
case X86::AND32ri8:		case X86::AND32ri8:
case X86::AND32rm:		case X86::AND32rm:
case X86::AND32rr:		case X86::AND32rr:
case X86::AND64ri32:		case X86::AND64ri32:
case X86::AND64ri8:		case X86::AND64ri8:
case X86::AND64rm:		case X86::AND64rm:
case X86::AND64rr:		case X86::AND64rr:
case X86::AND8ri:		case X86::AND8ri:
case X86::AND8rm:		case X86::AND8rm:
case X86::AND8rr:		case X86::AND8rr:
return true;		return FirstInstrKind::And;
case X86::CMP16ri:		case X86::CMP16ri:
case X86::CMP16ri8:		case X86::CMP16ri8:
case X86::CMP16rm:		case X86::CMP16rm:
case X86::CMP16rr:		case X86::CMP16rr:
case X86::CMP16mr:		case X86::CMP16mr:
case X86::CMP32ri:		case X86::CMP32ri:
case X86::CMP32ri8:		case X86::CMP32ri8:
case X86::CMP32rm:		case X86::CMP32rm:
case X86::CMP32rr:		case X86::CMP32rr:
case X86::CMP32mr:		case X86::CMP32mr:
case X86::CMP64ri32:		case X86::CMP64ri32:
case X86::CMP64ri8:		case X86::CMP64ri8:
case X86::CMP64rm:		case X86::CMP64rm:
case X86::CMP64rr:		case X86::CMP64rr:
case X86::CMP64mr:		case X86::CMP64mr:
case X86::CMP8ri:		case X86::CMP8ri:
case X86::CMP8rm:		case X86::CMP8rm:
case X86::CMP8rr:		case X86::CMP8rr:
case X86::CMP8mr:		case X86::CMP8mr:
		return FirstInstrKind::Cmp;
case X86::ADD16ri:		case X86::ADD16ri:
case X86::ADD16ri8:		case X86::ADD16ri8:
case X86::ADD16ri8_DB:		case X86::ADD16ri8_DB:
case X86::ADD16ri_DB:		case X86::ADD16ri_DB:
case X86::ADD16rm:		case X86::ADD16rm:
case X86::ADD16rr:		case X86::ADD16rr:
case X86::ADD16rr_DB:		case X86::ADD16rr_DB:
case X86::ADD32ri:		case X86::ADD32ri:
Show All 25 Lines	static FirstInstrKind classifyFirst(const MachineInstr &MI) {
case X86::SUB32rr:		case X86::SUB32rr:
case X86::SUB64ri32:		case X86::SUB64ri32:
case X86::SUB64ri8:		case X86::SUB64ri8:
case X86::SUB64rm:		case X86::SUB64rm:
case X86::SUB64rr:		case X86::SUB64rr:
case X86::SUB8ri:		case X86::SUB8ri:
case X86::SUB8rm:		case X86::SUB8rm:
case X86::SUB8rr:		case X86::SUB8rr:
return FuseKind == FuseCmp || FuseKind == FuseInc;		return FirstInstrKind::ALU;
case X86::INC16r:		case X86::INC16r:
case X86::INC32r:		case X86::INC32r:
case X86::INC64r:		case X86::INC64r:
case X86::INC8r:		case X86::INC8r:
case X86::DEC16r:		case X86::DEC16r:
case X86::DEC32r:		case X86::DEC32r:
case X86::DEC64r:		case X86::DEC64r:
case X86::DEC8r:		case X86::DEC8r:
return FuseKind == FuseInc;		return FirstInstrKind::IncDec;
case X86::INSTRUCTION_LIST_END:		}
		}

		static JumpKind classifySecond(const MachineInstr &MI) {
		switch (MI.getOpcode()) {
		default:
		return JumpKind::Invalid;
		case X86::JE_1:
		case X86::JNE_1:
		case X86::JL_1:
		case X86::JLE_1:
		case X86::JG_1:
		case X86::JGE_1:
		return JumpKind::ELG;
		case X86::JB_1:
		case X86::JBE_1:
		case X86::JA_1:
		case X86::JAE_1:
		return JumpKind::AB;
		case X86::JS_1:
		case X86::JNS_1:
		case X86::JP_1:
		case X86::JNP_1:
		case X86::JO_1:
		case X86::JNO_1:
		return JumpKind::SPO;
		}
		}

		/// Check if the instr pair, FirstMI and SecondMI, should be fused
		/// together. Given SecondMI, when FirstMI is unspecified, then check if
		/// SecondMI may be part of a fused pair at all.
		static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
		const TargetSubtargetInfo &TSI,
		const MachineInstr *FirstMI,
		const MachineInstr &SecondMI) {
		const X86Subtarget &ST = static_cast<const X86Subtarget &>(TSI);

		// Check if this processor supports any kind of fusion.
		if (!(ST.hasBranchFusion() || ST.hasMacroFusion()))
		return false;

		const JumpKind BranchKind = classifySecond(SecondMI);

		if (BranchKind == JumpKind::Invalid)
		return false; // Second cannot be fused with anything.

		if (FirstMI == nullptr)
		return true; // We're only checking whether Second can be fused at all.

		const FirstInstrKind TestKind = classifyFirst(*FirstMI);

		if (ST.hasBranchFusion()) {
		// Branch fusion can merge CMP and TEST with all conditional jumps.
		return (TestKind == FirstInstrKind::Cmp ||
		TestKind == FirstInstrKind::Test);
		}

		if (ST.hasMacroFusion()) {
		// Macro Fusion rules are a bit more complex. See Agner Fog's
		// Microarchitecture table 9.2 "Instruction Fusion".
		switch (TestKind) {
		case FirstInstrKind::Test:
		case FirstInstrKind::And:
return true;		return true;
		case FirstInstrKind::Cmp:
		case FirstInstrKind::ALU:
		return BranchKind == JumpKind::ELG || BranchKind == JumpKind::AB;
		case FirstInstrKind::IncDec:
		return BranchKind == JumpKind::ELG;
		case FirstInstrKind::Invalid:
		return false;
}		}
}		}

		llvm_unreachable("unknown branch fusion type");
		}

namespace llvm {		namespace llvm {

std::unique_ptr<ScheduleDAGMutation>		std::unique_ptr<ScheduleDAGMutation>
createX86MacroFusionDAGMutation () {		createX86MacroFusionDAGMutation () {
return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent);		return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent);
}		}

} // end namespace llvm		} // end namespace llvm

llvm/trunk/lib/Target/X86/X86Subtarget.h

Show First 20 Lines • Show All 291 Lines • ▼ Show 20 Lines	protected:
bool HasFastLZCNT = false;		bool HasFastLZCNT = false;

/// True if SHLD based rotate is fast.		/// True if SHLD based rotate is fast.
bool HasFastSHLDRotate = false;		bool HasFastSHLDRotate = false;

/// True if the processor supports macrofusion.		/// True if the processor supports macrofusion.
bool HasMacroFusion = false;		bool HasMacroFusion = false;

		/// True if the processor supports branch fusion.
		bool HasBranchFusion = false;

/// True if the processor has enhanced REP MOVSB/STOSB.		/// True if the processor has enhanced REP MOVSB/STOSB.
bool HasERMSB = false;		bool HasERMSB = false;

/// True if the short functions should be padded to prevent		/// True if the short functions should be padded to prevent
/// a stall when returning too early.		/// a stall when returning too early.
bool PadShortFunctions = false;		bool PadShortFunctions = false;

/// True if two memory operand instructions should use a temporary register		/// True if two memory operand instructions should use a temporary register
▲ Show 20 Lines • Show All 329 Lines • ▼ Show 20 Lines	public:
bool hasFastGather() const { return HasFastGather; }		bool hasFastGather() const { return HasFastGather; }
bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }		bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }		bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }		bool hasFastLZCNT() const { return HasFastLZCNT; }
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }		bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
bool hasFastBEXTR() const { return HasFastBEXTR; }		bool hasFastBEXTR() const { return HasFastBEXTR; }
bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }		bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
bool hasMacroFusion() const { return HasMacroFusion; }		bool hasMacroFusion() const { return HasMacroFusion; }
		bool hasBranchFusion() const { return HasBranchFusion; }
bool hasERMSB() const { return HasERMSB; }		bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }		bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }		bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }		bool padShortFunctions() const { return PadShortFunctions; }
bool slowTwoMemOps() const { return SlowTwoMemOps; }		bool slowTwoMemOps() const { return SlowTwoMemOps; }
bool LEAusesAG() const { return LEAUsesAG; }		bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }		bool slowLEA() const { return SlowLEA; }
bool slow3OpsLEA() const { return Slow3OpsLEA; }		bool slow3OpsLEA() const { return Slow3OpsLEA; }
▲ Show 20 Lines • Show All 199 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h

Show First 20 Lines • Show All 53 Lines • ▼ Show 20 Lines	const FeatureBitset InlineFeatureIgnoreList = {
X86::FeatureFastPartialYMMorZMMWrite,		X86::FeatureFastPartialYMMorZMMWrite,
X86::FeatureFastScalarFSQRT,		X86::FeatureFastScalarFSQRT,
X86::FeatureFastSHLDRotate,		X86::FeatureFastSHLDRotate,
X86::FeatureFastVariableShuffle,		X86::FeatureFastVariableShuffle,
X86::FeatureFastVectorFSQRT,		X86::FeatureFastVectorFSQRT,
X86::FeatureLEAForSP,		X86::FeatureLEAForSP,
X86::FeatureLEAUsesAG,		X86::FeatureLEAUsesAG,
X86::FeatureLZCNTFalseDeps,		X86::FeatureLZCNTFalseDeps,
		X86::FeatureBranchFusion,
X86::FeatureMacroFusion,		X86::FeatureMacroFusion,
X86::FeatureMergeToThreeWayBranch,		X86::FeatureMergeToThreeWayBranch,
X86::FeaturePadShortFunctions,		X86::FeaturePadShortFunctions,
X86::FeaturePOPCNTFalseDeps,		X86::FeaturePOPCNTFalseDeps,
X86::FeatureSSEUnalignedMem,		X86::FeatureSSEUnalignedMem,
X86::FeatureSlow3OpsLEA,		X86::FeatureSlow3OpsLEA,
X86::FeatureSlowDivide32,		X86::FeatureSlowDivide32,
X86::FeatureSlowDivide64,		X86::FeatureSlowDivide64,
▲ Show 20 Lines • Show All 142 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp

Show First 20 Lines • Show All 2,978 Lines • ▼ Show 20 Lines	return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
C1.NumIVMuls, C1.NumBaseAdds,		C1.NumIVMuls, C1.NumBaseAdds,
C1.ScaleCost, C1.ImmCost, C1.SetupCost) <		C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,		std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
C2.NumIVMuls, C2.NumBaseAdds,		C2.NumIVMuls, C2.NumBaseAdds,
C2.ScaleCost, C2.ImmCost, C2.SetupCost);		C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}		}

bool X86TTIImpl::canMacroFuseCmp() {		bool X86TTIImpl::canMacroFuseCmp() {
return ST->hasMacroFusion();		return ST->hasMacroFusion() || ST->hasBranchFusion();
}		}

bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {		bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
if (!ST->hasAVX())		if (!ST->hasAVX())
return false;		return false;

// The backend can't handle a single element vector.		// The backend can't handle a single element vector.
if (isa<VectorType>(DataTy) && DataTy->getVectorNumElements() == 1)		if (isa<VectorType>(DataTy) && DataTy->getVectorNumElements() == 1)
▲ Show 20 Lines • Show All 457 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/testb-je-fusion.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-- -mattr=-macrofusion | FileCheck %s --check-prefix=NOFUSION			; RUN: llc < %s -mtriple=x86_64-- -mattr=-macrofusion,-branchfusion | FileCheck %s --check-prefix=NOFUSION
	; RUN: llc < %s -mtriple=x86_64-- -mattr=+macrofusion | FileCheck %s --check-prefix=MACROFUSION			; RUN: llc < %s -mtriple=x86_64-- -mattr=-macrofusion,+branchfusion | FileCheck %s --check-prefix=BRANCHFUSION --check-prefix=BRANCHFUSIONONLY
				; RUN: llc < %s -mtriple=x86_64-- -mattr=+macrofusion,-branchfusion | FileCheck %s --check-prefix=BRANCHFUSION --check-prefix=MACROFUSION

	; testb should be scheduled right before je to enable macro-fusion.			; testb should be scheduled right before je to enable macro-fusion.

	define i32 @macrofuse_test_je(i32 %flags, i8* %p) nounwind {			define i32 @macrofuse_test_je(i32 %flags, i8* %p) nounwind {
	; NOFUSION-LABEL: macrofuse_test_je:			; NOFUSION-LABEL: macrofuse_test_je:
	; NOFUSION: # %bb.0: # %entry			; NOFUSION: # %bb.0: # %entry
	; NOFUSION-NEXT: xorl %eax, %eax			; NOFUSION-NEXT: xorl %eax, %eax
	; NOFUSION-NEXT: testl $512, %edi # imm = 0x200			; NOFUSION-NEXT: testl $512, %edi # imm = 0x200
	; NOFUSION-NEXT: movb $1, (%rsi)			; NOFUSION-NEXT: movb $1, (%rsi)
	; NOFUSION-NEXT: je .LBB0_2			; NOFUSION-NEXT: je .LBB0_2
	; NOFUSION-NEXT: # %bb.1: # %if.then			; NOFUSION-NEXT: # %bb.1: # %if.then
	; NOFUSION-NEXT: movl $1, %eax			; NOFUSION-NEXT: movl $1, %eax
	; NOFUSION-NEXT: .LBB0_2: # %if.end			; NOFUSION-NEXT: .LBB0_2: # %if.end
	; NOFUSION-NEXT: retq			; NOFUSION-NEXT: retq
	;			;
	; MACROFUSION-LABEL: macrofuse_test_je:			; BRANCHFUSION-LABEL: macrofuse_test_je:
	; MACROFUSION: # %bb.0: # %entry			; BRANCHFUSION: # %bb.0: # %entry
	; MACROFUSION-NEXT: xorl %eax, %eax			; BRANCHFUSION-NEXT: xorl %eax, %eax
	; MACROFUSION-NEXT: movb $1, (%rsi)			; BRANCHFUSION-NEXT: movb $1, (%rsi)
	; MACROFUSION-NEXT: testl $512, %edi # imm = 0x200			; BRANCHFUSION-NEXT: testl $512, %edi # imm = 0x200
	; MACROFUSION-NEXT: je .LBB0_2			; BRANCHFUSION-NEXT: je .LBB0_2
	; MACROFUSION-NEXT: # %bb.1: # %if.then			; BRANCHFUSION-NEXT: # %bb.1: # %if.then
	; MACROFUSION-NEXT: movl $1, %eax			; BRANCHFUSION-NEXT: movl $1, %eax
	; MACROFUSION-NEXT: .LBB0_2: # %if.end			; BRANCHFUSION-NEXT: .LBB0_2: # %if.end
	; MACROFUSION-NEXT: retq			; BRANCHFUSION-NEXT: retq
	entry:			entry:
	%and = and i32 %flags, 512			%and = and i32 %flags, 512
	%tobool = icmp eq i32 %and, 0			%tobool = icmp eq i32 %and, 0
	store i8 1, i8* %p			store i8 1, i8* %p
	br i1 %tobool, label %if.end, label %if.then			br i1 %tobool, label %if.end, label %if.then

	if.then:			if.then:
	br label %if.end			br label %if.end
	Show All 11 Lines
	; NOFUSION-NEXT: je .LBB1_1			; NOFUSION-NEXT: je .LBB1_1
	; NOFUSION-NEXT: # %bb.2: # %if.then			; NOFUSION-NEXT: # %bb.2: # %if.then
	; NOFUSION-NEXT: movl $1, %eax			; NOFUSION-NEXT: movl $1, %eax
	; NOFUSION-NEXT: retq			; NOFUSION-NEXT: retq
	; NOFUSION-NEXT: .LBB1_1:			; NOFUSION-NEXT: .LBB1_1:
	; NOFUSION-NEXT: xorl %eax, %eax			; NOFUSION-NEXT: xorl %eax, %eax
	; NOFUSION-NEXT: retq			; NOFUSION-NEXT: retq
	;			;
	; MACROFUSION-LABEL: macrofuse_cmp_je:			; BRANCHFUSION-LABEL: macrofuse_cmp_je:
	; MACROFUSION: # %bb.0: # %entry			; BRANCHFUSION: # %bb.0: # %entry
	; MACROFUSION-NEXT: movb $1, (%rsi)			; BRANCHFUSION-NEXT: movb $1, (%rsi)
	; MACROFUSION-NEXT: cmpl $512, %edi # imm = 0x200			; BRANCHFUSION-NEXT: cmpl $512, %edi # imm = 0x200
	; MACROFUSION-NEXT: je .LBB1_1			; BRANCHFUSION-NEXT: je .LBB1_1
	; MACROFUSION-NEXT: # %bb.2: # %if.then			; BRANCHFUSION-NEXT: # %bb.2: # %if.then
	; MACROFUSION-NEXT: movl $1, %eax			; BRANCHFUSION-NEXT: movl $1, %eax
	; MACROFUSION-NEXT: retq			; BRANCHFUSION-NEXT: retq
	; MACROFUSION-NEXT: .LBB1_1:			; BRANCHFUSION-NEXT: .LBB1_1:
	; MACROFUSION-NEXT: xorl %eax, %eax			; BRANCHFUSION-NEXT: xorl %eax, %eax
	; MACROFUSION-NEXT: retq			; BRANCHFUSION-NEXT: retq
	entry:			entry:
	%sub = sub i32 %flags, 512			%sub = sub i32 %flags, 512
	%tobool = icmp eq i32 %sub, 0			%tobool = icmp eq i32 %sub, 0
	store i8 1, i8* %p			store i8 1, i8* %p
	br i1 %tobool, label %if.end, label %if.then			br i1 %tobool, label %if.end, label %if.then

	if.then:			if.then:
	br label %if.end			br label %if.end
	Show All 10 Lines
	; NOFUSION-NEXT: addl $-512, %eax # imm = 0xFE00			; NOFUSION-NEXT: addl $-512, %eax # imm = 0xFE00
	; NOFUSION-NEXT: movb $1, (%rsi)			; NOFUSION-NEXT: movb $1, (%rsi)
	; NOFUSION-NEXT: je .LBB2_2			; NOFUSION-NEXT: je .LBB2_2
	; NOFUSION-NEXT: # %bb.1: # %if.then			; NOFUSION-NEXT: # %bb.1: # %if.then
	; NOFUSION-NEXT: movl $1, %eax			; NOFUSION-NEXT: movl $1, %eax
	; NOFUSION-NEXT: .LBB2_2: # %if.end			; NOFUSION-NEXT: .LBB2_2: # %if.end
	; NOFUSION-NEXT: retq			; NOFUSION-NEXT: retq
	;			;
				; BRANCHFUSIONONLY-LABEL: macrofuse_alu_je:
				; BRANCHFUSIONONLY: # %bb.0: # %entry
				; BRANCHFUSIONONLY-NEXT: movl %edi, %eax
				; BRANCHFUSIONONLY-NEXT: addl $-512, %eax # imm = 0xFE00
				; BRANCHFUSIONONLY-NEXT: movb $1, (%rsi)
				; BRANCHFUSIONONLY-NEXT: je .LBB2_2
				; BRANCHFUSIONONLY-NEXT: # %bb.1: # %if.then
				; BRANCHFUSIONONLY-NEXT: movl $1, %eax
				; BRANCHFUSIONONLY-NEXT: .LBB2_2: # %if.end
				; BRANCHFUSIONONLY-NEXT: retq
				;
	; MACROFUSION-LABEL: macrofuse_alu_je:			; MACROFUSION-LABEL: macrofuse_alu_je:
	; MACROFUSION: # %bb.0: # %entry			; MACROFUSION: # %bb.0: # %entry
	; MACROFUSION-NEXT: movl %edi, %eax			; MACROFUSION-NEXT: movl %edi, %eax
	; MACROFUSION-NEXT: movb $1, (%rsi)			; MACROFUSION-NEXT: movb $1, (%rsi)
	; MACROFUSION-NEXT: addl $-512, %eax # imm = 0xFE00			; MACROFUSION-NEXT: addl $-512, %eax # imm = 0xFE00
	; MACROFUSION-NEXT: je .LBB2_2			; MACROFUSION-NEXT: je .LBB2_2
	; MACROFUSION-NEXT: # %bb.1: # %if.then			; MACROFUSION-NEXT: # %bb.1: # %if.then
	; MACROFUSION-NEXT: movl $1, %eax			; MACROFUSION-NEXT: movl $1, %eax
	Show All 20 Lines
	; NOFUSION-NEXT: decl %eax			; NOFUSION-NEXT: decl %eax
	; NOFUSION-NEXT: movb $1, (%rsi)			; NOFUSION-NEXT: movb $1, (%rsi)
	; NOFUSION-NEXT: je .LBB3_2			; NOFUSION-NEXT: je .LBB3_2
	; NOFUSION-NEXT: # %bb.1: # %if.then			; NOFUSION-NEXT: # %bb.1: # %if.then
	; NOFUSION-NEXT: movl $1, %eax			; NOFUSION-NEXT: movl $1, %eax
	; NOFUSION-NEXT: .LBB3_2: # %if.end			; NOFUSION-NEXT: .LBB3_2: # %if.end
	; NOFUSION-NEXT: retq			; NOFUSION-NEXT: retq
	;			;
				; BRANCHFUSIONONLY-LABEL: macrofuse_dec_je:
				; BRANCHFUSIONONLY: # %bb.0: # %entry
				; BRANCHFUSIONONLY-NEXT: movl %edi, %eax
				; BRANCHFUSIONONLY-NEXT: decl %eax
				; BRANCHFUSIONONLY-NEXT: movb $1, (%rsi)
				; BRANCHFUSIONONLY-NEXT: je .LBB3_2
				; BRANCHFUSIONONLY-NEXT: # %bb.1: # %if.then
				; BRANCHFUSIONONLY-NEXT: movl $1, %eax
				; BRANCHFUSIONONLY-NEXT: .LBB3_2: # %if.end
				; BRANCHFUSIONONLY-NEXT: retq
				;
	; MACROFUSION-LABEL: macrofuse_dec_je:			; MACROFUSION-LABEL: macrofuse_dec_je:
	; MACROFUSION: # %bb.0: # %entry			; MACROFUSION: # %bb.0: # %entry
	; MACROFUSION-NEXT: movl %edi, %eax			; MACROFUSION-NEXT: movl %edi, %eax
	; MACROFUSION-NEXT: movb $1, (%rsi)			; MACROFUSION-NEXT: movb $1, (%rsi)
	; MACROFUSION-NEXT: decl %eax			; MACROFUSION-NEXT: decl %eax
	; MACROFUSION-NEXT: je .LBB3_2			; MACROFUSION-NEXT: je .LBB3_2
	; MACROFUSION-NEXT: # %bb.1: # %if.then			; MACROFUSION-NEXT: # %bb.1: # %if.then
	; MACROFUSION-NEXT: movl $1, %eax			; MACROFUSION-NEXT: movl $1, %eax
	Show All 15 Lines

llvm/trunk/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: opt < %s -loop-reduce -mcpu=btver2 -S | FileCheck %s --check-prefix=JAG			; RUN: opt < %s -loop-reduce -mcpu=btver2 -S | FileCheck %s --check-prefix=JAG
	; RUN: opt < %s -loop-reduce -mcpu=bdver2 -S | FileCheck %s --check-prefix=BUL			; RUN: opt < %s -loop-reduce -mcpu=bdver2 -S | FileCheck %s --check-prefix=BUL
	; RUN: opt < %s -loop-reduce -mcpu=haswell -S | FileCheck %s --check-prefix=HSW			; RUN: opt < %s -loop-reduce -mcpu=haswell -S | FileCheck %s --check-prefix=HSW

	; RUN: llc < %s | FileCheck %s --check-prefix=BASE			; RUN: llc < %s | FileCheck %s --check-prefix=BASE
	; RUN: llc < %s -mattr=macrofusion | FileCheck %s --check-prefix=FUSE			; RUN: llc < %s -mattr=macrofusion | FileCheck %s --check-prefix=FUSE
				; RUN: llc < %s -mattr=branchfusion | FileCheck %s --check-prefix=FUSE

	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"			target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-unknown-unknown"			target triple = "x86_64-unknown-unknown"

	; PR35681 - https://bugs.llvm.org/show_bug.cgi?id=35681			; PR35681 - https://bugs.llvm.org/show_bug.cgi?id=35681
	; FIXME: If a CPU can macro-fuse a compare and branch, then we discount that			; FIXME: If a CPU can macro-fuse a compare and branch, then we discount that
	; cost in LSR and avoid generating large offsets in each memory access.			; cost in LSR and avoid generating large offsets in each memory access.
	; This reduces code size and may improve decode throughput.			; This reduces code size and may improve decode throughput.
	▲ Show 20 Lines • Show All 125 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86MacroFusion] Handle branch fusion (AMD CPUs).
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 192626

llvm/trunk/lib/Target/X86/X86.td

llvm/trunk/lib/Target/X86/X86MacroFusion.cpp

llvm/trunk/lib/Target/X86/X86Subtarget.h

llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h

llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp

llvm/trunk/test/CodeGen/X86/testb-je-fusion.ll

llvm/trunk/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86MacroFusion] Handle branch fusion (AMD CPUs).
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 192626

llvm/trunk/lib/Target/X86/X86.td

llvm/trunk/lib/Target/X86/X86MacroFusion.cpp

llvm/trunk/lib/Target/X86/X86Subtarget.h

llvm/trunk/lib/Target/X86/X86TargetTransformInfo.h

llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp

llvm/trunk/test/CodeGen/X86/testb-je-fusion.ll

llvm/trunk/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll

[X86MacroFusion] Handle branch fusion (AMD CPUs).
ClosedPublic