Diff 192425

llvm/lib/Target/X86/X86.td

Show First 20 Lines • Show All 338 Lines • ▼ Show 20 Lines
// Development Manual. This feature essentially means that REP MOVSB will copy		// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making		// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REPMOVS{W,D,Q}.		// it at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB		def FeatureERMSB
: SubtargetFeature<		: SubtargetFeature<
"ermsb", "HasERMSB", "true",		"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;		"REP MOVS/STOS are fast">;

// Sandy Bridge and newer processors have many instructions that can be		// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single		// fused with conditional branches and pass through the CPU as a single
// operation.		// operation.
def FeatureMacroFusion		def FeatureMacroFusion
: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",		: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
"Various instructions can be fused with conditional branches">;		"Various instructions can be fused with conditional branches">;

		// Bulldozer and newer processors can merge CMP/TEST with conditional branches.
		lebedev.riUnsubmitted Done Reply Inline Actions // Bulldozer and newer processors can merge CMP/TEST (but not other instructions) // with conditional branches. lebedev.ri: ``` // Bulldozer and newer processors can merge CMP/TEST (but not other instructions) // with…
		def FeatureBranchFusion
		: SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
		"CMP/TEST can be fused with conditional branches">;
		lebedev.riUnsubmitted Not Done Reply Inline Actions I think it would be better to swap these around? `FeatureMacroFusion` is a superset of `FeatureBranchFusion`. (maybe even inherit `FeatureBranchFusion` by `FeatureMacroFusion`, not sure it is possible here.) lebedev.ri: I think it would be better to swap these around? `FeatureMacroFusion` is a superset of…
		courbetAuthorUnsubmitted Done Reply Inline Actions I'd rather not inherit because they are actually different (e.g. w.r.t. CMP). Also they have different subtle limitations that are not currently modeled (see Agner), but we might want to model in the future. courbet: I'd rather not inherit because they are actually different (e.g. w.r.t. CMP). Also they have…
		lebedev.riUnsubmitted Done Reply Inline Actions Sounds good. lebedev.ri: Sounds good.

// Gather is available since Haswell (AVX2 set). So technically, we can		// Gather is available since Haswell (AVX2 set). So technically, we can
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.		// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is		// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).		// similar to Skylake Server (AVX-512).
def FeatureHasFastGather		def FeatureHasFastGather
: SubtargetFeature<"fast-gather", "HasFastGather", "true",		: SubtargetFeature<"fast-gather", "HasFastGather", "true",
"Indicates if gather is reasonably fast">;		"Indicates if gather is reasonably fast">;

▲ Show 20 Lines • Show All 443 Lines • ▼ Show 20 Lines	list<SubtargetFeature> BdVer1InheritableFeatures = [FeatureX87,
FeatureNOPL,		FeatureNOPL,
FeatureLZCNT,		FeatureLZCNT,
FeaturePOPCNT,		FeaturePOPCNT,
FeatureXSAVE,		FeatureXSAVE,
FeatureLWP,		FeatureLWP,
FeatureSlowSHLD,		FeatureSlowSHLD,
FeatureLAHFSAHF,		FeatureLAHFSAHF,
FeatureFast11ByteNOP,		FeatureFast11ByteNOP,
FeatureMacroFusion];		FeatureBranchFusion];
list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;		list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;

// PileDriver		// PileDriver
list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,		list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
FeatureBMI,		FeatureBMI,
FeatureTBM,		FeatureTBM,
FeatureFMA,		FeatureFMA,
FeatureFastBEXTR];		FeatureFastBEXTR];
Show All 33 Lines	list<SubtargetFeature> ZNFeatures = [FeatureADX,
FeatureFSGSBase,		FeatureFSGSBase,
FeatureFXSR,		FeatureFXSR,
FeatureNOPL,		FeatureNOPL,
FeatureFastLZCNT,		FeatureFastLZCNT,
FeatureLAHFSAHF,		FeatureLAHFSAHF,
FeatureLZCNT,		FeatureLZCNT,
FeatureFastBEXTR,		FeatureFastBEXTR,
FeatureFast15ByteNOP,		FeatureFast15ByteNOP,
FeatureMacroFusion,		FeatureBranchFusion,
FeatureMMX,		FeatureMMX,
FeatureMOVBE,		FeatureMOVBE,
FeatureMWAITX,		FeatureMWAITX,
FeaturePCLMUL,		FeaturePCLMUL,
FeaturePOPCNT,		FeaturePOPCNT,
FeaturePRFCHW,		FeaturePRFCHW,
FeatureRDRAND,		FeatureRDRAND,
FeatureRDSEED,		FeatureRDSEED,
▲ Show 20 Lines • Show All 340 Lines • Show Last 20 Lines

llvm/lib/Target/X86/X86MacroFusion.cpp

Show All 12 Lines

#include "X86MacroFusion.h"		#include "X86MacroFusion.h"
#include "X86Subtarget.h"		#include "X86Subtarget.h"
#include "llvm/CodeGen/MacroFusion.h"		#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/TargetInstrInfo.h"		#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;		using namespace llvm;

/// Check if the instr pair, FirstMI and SecondMI, should be fused		namespace {
/// together. Given SecondMI, when FirstMI is unspecified, then check if
/// SecondMI may be part of a fused pair at all.
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
const TargetSubtargetInfo &TSI,
const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
const X86Subtarget &ST = static_cast<const X86Subtarget&>(TSI);
// Check if this processor supports macro-fusion.
if (!ST.hasMacroFusion())
return false;

enum {		enum class FirstKind { Test, Cmp, And, ALU, IncDec, Invalid };
		lebedev.riUnsubmitted Done Reply Inline Actions `FirstInstrKind`? lebedev.ri: `FirstInstrKind`?
FuseTest,
FuseCmp,
FuseInc
} FuseKind;

unsigned FirstOpcode = FirstMI
? FirstMI->getOpcode()
: static_cast<unsigned>(X86::INSTRUCTION_LIST_END);
unsigned SecondOpcode = SecondMI.getOpcode();

switch (SecondOpcode) {		enum class SecondKind {
default:		ELG,
return false;		AB,
case X86::JE_1:		SPO,
case X86::JNE_1:		Invalid,
		RKSimonUnsubmitted Done Reply Inline Actions (style) Please comment enums - they are not obvious. RKSimon: (style) Please comment enums - they are not obvious.
case X86::JL_1:		};
		lebedev.riUnsubmitted Done Reply Inline Actions `JumpKind`? lebedev.ri: `JumpKind`?
case X86::JLE_1:
case X86::JG_1:
case X86::JGE_1:
FuseKind = FuseInc;
break;
case X86::JB_1:
case X86::JBE_1:
case X86::JA_1:
case X86::JAE_1:
FuseKind = FuseCmp;
break;
case X86::JS_1:
case X86::JNS_1:
case X86::JP_1:
case X86::JNP_1:
case X86::JO_1:
case X86::JNO_1:
FuseKind = FuseTest;
break;
}

switch (FirstOpcode) {		} // namespace

		static FirstKind classifyFirst(const MachineInstr &MI) {
		switch (MI.getOpcode()) {
default:		default:
return false;		return FirstKind::Invalid;
case X86::TEST8rr:		case X86::TEST8rr:
case X86::TEST16rr:		case X86::TEST16rr:
case X86::TEST32rr:		case X86::TEST32rr:
case X86::TEST64rr:		case X86::TEST64rr:
case X86::TEST8ri:		case X86::TEST8ri:
case X86::TEST16ri:		case X86::TEST16ri:
case X86::TEST32ri:		case X86::TEST32ri:
case X86::TEST64ri32:		case X86::TEST64ri32:
case X86::TEST8mr:		case X86::TEST8mr:
case X86::TEST16mr:		case X86::TEST16mr:
case X86::TEST32mr:		case X86::TEST32mr:
case X86::TEST64mr:		case X86::TEST64mr:
		return FirstKind::Test;
case X86::AND16ri:		case X86::AND16ri:
case X86::AND16ri8:		case X86::AND16ri8:
case X86::AND16rm:		case X86::AND16rm:
case X86::AND16rr:		case X86::AND16rr:
case X86::AND32ri:		case X86::AND32ri:
case X86::AND32ri8:		case X86::AND32ri8:
case X86::AND32rm:		case X86::AND32rm:
case X86::AND32rr:		case X86::AND32rr:
case X86::AND64ri32:		case X86::AND64ri32:
case X86::AND64ri8:		case X86::AND64ri8:
case X86::AND64rm:		case X86::AND64rm:
case X86::AND64rr:		case X86::AND64rr:
case X86::AND8ri:		case X86::AND8ri:
case X86::AND8rm:		case X86::AND8rm:
case X86::AND8rr:		case X86::AND8rr:
return true;		return FirstKind::And;
case X86::CMP16ri:		case X86::CMP16ri:
case X86::CMP16ri8:		case X86::CMP16ri8:
case X86::CMP16rm:		case X86::CMP16rm:
case X86::CMP16rr:		case X86::CMP16rr:
case X86::CMP16mr:		case X86::CMP16mr:
case X86::CMP32ri:		case X86::CMP32ri:
case X86::CMP32ri8:		case X86::CMP32ri8:
case X86::CMP32rm:		case X86::CMP32rm:
case X86::CMP32rr:		case X86::CMP32rr:
case X86::CMP32mr:		case X86::CMP32mr:
case X86::CMP64ri32:		case X86::CMP64ri32:
case X86::CMP64ri8:		case X86::CMP64ri8:
case X86::CMP64rm:		case X86::CMP64rm:
case X86::CMP64rr:		case X86::CMP64rr:
case X86::CMP64mr:		case X86::CMP64mr:
case X86::CMP8ri:		case X86::CMP8ri:
case X86::CMP8rm:		case X86::CMP8rm:
case X86::CMP8rr:		case X86::CMP8rr:
case X86::CMP8mr:		case X86::CMP8mr:
		return FirstKind::Cmp;
case X86::ADD16ri:		case X86::ADD16ri:
case X86::ADD16ri8:		case X86::ADD16ri8:
case X86::ADD16ri8_DB:		case X86::ADD16ri8_DB:
case X86::ADD16ri_DB:		case X86::ADD16ri_DB:
case X86::ADD16rm:		case X86::ADD16rm:
case X86::ADD16rr:		case X86::ADD16rr:
case X86::ADD16rr_DB:		case X86::ADD16rr_DB:
case X86::ADD32ri:		case X86::ADD32ri:
Show All 25 Lines	static FirstKind classifyFirst(const MachineInstr &MI) {
case X86::SUB32rr:		case X86::SUB32rr:
case X86::SUB64ri32:		case X86::SUB64ri32:
case X86::SUB64ri8:		case X86::SUB64ri8:
case X86::SUB64rm:		case X86::SUB64rm:
case X86::SUB64rr:		case X86::SUB64rr:
case X86::SUB8ri:		case X86::SUB8ri:
case X86::SUB8rm:		case X86::SUB8rm:
case X86::SUB8rr:		case X86::SUB8rr:
return FuseKind == FuseCmp \|\| FuseKind == FuseInc;		return FirstKind::ALU;
case X86::INC16r:		case X86::INC16r:
case X86::INC32r:		case X86::INC32r:
case X86::INC64r:		case X86::INC64r:
case X86::INC8r:		case X86::INC8r:
case X86::DEC16r:		case X86::DEC16r:
case X86::DEC32r:		case X86::DEC32r:
case X86::DEC64r:		case X86::DEC64r:
case X86::DEC8r:		case X86::DEC8r:
return FuseKind == FuseInc;		return FirstKind::IncDec;
case X86::INSTRUCTION_LIST_END:		}
		}

		/// Map the opcode of \p MI (the candidate second instruction of a fused
		/// pair) to the SecondKind group that the fusion rules switch on.
		/// Any opcode not listed in the switch yields SecondKind::Invalid.
		static SecondKind classifySecond(const MachineInstr &MI) {
		switch (MI.getOpcode()) {
		default:
		return SecondKind::Invalid;
		// JE/JNE/JL/JLE/JG/JGE form the ELG group
		// (presumably "equal / less / greater" -- confirm against the enum).
		case X86::JE_1:
		case X86::JNE_1:
		case X86::JL_1:
		case X86::JLE_1:
		case X86::JG_1:
		case X86::JGE_1:
		return SecondKind::ELG;
		// JB/JBE/JA/JAE form the AB group
		// (presumably "above / below" -- confirm against the enum).
		case X86::JB_1:
		case X86::JBE_1:
		case X86::JA_1:
		case X86::JAE_1:
		return SecondKind::AB;
		// JS/JNS/JP/JNP/JO/JNO form the SPO group
		// (presumably "sign / parity / overflow" -- confirm against the enum).
		case X86::JS_1:
		case X86::JNS_1:
		case X86::JP_1:
		case X86::JNP_1:
		case X86::JO_1:
		case X86::JNO_1:
		return SecondKind::SPO;
		}
		}

		/// Check if the instr pair, FirstMI and SecondMI, should be fused
		/// together. Given SecondMI, when FirstMI is unspecified, then check if
		/// SecondMI may be part of a fused pair at all.
		static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
		const TargetSubtargetInfo &TSI,
		const MachineInstr *FirstMI,
		const MachineInstr &SecondMI) {
		const X86Subtarget &ST = static_cast<const X86Subtarget &>(TSI);

		// Check if this processor supports any kind of fusion.
		if (!(ST.hasBranchFusion() \|\| ST.hasMacroFusion()))
		return false;

		const SecondKind BranchKind = classifySecond(SecondMI);

		if (BranchKind == SecondKind::Invalid)
		return false; // Second cannot be fused with anything.

		if (FirstMI == nullptr)
		return true; // We're only checking whether Second can be fused at all.

		const FirstKind TestKind = classifyFirst(*FirstMI);

		if (ST.hasBranchFusion()) {
		// Branch fusion can merge CMP and TEST with all conditional jumps.
		return (TestKind == FirstKind::Cmp \|\| TestKind == FirstKind::Test);
		}

		if (ST.hasMacroFusion()) {
		// Macro Fusion rules are a bit more complex. See Agner Fog's
		// Microarchitecture table 9.2 "Instruction Fusion".
		switch (TestKind) {
		case FirstKind::Test:
		case FirstKind::And:
return true;		return true;
		case FirstKind::Cmp:
		case FirstKind::ALU:
		return BranchKind == SecondKind::ELG \|\| BranchKind == SecondKind::AB;
		case FirstKind::IncDec:
		return BranchKind == SecondKind::ELG;
		case FirstKind::Invalid:
		return false;
}		}
}		}

		llvm_unreachable("");
		RKSimonUnsubmitted Done Reply Inline Actions Add an unreachable message RKSimon: Add an unreachable message
		}

namespace llvm {		namespace llvm {

std::unique_ptr<ScheduleDAGMutation>		std::unique_ptr<ScheduleDAGMutation>
createX86MacroFusionDAGMutation () {		createX86MacroFusionDAGMutation () {
return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent);		return createBranchMacroFusionDAGMutation(shouldScheduleAdjacent);
}		}

} // end namespace llvm		} // end namespace llvm

llvm/lib/Target/X86/X86Subtarget.h

Show First 20 Lines • Show All 291 Lines • ▼ Show 20 Lines	protected:
bool HasFastLZCNT = false;		bool HasFastLZCNT = false;

/// True if SHLD based rotate is fast.		/// True if SHLD based rotate is fast.
bool HasFastSHLDRotate = false;		bool HasFastSHLDRotate = false;

/// True if the processor supports macrofusion.		/// True if the processor supports macrofusion.
bool HasMacroFusion = false;		bool HasMacroFusion = false;

		/// True if the processor supports branch fusion.
		bool HasBranchFusion = false;

/// True if the processor has enhanced REP MOVSB/STOSB.		/// True if the processor has enhanced REP MOVSB/STOSB.
bool HasERMSB = false;		bool HasERMSB = false;

/// True if the short functions should be padded to prevent		/// True if the short functions should be padded to prevent
/// a stall when returning too early.		/// a stall when returning too early.
bool PadShortFunctions = false;		bool PadShortFunctions = false;

/// True if two memory operand instructions should use a temporary register		/// True if two memory operand instructions should use a temporary register
▲ Show 20 Lines • Show All 329 Lines • ▼ Show 20 Lines	public:
bool hasFastGather() const { return HasFastGather; }		bool hasFastGather() const { return HasFastGather; }
bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }		bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }		bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }		bool hasFastLZCNT() const { return HasFastLZCNT; }
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }		bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
bool hasFastBEXTR() const { return HasFastBEXTR; }		bool hasFastBEXTR() const { return HasFastBEXTR; }
bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }		bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
bool hasMacroFusion() const { return HasMacroFusion; }		bool hasMacroFusion() const { return HasMacroFusion; }
		bool hasBranchFusion() const { return HasBranchFusion; }
bool hasERMSB() const { return HasERMSB; }		bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }		bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }		bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }		bool padShortFunctions() const { return PadShortFunctions; }
bool slowTwoMemOps() const { return SlowTwoMemOps; }		bool slowTwoMemOps() const { return SlowTwoMemOps; }
bool LEAusesAG() const { return LEAUsesAG; }		bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }		bool slowLEA() const { return SlowLEA; }
bool slow3OpsLEA() const { return Slow3OpsLEA; }		bool slow3OpsLEA() const { return Slow3OpsLEA; }
▲ Show 20 Lines • Show All 199 Lines • Show Last 20 Lines

llvm/lib/Target/X86/X86TargetTransformInfo.h

Show All 30 Lines	class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {

const X86Subtarget *ST;		const X86Subtarget *ST;
const X86TargetLowering *TLI;		const X86TargetLowering *TLI;

const X86Subtarget *getST() const { return ST; }		const X86Subtarget *getST() const { return ST; }
const X86TargetLowering *getTLI() const { return TLI; }		const X86TargetLowering *getTLI() const { return TLI; }

const FeatureBitset InlineFeatureIgnoreList = {		const FeatureBitset InlineFeatureIgnoreList = {
// This indicates the CPU is 64 bit capable not that we are in 64-bit mode.		// This indicates the CPU is 64 bit capable not that we are in 64-bit
		// mode.
		RKSimonUnsubmitted Done Reply Inline Actions NFC change - commit separately RKSimon: NFC change - commit separately
X86::Feature64Bit,		X86::Feature64Bit,

// These features don't have any intrinsics or ABI effect.		// These features don't have any intrinsics or ABI effect.
X86::FeatureNOPL,		X86::FeatureNOPL,
X86::FeatureCMPXCHG16B,		X86::FeatureCMPXCHG16B,
X86::FeatureLAHFSAHF,		X86::FeatureLAHFSAHF,

// Codegen control options.		// Codegen control options.
X86::FeatureFast11ByteNOP,		X86::FeatureFast11ByteNOP,
X86::FeatureFast15ByteNOP,		X86::FeatureFast15ByteNOP,
X86::FeatureFastBEXTR,		X86::FeatureFastBEXTR,
X86::FeatureFastHorizontalOps,		X86::FeatureFastHorizontalOps,
X86::FeatureFastLZCNT,		X86::FeatureFastLZCNT,
X86::FeatureFastPartialYMMorZMMWrite,		X86::FeatureFastPartialYMMorZMMWrite,
X86::FeatureFastScalarFSQRT,		X86::FeatureFastScalarFSQRT,
X86::FeatureFastSHLDRotate,		X86::FeatureFastSHLDRotate,
X86::FeatureFastVariableShuffle,		X86::FeatureFastVariableShuffle,
X86::FeatureFastVectorFSQRT,		X86::FeatureFastVectorFSQRT,
X86::FeatureLEAForSP,		X86::FeatureLEAForSP,
X86::FeatureLEAUsesAG,		X86::FeatureLEAUsesAG,
X86::FeatureLZCNTFalseDeps,		X86::FeatureLZCNTFalseDeps,
X86::FeatureMacroFusion,		X86::FeatureMacroFusion,
		X86::FeatureBranchFusion,
X86::FeatureMergeToThreeWayBranch,		X86::FeatureMergeToThreeWayBranch,
X86::FeaturePadShortFunctions,		X86::FeaturePadShortFunctions,
X86::FeaturePOPCNTFalseDeps,		X86::FeaturePOPCNTFalseDeps,
X86::FeatureSSEUnalignedMem,		X86::FeatureSSEUnalignedMem,
X86::FeatureSlow3OpsLEA,		X86::FeatureSlow3OpsLEA,
X86::FeatureSlowDivide32,		X86::FeatureSlowDivide32,
X86::FeatureSlowDivide64,		X86::FeatureSlowDivide64,
X86::FeatureSlowIncDec,		X86::FeatureSlowIncDec,
X86::FeatureSlowLEA,		X86::FeatureSlowLEA,
X86::FeatureSlowPMADDWD,		X86::FeatureSlowPMADDWD,
X86::FeatureSlowPMULLD,		X86::FeatureSlowPMULLD,
X86::FeatureSlowSHLD,		X86::FeatureSlowSHLD,
X86::FeatureSlowTwoMemOps,		X86::FeatureSlowTwoMemOps,
X86::FeatureSlowUAMem16,		X86::FeatureSlowUAMem16,

// Perf-tuning flags.		// Perf-tuning flags.
X86::FeatureHasFastGather,		X86::FeatureHasFastGather,
X86::FeatureSlowUAMem32,		X86::FeatureSlowUAMem32,

// Based on whether user set the -mprefer-vector-width command line.		// Based on whether user set the -mprefer-vector-width command line.
X86::FeaturePrefer256Bit,		X86::FeaturePrefer256Bit,

// CPU name enums. These just follow CPU string.		// CPU name enums. These just follow CPU string.
X86::ProcIntelAtom,		X86::ProcIntelAtom,
X86::ProcIntelGLM,		X86::ProcIntelGLM,
X86::ProcIntelGLP,		X86::ProcIntelGLP,
X86::ProcIntelSLM,		X86::ProcIntelSLM,
X86::ProcIntelTRM,		X86::ProcIntelTRM,
};		};

public:		public:
explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)		explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),		: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}		TLI(ST->getTargetLowering()) {}

/// \name Scalar TTI Implementations		/// \name Scalar TTI Implementations
▲ Show 20 Lines • Show All 113 Lines • Show Last 20 Lines

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Show First 20 Lines • Show All 2,978 Lines • ▼ Show 20 Lines	return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
C1.NumIVMuls, C1.NumBaseAdds,		C1.NumIVMuls, C1.NumBaseAdds,
C1.ScaleCost, C1.ImmCost, C1.SetupCost) <		C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,		std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
C2.NumIVMuls, C2.NumBaseAdds,		C2.NumIVMuls, C2.NumBaseAdds,
C2.ScaleCost, C2.ImmCost, C2.SetupCost);		C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}		}

bool X86TTIImpl::canMacroFuseCmp() {		bool X86TTIImpl::canMacroFuseCmp() {
return ST->hasMacroFusion();		return ST->hasMacroFusion() \|\| ST->hasBranchFusion();
}		}

bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {		bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
if (!ST->hasAVX())		if (!ST->hasAVX())
return false;		return false;

// The backend can't handle a single element vector.		// The backend can't handle a single element vector.
if (isa<VectorType>(DataTy) && DataTy->getVectorNumElements() == 1)		if (isa<VectorType>(DataTy) && DataTy->getVectorNumElements() == 1)
▲ Show 20 Lines • Show All 457 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: opt < %s -loop-reduce -mcpu=btver2 -S \| FileCheck %s --check-prefix=JAG			; RUN: opt < %s -loop-reduce -mcpu=btver2 -S \| FileCheck %s --check-prefix=JAG
				lebedev.riUnsubmitted Done Reply Inline Actions Also, if you do add a test that we don't fuse non-CMP/TEST instructions if we only have `branchfusion`, could you please also add a bdver2 runline, and precommit all that? lebedev.ri: Also, if you do add a test that we don't fuse non-CMP/TEST instructions if we only have…
	; RUN: opt < %s -loop-reduce -mcpu=haswell -S \| FileCheck %s --check-prefix=HSW			; RUN: opt < %s -loop-reduce -mcpu=haswell -S \| FileCheck %s --check-prefix=HSW

	; RUN: llc < %s \| FileCheck %s --check-prefix=BASE			; RUN: llc < %s \| FileCheck %s --check-prefix=BASE
	; RUN: llc < %s -mattr=macrofusion \| FileCheck %s --check-prefix=FUSE			; RUN: llc < %s -mattr=macrofusion \| FileCheck %s --check-prefix=FUSE
				; RUN: llc < %s -mattr=branchfusion \| FileCheck %s --check-prefix=FUSE

	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"			target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-unknown-unknown"			target triple = "x86_64-unknown-unknown"

	; PR35681 - https://bugs.llvm.org/show_bug.cgi?id=35681			; PR35681 - https://bugs.llvm.org/show_bug.cgi?id=35681
	; FIXME: If a CPU can macro-fuse a compare and branch, then we discount that			; FIXME: If a CPU can macro-fuse a compare and branch, then we discount that
	; cost in LSR and avoid generating large offsets in each memory access.			; cost in LSR and avoid generating large offsets in each memory access.
	; This reduces code size and may improve decode throughput.			; This reduces code size and may improve decode throughput.
	▲ Show 20 Lines • Show All 104 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86MacroFusion] Handle branch fusion (AMD CPUs).
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 192425

llvm/lib/Target/X86/X86.td

llvm/lib/Target/X86/X86MacroFusion.cpp

llvm/lib/Target/X86/X86Subtarget.h

llvm/lib/Target/X86/X86TargetTransformInfo.h

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

llvm/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86MacroFusion] Handle branch fusion (AMD CPUs).ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 192425

llvm/lib/Target/X86/X86.td

llvm/lib/Target/X86/X86MacroFusion.cpp

llvm/lib/Target/X86/X86Subtarget.h

llvm/lib/Target/X86/X86TargetTransformInfo.h

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

llvm/test/Transforms/LoopStrengthReduce/X86/macro-fuse-cmp.ll

[X86MacroFusion] Handle branch fusion (AMD CPUs).
ClosedPublic