Diff 43191

llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp

Show First 20 Lines • Show All 151 Lines • ▼ Show 20 Lines	class X86DAGToDAGISel final : public SelectionDAGISel {
/// Keep a pointer to the X86Subtarget around so that we can		/// Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.		/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;		const X86Subtarget *Subtarget;

/// If true, selector should try to optimize for code size instead of		/// If true, selector should try to optimize for code size instead of
/// performance.		/// performance.
bool OptForSize;		bool OptForSize;

		/// If true, selector should try to optimize for minimum code size.
		bool OptForMinSize;

public:		public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)		explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), OptForSize(false) {}		: SelectionDAGISel(tm, OptLevel), OptForSize(false),
		OptForMinSize(false) {}

const char *getPassName() const override {		const char *getPassName() const override {
return "X86 DAG->DAG Instruction Selection";		return "X86 DAG->DAG Instruction Selection";
}		}

bool runOnMachineFunction(MachineFunction &MF) override {		bool runOnMachineFunction(MachineFunction &MF) override {
// Reset the subtarget each time through.		// Reset the subtarget each time through.
Subtarget = &MF.getSubtarget<X86Subtarget>();		Subtarget = &MF.getSubtarget<X86Subtarget>();
▲ Show 20 Lines • Show All 355 Lines • ▼ Show 20 Lines	static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&		if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&		Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
Callee.getValue(1).hasOneUse())		Callee.getValue(1).hasOneUse())
return true;		return true;
return false;		return false;
}		}

void X86DAGToDAGISel::PreprocessISelDAG() {		void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.		// OptFor[Min]Size are used in pattern predicates that isel is matching.
OptForSize = MF->getFunction()->optForSize();		OptForSize = MF->getFunction()->optForSize();
		OptForMinSize = MF->getFunction()->optForMinSize();
		assert((!OptForMinSize \|\| OptForSize) && "OptForMinSize implies OptForSize");

for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),		for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {		E = CurDAG->allnodes_end(); I != E; ) {
SDNode N = &I++; // Preincrement iterator to avoid invalidation issues.		SDNode N = &I++; // Preincrement iterator to avoid invalidation issues.

if (OptLevel != CodeGenOpt::None &&		if (OptLevel != CodeGenOpt::None &&
// Only do this when the target doesn't favor register indirect		// Only do this when the target doesn't favor register indirect
// calls.		// calls.
▲ Show 20 Lines • Show All 2,469 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86InstrCompiler.td

	Show First 20 Lines • Show All 244 Lines • ▼ Show 20 Lines

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// Alias Instructions			// Alias Instructions
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	// Alias instruction mapping movr0 to xor.			// Alias instruction mapping movr0 to xor.
	// FIXME: remove when we can teach regalloc that xor reg, reg is ok.			// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
	let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,			let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
	isPseudo = 1 in			isPseudo = 1, AddedComplexity = 20 in
	def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",			def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
	[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;			[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;

	// Other widths can also make use of the 32-bit xor, which may have a smaller			// Other widths can also make use of the 32-bit xor, which may have a smaller
	// encoding and avoid partial register updates.			// encoding and avoid partial register updates.
	def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;			def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
	def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;			def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
	def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {			def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
	let AddedComplexity = 20;			let AddedComplexity = 20;
	}			}

	let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],			let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
	AddedComplexity = 1 in {			AddedComplexity = 15 in {
	// Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,			// Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
	// which only require 3 bytes compared to MOV32ri which requires 5.			// which only require 3 bytes compared to MOV32ri which requires 5.
	let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {			let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {
	def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",			def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
	[(set GR32:$dst, 1)]>;			[(set GR32:$dst, 1)]>;
	def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",			def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
	[(set GR32:$dst, -1)]>;			[(set GR32:$dst, -1)]>;
	}			}

	// MOV16ri is 4 bytes, so the instructions above are smaller.			// MOV16ri is 4 bytes, so the instructions above are smaller.
	def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;			def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;
	def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;			def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
	}			}

				// Pseudo instructions for materializing a constant that fits in a
				// sign-extended 8-bit immediate (i32immSExt8 / i64immSExt8) when optimizing
				// for minimum size. They are expanded by X86InstrInfo::ExpandMOVImmSExti8,
				// which turns the pseudo into a push of the immediate followed by a pop into
				// the destination register.
				let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
				// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1.
				// (MOV32r0 uses AddedComplexity = 20 and the MOV32r1/MOV32r_1 group uses 15,
				// so xor and xor+inc/dec keep winning for 0, 1 and -1 where they apply.)
				// FIXME: Add itinerary class and Schedule.
				// 32-bit destination: selected whenever the function is 'minsize'.
				def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
				[(set GR32:$dst, i32immSExt8:$src)]>,
				Requires<[OptForMinSize]>;
				// 64-bit destination: additionally gated on NotWin64WithoutFP, i.e. the
				// target is not Win64 or the function has a frame pointer, because the
				// expansion temporarily adjusts the stack pointer.
				def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
				[(set GR64:$dst, i64immSExt8:$src)]>,
				Requires<[OptForMinSize, NotWin64WithoutFP]>;
				}

	// Materialize i64 constant where top 32-bits are zero. This could theoretically			// Materialize i64 constant where top 32-bits are zero. This could theoretically
	// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however			// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
	// that would make it more difficult to rematerialize.			// that would make it more difficult to rematerialize.
	let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,			let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
	isCodeGenOnly = 1, hasSideEffects = 0 in			isCodeGenOnly = 1, hasSideEffects = 0 in
	def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src),			def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src),
	"", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>;			"", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>;

	▲ Show 20 Lines • Show All 1,577 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86InstrInfo.h

Show All 17 Lines
#include "X86RegisterInfo.h"		#include "X86RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"		#include "llvm/ADT/DenseMap.h"
#include "llvm/Target/TargetInstrInfo.h"		#include "llvm/Target/TargetInstrInfo.h"

#define GET_INSTRINFO_HEADER		#define GET_INSTRINFO_HEADER
#include "X86GenInstrInfo.inc"		#include "X86GenInstrInfo.inc"

namespace llvm {		namespace llvm {
		class MachineInstrBuilder;
class X86RegisterInfo;		class X86RegisterInfo;
class X86Subtarget;		class X86Subtarget;

namespace X86 {		namespace X86 {
// X86 specific condition code. These correspond to X86_*_COND in		// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.		// X86InstrInfo.td. They must be kept in synch.
enum CondCode {		enum CondCode {
COND_A = 0,		COND_A = 0,
▲ Show 20 Lines • Show All 525 Lines • ▼ Show 20 Lines	MachineInstr foldMemoryOperandCustom(MachineFunction &MF, MachineInstr MI,
ArrayRef<MachineOperand> MOs,		ArrayRef<MachineOperand> MOs,
MachineBasicBlock::iterator InsertPt,		MachineBasicBlock::iterator InsertPt,
unsigned Size, unsigned Align) const;		unsigned Size, unsigned Align) const;

/// isFrameOperand - Return true and the FrameIndex if the specified		/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and follow operands form a reference to the stack frame.		/// operand and follow operands form a reference to the stack frame.
bool isFrameOperand(const MachineInstr *MI, unsigned int Op,		bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
int &FrameIndex) const;		int &FrameIndex) const;

		/// Expand the MOVImmSExti8 pseudo-instructions.
		bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const;
};		};

} // End llvm namespace		} // End llvm namespace

#endif		#endif

llvm/trunk/lib/Target/X86/X86InstrInfo.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show All 17 Lines
#include "X86Subtarget.h"		#include "X86Subtarget.h"
#include "X86TargetMachine.h"		#include "X86TargetMachine.h"
#include "llvm/ADT/STLExtras.h"		#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"		#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"		#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"		#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"		#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"		#include "llvm/CodeGen/MachineInstrBuilder.h"
		#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"		#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"		#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"		#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"		#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"		#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"		#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"		#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"		#include "llvm/MC/MCInst.h"
▲ Show 20 Lines • Show All 5,258 Lines • ▼ Show 20 Lines	static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,

// Turn the pseudo into an INC or DEC.		// Turn the pseudo into an INC or DEC.
MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));		MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
MIB.addReg(Reg);		MIB.addReg(Reg);

return true;		return true;
}		}

		// Expand a MOV32ImmSExti8 / MOV64ImmSExti8 pseudo into a push of the
		// immediate followed by a pop into the destination register.
		//
		// MIB wraps the pseudo instruction: operand 0 is the destination register and
		// operand 1 is the immediate. A new PUSH is inserted in front of the pseudo
		// and the pseudo itself is rewritten in place into the matching POP. When
		// DWARF CFI is required and the function has no frame pointer, CFA offset
		// adjustments are emitted around the POP so unwind info stays correct while
		// the stack pointer is temporarily moved.
		//
		// Always returns true (the pseudo was expanded).
		bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const {
		MachineBasicBlock &MBB = *MIB->getParent();
		DebugLoc DL = MIB->getDebugLoc();
		int64_t Imm = MIB->getOperand(1).getImm();
		// Zero is expected to be materialized some other way, never via this pseudo.
		assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
		// I refers to the pseudo itself; instructions built "at I" land before it.
		MachineBasicBlock::iterator I = MIB.getInstr();

		// Number of bytes the push moves the stack pointer by (8 or 4, set below).
		int StackAdjustment;

		if (Subtarget.is64Bit()) {
		assert(MIB->getOpcode() == X86::MOV64ImmSExti8 \|\|
		MIB->getOpcode() == X86::MOV32ImmSExti8);
		// 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
		// widen the register if necessary.
		StackAdjustment = 8;
		BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm);
		// Reuse the pseudo as the POP and retarget operand 0 to the 64-bit
		// register that contains the original destination.
		// NOTE(review): operand 1 (the immediate) is not removed after setDesc;
		// confirm a stale extra operand on the POP is harmless.
		MIB->setDesc(get(X86::POP64r));
		MIB->getOperand(0)
		.setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), MVT::i64));
		} else {
		// Outside 64-bit mode only the 32-bit pseudo is expected.
		assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
		StackAdjustment = 4;
		BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm);
		MIB->setDesc(get(X86::POP32r));
		}
		// NOTE(review): the push stores below the incoming stack pointer; confirm
		// this cannot clobber live data in functions that use the x86-64 red zone.

		// Build CFI if necessary.
		// CFI is emitted only for DWARF-style unwinding (not Windows CFI), only when
		// the function has debug info or needs an unwind table entry, and only
		// when there is no frame pointer.
		MachineFunction &MF = *MBB.getParent();
		const X86FrameLowering *TFL = Subtarget.getFrameLowering();
		bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
		bool NeedsDwarfCFI =
		!IsWin64Prologue &&
		(MF.getMMI().hasDebugInfo() \|\| MF.getFunction()->needsUnwindTableEntry());
		bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
		if (EmitCFI) {
		// First adjustment goes between the PUSH and the POP (+StackAdjustment);
		// the second goes right after the POP and undoes it.
		TFL->BuildCFI(MBB, I, DL,
		MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
		TFL->BuildCFI(MBB, std::next(I), DL,
		MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
		}

		return true;
		}

// LoadStackGuard has so far only been implemented for 64-bit MachO. Different		// LoadStackGuard has so far only been implemented for 64-bit MachO. Different
// code sequence is needed for other targets.		// code sequence is needed for other targets.
static void expandLoadStackGuard(MachineInstrBuilder &MIB,		static void expandLoadStackGuard(MachineInstrBuilder &MIB,
const TargetInstrInfo &TII) {		const TargetInstrInfo &TII) {
MachineBasicBlock &MBB = *MIB->getParent();		MachineBasicBlock &MBB = *MIB->getParent();
DebugLoc DL = MIB->getDebugLoc();		DebugLoc DL = MIB->getDebugLoc();
unsigned Reg = MIB->getOperand(0).getReg();		unsigned Reg = MIB->getOperand(0).getReg();
const GlobalValue *GV =		const GlobalValue *GV =
Show All 16 Lines	bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);		MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
switch (MI->getOpcode()) {		switch (MI->getOpcode()) {
case X86::MOV32r0:		case X86::MOV32r0:
return Expand2AddrUndef(MIB, get(X86::XOR32rr));		return Expand2AddrUndef(MIB, get(X86::XOR32rr));
case X86::MOV32r1:		case X86::MOV32r1:
return expandMOV32r1(MIB, this, /MinusOne=*/ false);		return expandMOV32r1(MIB, this, /MinusOne=*/ false);
case X86::MOV32r_1:		case X86::MOV32r_1:
return expandMOV32r1(MIB, this, /MinusOne=*/ true);		return expandMOV32r1(MIB, this, /MinusOne=*/ true);
		case X86::MOV32ImmSExti8:
		case X86::MOV64ImmSExti8:
		return ExpandMOVImmSExti8(MIB);
case X86::SETB_C8r:		case X86::SETB_C8r:
return Expand2AddrUndef(MIB, get(X86::SBB8rr));		return Expand2AddrUndef(MIB, get(X86::SBB8rr));
case X86::SETB_C16r:		case X86::SETB_C16r:
return Expand2AddrUndef(MIB, get(X86::SBB16rr));		return Expand2AddrUndef(MIB, get(X86::SBB16rr));
case X86::SETB_C32r:		case X86::SETB_C32r:
return Expand2AddrUndef(MIB, get(X86::SBB32rr));		return Expand2AddrUndef(MIB, get(X86::SBB32rr));
case X86::SETB_C64r:		case X86::SETB_C64r:
return Expand2AddrUndef(MIB, get(X86::SBB64rr));		return Expand2AddrUndef(MIB, get(X86::SBB64rr));
▲ Show 20 Lines • Show All 1,964 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86InstrInfo.td

	Show First 20 Lines • Show All 814 Lines • ▼ Show 20 Lines
	def In16BitMode : Predicate<"Subtarget->is16Bit()">,			def In16BitMode : Predicate<"Subtarget->is16Bit()">,
	AssemblerPredicate<"Mode16Bit", "16-bit mode">;			AssemblerPredicate<"Mode16Bit", "16-bit mode">;
	def Not16BitMode : Predicate<"!Subtarget->is16Bit()">,			def Not16BitMode : Predicate<"!Subtarget->is16Bit()">,
	AssemblerPredicate<"!Mode16Bit", "Not 16-bit mode">;			AssemblerPredicate<"!Mode16Bit", "Not 16-bit mode">;
	def In32BitMode : Predicate<"Subtarget->is32Bit()">,			def In32BitMode : Predicate<"Subtarget->is32Bit()">,
	AssemblerPredicate<"Mode32Bit", "32-bit mode">;			AssemblerPredicate<"Mode32Bit", "32-bit mode">;
	def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;			def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
	def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;			def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
				def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() \|\|"
				"Subtarget->getFrameLowering()->hasFP(*MF)">;
	def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;			def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;
	def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;			def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;
	def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;			def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
	def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;			def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
	def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;			def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
	def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;			def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
	def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"			def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
	"TM.getCodeModel() != CodeModel::Kernel">;			"TM.getCodeModel() != CodeModel::Kernel">;
	def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small \|\|"			def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small \|\|"
	"TM.getCodeModel() == CodeModel::Kernel">;			"TM.getCodeModel() == CodeModel::Kernel">;
	def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;			def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
	def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;			def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
	def OptForSize : Predicate<"OptForSize">;			def OptForSize : Predicate<"OptForSize">;
				def OptForMinSize : Predicate<"OptForMinSize">;
	def OptForSpeed : Predicate<"!OptForSize">;			def OptForSpeed : Predicate<"!OptForSize">;
	def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;			def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
	def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;			def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
	def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;			def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
	def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;			def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
	def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;			def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	▲ Show 20 Lines • Show All 2,212 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/materialize-one.ll

	; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - \| FileCheck %s --check-prefix=CHECK32
	; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - \| FileCheck %s --check-prefix=CHECK64

	define i32 @one32() optsize {
	entry:
	ret i32 1

	; CHECK32-LABEL: one32
	; CHECK32: xorl %eax, %eax
	; CHECK32-NEXT: incl %eax
	; CHECK32-NEXT: ret

	; FIXME: Figure out the best approach in 64-bit mode.
	; CHECK64-LABEL: one32
	; CHECK64: movl $1, %eax
	; CHECK64-NEXT: retq
	}

	define i32 @minus_one32() optsize {
	entry:
	ret i32 -1

	; CHECK32-LABEL: minus_one32
	; CHECK32: xorl %eax, %eax
	; CHECK32-NEXT: decl %eax
	; CHECK32-NEXT: ret
	}

	define i16 @one16() optsize {
	entry:
	ret i16 1

	; CHECK32-LABEL: one16
	; CHECK32: xorl %eax, %eax
	; CHECK32-NEXT: incl %eax
	; CHECK32-NEXT: retl
	}

	define i16 @minus_one16() optsize {
	entry:
	ret i16 -1

	; CHECK32-LABEL: minus_one16
	; CHECK32: xorl %eax, %eax
	; CHECK32-NEXT: decl %eax
	; CHECK32-NEXT: retl
	}

	define i32 @test_rematerialization() optsize {
	entry:
	; Materialize -1 (thiscall forces it into %ecx).
	tail call x86_thiscallcc void @f(i32 -1)

	; Clobber all registers except %esp, leaving nowhere to store the -1 besides
	; spilling it to the stack.
	tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()

	; -1 should be re-materialized here instead of getting spilled above.
	ret i32 -1

	; CHECK32-LABEL: test_rematerialization
	; CHECK32: xorl %ecx, %ecx
	; CHECK32-NEXT: decl %ecx
	; CHECK32: calll
	; CHECK32: xorl %eax, %eax
	; CHECK32-NEXT: decl %eax
	; CHECK32-NOT: %eax
	; CHECK32: retl
	}

	define i32 @test_rematerialization2(i32 %x) optsize {
	entry:
	; Materialize -1 (thiscall forces it into %ecx).
	tail call x86_thiscallcc void @f(i32 -1)

	; Clobber all registers except %esp, leaving nowhere to store the -1 besides
	; spilling it to the stack.
	tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()

	; Define eflags.
	%a = icmp ne i32 %x, 123
	%b = zext i1 %a to i32
	; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
	; It must therefore not use the xor-dec lowering.
	%c = select i1 %a, i32 %b, i32 -1
	ret i32 %c

	; CHECK32-LABEL: test_rematerialization2
	; CHECK32: xorl %ecx, %ecx
	; CHECK32-NEXT: decl %ecx
	; CHECK32: calll
	; CHECK32: cmpl
	; CHECK32: setne
	; CHECK32-NOT: xorl
	; CHECK32: movl $-1
	; CHECK32: cmov
	; CHECK32: retl
	}

	declare x86_thiscallcc void @f(i32)

llvm/trunk/test/CodeGen/X86/materialize.ll

				; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - \| FileCheck %s --check-prefix=CHECK32
				; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - \| FileCheck %s --check-prefix=CHECK64
				; RUN: llc -mtriple=x86_64-pc-win32 -mattr=+cmov %s -o - \| FileCheck %s --check-prefix=CHECKWIN64

				define i32 @one32_nooptsize() {
				entry:
				ret i32 1

				; When not optimizing for size, use mov.
				; CHECK32-LABEL: one32_nooptsize:
				; CHECK32: movl $1, %eax
				; CHECK32-NEXT: retl
				; CHECK64-LABEL: one32_nooptsize:
				; CHECK64: movl $1, %eax
				; CHECK64-NEXT: retq
				}

				define i32 @one32() optsize {
				entry:
				ret i32 1

				; CHECK32-LABEL: one32:
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: incl %eax
				; CHECK32-NEXT: retl

				; FIXME: Figure out the best approach in 64-bit mode.
				; CHECK64-LABEL: one32:
				; CHECK64: movl $1, %eax
				; CHECK64-NEXT: retq
				}

				define i32 @one32_minsize() minsize {
				entry:
				ret i32 1

				; On 32-bit, xor-inc is preferred over push-pop.
				; CHECK32-LABEL: one32_minsize:
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: incl %eax
				; CHECK32-NEXT: retl

				; On 64-bit we don't do xor-inc yet, so push-pop it is. Note that we have to
				; pop into a 64-bit register even when we just need 32 bits.
				; CHECK64-LABEL: one32_minsize:
				; CHECK64: pushq $1
				; CHECK64: .cfi_adjust_cfa_offset 8
				; CHECK64: popq %rax
				; CHECK64: .cfi_adjust_cfa_offset -8
				; CHECK64-NEXT: retq
				}

				define i64 @one64_minsize() minsize {
				entry:
				ret i64 1
				; On 64-bit we don't do xor-inc yet, so push-pop it is.
				; CHECK64-LABEL: one64_minsize:
				; CHECK64: pushq $1
				; CHECK64: .cfi_adjust_cfa_offset 8
				; CHECK64: popq %rax
				; CHECK64: .cfi_adjust_cfa_offset -8
				; CHECK64-NEXT: retq

				; On Win64 we can't adjust the stack unless there's a frame pointer.
				; CHECKWIN64-LABEL: one64_minsize:
				; CHECKWIN64: movl $1, %eax
				; CHECKWIN64-NEXT: retq
				}

				define i32 @minus_one32() optsize {
				entry:
				ret i32 -1

				; CHECK32-LABEL: minus_one32:
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: decl %eax
				; CHECK32-NEXT: retl
				}

				define i32 @minus_one32_minsize() minsize {
				entry:
				ret i32 -1

				; xor-dec is preferred over push-pop.
				; CHECK32-LABEL: minus_one32_minsize:
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: decl %eax
				; CHECK32-NEXT: retl
				}

				define i16 @one16() optsize {
				entry:
				ret i16 1

				; CHECK32-LABEL: one16:
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: incl %eax
				; CHECK32-NEXT: retl
				}

				define i16 @minus_one16() optsize {
				entry:
				ret i16 -1

				; CHECK32-LABEL: minus_one16:
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: decl %eax
				; CHECK32-NEXT: retl
				}

				define i32 @minus_five32() minsize {
				entry:
				ret i32 -5

				; CHECK32-LABEL: minus_five32:
				; CHECK32: pushl $-5
				; CHECK32: popl %eax
				; CHECK32: retl
				}

				define i64 @minus_five64() minsize {
				entry:
				ret i64 -5

				; CHECK64-LABEL: minus_five64:
				; CHECK64: pushq $-5
				; CHECK64: .cfi_adjust_cfa_offset 8
				; CHECK64: popq %rax
				; CHECK64: .cfi_adjust_cfa_offset -8
				; CHECK64: retq
				}

				define i32 @rematerialize_minus_one() optsize {
				entry:
				; Materialize -1 (thiscall forces it into %ecx).
				tail call x86_thiscallcc void @f(i32 -1)

				; Clobber all registers except %esp, leaving nowhere to store the -1 besides
				; spilling it to the stack.
				tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()

				; -1 should be re-materialized here instead of getting spilled above.
				ret i32 -1

				; CHECK32-LABEL: rematerialize_minus_one
				; CHECK32: xorl %ecx, %ecx
				; CHECK32-NEXT: decl %ecx
				; CHECK32: calll
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: decl %eax
				; CHECK32-NOT: %eax
				; CHECK32: retl
				}

				define i32 @rematerialize_minus_one_eflags(i32 %x) optsize {
				entry:
				; Materialize -1 (thiscall forces it into %ecx).
				tail call x86_thiscallcc void @f(i32 -1)

				; Clobber all registers except %esp, leaving nowhere to store the -1 besides
				; spilling it to the stack.
				tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()

				; Define eflags.
				%a = icmp ne i32 %x, 123
				%b = zext i1 %a to i32
				; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
				; It must therefore not use the xor-dec lowering.
				%c = select i1 %a, i32 %b, i32 -1
				ret i32 %c

				; CHECK32-LABEL: rematerialize_minus_one_eflags
				; CHECK32: xorl %ecx, %ecx
				; CHECK32-NEXT: decl %ecx
				; CHECK32: calll
				; CHECK32: cmpl
				; CHECK32: setne
				; CHECK32-NOT: xorl
				; CHECK32: movl $-1
				; CHECK32: cmov
				; CHECK32: retl
				}

				declare x86_thiscallcc void @f(i32)

llvm/trunk/test/CodeGen/X86/powi.ll

	Show All 23 Lines
	; CHECK-NEXT: jmp			; CHECK-NEXT: jmp
	%ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]			%ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]
	ret double %ret			ret double %ret
	}			}

	define double @pow_wrapper_minsize(double %a) minsize {			define double @pow_wrapper_minsize(double %a) minsize {
	; CHECK-LABEL: pow_wrapper_minsize:			; CHECK-LABEL: pow_wrapper_minsize:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: movl $15, %edi			; CHECK-NEXT: movl $128, %edi
	; CHECK-NEXT: jmp			; CHECK-NEXT: jmp
	%ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]			%ret = tail call double @llvm.powi.f64(double %a, i32 128) nounwind ; <double> [#uses=1]
	ret double %ret			ret double %ret
	}			}

	declare double @llvm.powi.f64(double, i32) nounwind readonly			declare double @llvm.powi.f64(double, i32) nounwind readonly

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Use push-pop for materializing small constants under 'minsize'
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 43191

llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp

llvm/trunk/lib/Target/X86/X86InstrCompiler.td

llvm/trunk/lib/Target/X86/X86InstrInfo.h

llvm/trunk/lib/Target/X86/X86InstrInfo.cpp

llvm/trunk/lib/Target/X86/X86InstrInfo.td

llvm/trunk/test/CodeGen/X86/materialize-one.ll

llvm/trunk/test/CodeGen/X86/materialize.ll

llvm/trunk/test/CodeGen/X86/powi.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Use push-pop for materializing small constants under 'minsize'
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 43191

llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp

llvm/trunk/lib/Target/X86/X86InstrCompiler.td

llvm/trunk/lib/Target/X86/X86InstrInfo.h

llvm/trunk/lib/Target/X86/X86InstrInfo.cpp

llvm/trunk/lib/Target/X86/X86InstrInfo.td

llvm/trunk/test/CodeGen/X86/materialize-one.ll

llvm/trunk/test/CodeGen/X86/materialize.ll

llvm/trunk/test/CodeGen/X86/powi.ll

[X86] Use push-pop for materializing small constants under 'minsize'
ClosedPublic