Diff 43085

lib/Target/X86/X86ISelDAGToDAG.cpp

Show First 20 Lines • Show All 151 Lines • ▼ Show 20 Lines	class X86DAGToDAGISel final : public SelectionDAGISel {
/// Keep a pointer to the X86Subtarget around so that we can		/// Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.		/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;		const X86Subtarget *Subtarget;

/// If true, selector should try to optimize for code size instead of		/// If true, selector should try to optimize for code size instead of
/// performance.		/// performance.
bool OptForSize;		bool OptForSize;

		/// If true, selector should try to optimize for minimum code size.
		bool OptForMinSize;

public:		public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)		explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), OptForSize(false) {}		: SelectionDAGISel(tm, OptLevel), OptForSize(false),
		OptForMinSize(false) {}

const char *getPassName() const override {		const char *getPassName() const override {
return "X86 DAG->DAG Instruction Selection";		return "X86 DAG->DAG Instruction Selection";
}		}

bool runOnMachineFunction(MachineFunction &MF) override {		bool runOnMachineFunction(MachineFunction &MF) override {
// Reset the subtarget each time through.		// Reset the subtarget each time through.
Subtarget = &MF.getSubtarget<X86Subtarget>();		Subtarget = &MF.getSubtarget<X86Subtarget>();
▲ Show 20 Lines • Show All 355 Lines • ▼ Show 20 Lines	static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&		if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&		Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
Callee.getValue(1).hasOneUse())		Callee.getValue(1).hasOneUse())
return true;		return true;
return false;		return false;
}		}

void X86DAGToDAGISel::PreprocessISelDAG() {		void X86DAGToDAGISel::PreprocessISelDAG() {
// OptForSize is used in pattern predicates that isel is matching.		// OptFor[Min]Size are used in pattern predicates that isel is matching.
OptForSize = MF->getFunction()->optForSize();		OptForSize = MF->getFunction()->optForSize();
		OptForMinSize = MF->getFunction()->optForMinSize();
		assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");

for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),		for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {		E = CurDAG->allnodes_end(); I != E; ) {
SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.		SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

if (OptLevel != CodeGenOpt::None &&		if (OptLevel != CodeGenOpt::None &&
// Only does this when target favors doesn't favor register indirect		// Only does this when target favors doesn't favor register indirect
// call.		// call.
▲ Show 20 Lines • Show All 2,469 Lines • Show Last 20 Lines

lib/Target/X86/X86InstrCompiler.td

	Show First 20 Lines • Show All 244 Lines • ▼ Show 20 Lines

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// Alias Instructions			// Alias Instructions
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	// Alias instruction mapping movr0 to xor.			// Alias instruction mapping movr0 to xor.
	// FIXME: remove when we can teach regalloc that xor reg, reg is ok.			// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
	let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,			let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
	isPseudo = 1 in			isPseudo = 1, AddedComplexity = 20 in
	def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",			def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
	[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;			[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;

	// Other widths can also make use of the 32-bit xor, which may have a smaller			// Other widths can also make use of the 32-bit xor, which may have a smaller
	// encoding and avoid partial register updates.			// encoding and avoid partial register updates.
	def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;			def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
	def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;			def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
	def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {			def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
	let AddedComplexity = 20;			let AddedComplexity = 20;
	}			}

	let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],			let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
	AddedComplexity = 1 in {			AddedComplexity = 15 in {
	// Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,			// Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
	// which only require 3 bytes compared to MOV32ri which requires 5.			// which only require 3 bytes compared to MOV32ri which requires 5.
	let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {			let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {
	def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",			def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
	[(set GR32:$dst, 1)]>;			[(set GR32:$dst, 1)]>;
	def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",			def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
	[(set GR32:$dst, -1)]>;			[(set GR32:$dst, -1)]>;
	}			}

	// MOV16ri is 4 bytes, so the instructions above are smaller.			// MOV16ri is 4 bytes, so the instructions above are smaller.
	def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;			def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;
	def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;			def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
	}			}

				let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
				// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1.
				// FIXME: Add itinerary class and Schedule.
				def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
				[(set GR32:$dst, i32immSExt8:$src)]>,
				Requires<[OptForMinSize, Not64BitMode]>;
				def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
				[(set GR64:$dst, i64immSExt8:$src)]>,
				Requires<[OptForMinSize, NotWin64WithoutFP]>;
				}

	// Materialize i64 constant where top 32-bits are zero. This could theoretically			// Materialize i64 constant where top 32-bits are zero. This could theoretically
	// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however			// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
	// that would make it more difficult to rematerialize.			// that would make it more difficult to rematerialize.
	let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,			let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
	isCodeGenOnly = 1, hasSideEffects = 0 in			isCodeGenOnly = 1, hasSideEffects = 0 in
	def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src),			def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src),
	"", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>;			"", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>;

	▲ Show 20 Lines • Show All 1,577 Lines • Show Last 20 Lines

lib/Target/X86/X86InstrInfo.h

Show All 17 Lines
#include "X86RegisterInfo.h"		#include "X86RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"		#include "llvm/ADT/DenseMap.h"
#include "llvm/Target/TargetInstrInfo.h"		#include "llvm/Target/TargetInstrInfo.h"

#define GET_INSTRINFO_HEADER		#define GET_INSTRINFO_HEADER
#include "X86GenInstrInfo.inc"		#include "X86GenInstrInfo.inc"

namespace llvm {		namespace llvm {
		class MachineInstrBuilder;
class X86RegisterInfo;		class X86RegisterInfo;
class X86Subtarget;		class X86Subtarget;

namespace X86 {		namespace X86 {
// X86 specific condition code. These correspond to X86_*_COND in		// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.		// X86InstrInfo.td. They must be kept in synch.
enum CondCode {		enum CondCode {
COND_A = 0,		COND_A = 0,
▲ Show 20 Lines • Show All 525 Lines • ▼ Show 20 Lines	MachineInstr *foldMemoryOperandCustom(MachineFunction &MF, MachineInstr *MI,
ArrayRef<MachineOperand> MOs,		ArrayRef<MachineOperand> MOs,
MachineBasicBlock::iterator InsertPt,		MachineBasicBlock::iterator InsertPt,
unsigned Size, unsigned Align) const;		unsigned Size, unsigned Align) const;

/// isFrameOperand - Return true and the FrameIndex if the specified		/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and follow operands form a reference to the stack frame.		/// operand and follow operands form a reference to the stack frame.
bool isFrameOperand(const MachineInstr *MI, unsigned int Op,		bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
int &FrameIndex) const;		int &FrameIndex) const;

		/// Expand the MOVImmSExti8 pseudo-instructions.
		bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const;
};		};

} // End llvm namespace		} // End llvm namespace

#endif		#endif

lib/Target/X86/X86InstrInfo.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show All 17 Lines
#include "X86Subtarget.h"		#include "X86Subtarget.h"
#include "X86TargetMachine.h"		#include "X86TargetMachine.h"
#include "llvm/ADT/STLExtras.h"		#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"		#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"		#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"		#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"		#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"		#include "llvm/CodeGen/MachineInstrBuilder.h"
		#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"		#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"		#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"		#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"		#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"		#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"		#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"		#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"		#include "llvm/MC/MCInst.h"
▲ Show 20 Lines • Show All 5,258 Lines • ▼ Show 20 Lines	static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,

// Turn the pseudo into an INC or DEC.		// Turn the pseudo into an INC or DEC.
MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));		MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
MIB.addReg(Reg);		MIB.addReg(Reg);

return true;		return true;
}		}

		bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const {
		MachineBasicBlock &MBB = *MIB->getParent();
		DebugLoc DL = MIB->getDebugLoc();
		int64_t Imm = MIB->getOperand(1).getImm();
		assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
		MachineBasicBlock::iterator I = MIB.getInstr();

		int StackAdjustment;
		switch (MIB->getOpcode()) {
		case X86::MOV32ImmSExti8:
		DavidKreitzer (inline comment, not done): This looks correct, but you could write it more simply by unifying 5311-5318 & 5327-5330. They are the same except for the lines that change the result register. You could use getX86SubSuperRegister(MIB->getOperand(0).getReg(), MVT::i64), which will work regardless of whether the original register is 32 or 64 bits.
		hans (author, inline comment, not done): I hadn't seen getX86SubSuperRegister() before, that seems very handy. Thanks!
		DavidKreitzer (inline comment, not done): Much better, thanks!
		StackAdjustment = 4;
		BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm);
		MIB->setDesc(get(X86::POP32r));
		break;
		case X86::MOV64ImmSExti8:
		StackAdjustment = 8;
		BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm);
		MIB->setDesc(get(X86::POP64r));
		break;
		default:
		llvm_unreachable("Unxpected opcode!");
		}

		// Build CFI if necessary.
		MachineFunction &MF = *MBB.getParent();
		const X86FrameLowering *TFL = Subtarget.getFrameLowering();
		bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
		bool NeedsDwarfCFI =
		!IsWin64Prologue &&
		(MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry());
		bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
		if (EmitCFI) {
		TFL->BuildCFI(MBB, I, DL,
		MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
		TFL->BuildCFI(MBB, std::next(I), DL,
		MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
		}

		return true;
		}

// LoadStackGuard has so far only been implemented for 64-bit MachO. Different		// LoadStackGuard has so far only been implemented for 64-bit MachO. Different
// code sequence is needed for other targets.		// code sequence is needed for other targets.
static void expandLoadStackGuard(MachineInstrBuilder &MIB,		static void expandLoadStackGuard(MachineInstrBuilder &MIB,
const TargetInstrInfo &TII) {		const TargetInstrInfo &TII) {
MachineBasicBlock &MBB = *MIB->getParent();		MachineBasicBlock &MBB = *MIB->getParent();
DebugLoc DL = MIB->getDebugLoc();		DebugLoc DL = MIB->getDebugLoc();
unsigned Reg = MIB->getOperand(0).getReg();		unsigned Reg = MIB->getOperand(0).getReg();
const GlobalValue *GV =		const GlobalValue *GV =
Show All 16 Lines	bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);		MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
switch (MI->getOpcode()) {		switch (MI->getOpcode()) {
case X86::MOV32r0:		case X86::MOV32r0:
return Expand2AddrUndef(MIB, get(X86::XOR32rr));		return Expand2AddrUndef(MIB, get(X86::XOR32rr));
case X86::MOV32r1:		case X86::MOV32r1:
return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);		return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
case X86::MOV32r_1:		case X86::MOV32r_1:
return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);		return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
		case X86::MOV32ImmSExti8:
		case X86::MOV64ImmSExti8:
		return ExpandMOVImmSExti8(MIB);
case X86::SETB_C8r:		case X86::SETB_C8r:
return Expand2AddrUndef(MIB, get(X86::SBB8rr));		return Expand2AddrUndef(MIB, get(X86::SBB8rr));
case X86::SETB_C16r:		case X86::SETB_C16r:
return Expand2AddrUndef(MIB, get(X86::SBB16rr));		return Expand2AddrUndef(MIB, get(X86::SBB16rr));
case X86::SETB_C32r:		case X86::SETB_C32r:
return Expand2AddrUndef(MIB, get(X86::SBB32rr));		return Expand2AddrUndef(MIB, get(X86::SBB32rr));
case X86::SETB_C64r:		case X86::SETB_C64r:
return Expand2AddrUndef(MIB, get(X86::SBB64rr));		return Expand2AddrUndef(MIB, get(X86::SBB64rr));
▲ Show 20 Lines • Show All 1,964 Lines • Show Last 20 Lines

lib/Target/X86/X86InstrInfo.td

	Show First 20 Lines • Show All 814 Lines • ▼ Show 20 Lines
	def In16BitMode : Predicate<"Subtarget->is16Bit()">,			def In16BitMode : Predicate<"Subtarget->is16Bit()">,
	AssemblerPredicate<"Mode16Bit", "16-bit mode">;			AssemblerPredicate<"Mode16Bit", "16-bit mode">;
	def Not16BitMode : Predicate<"!Subtarget->is16Bit()">,			def Not16BitMode : Predicate<"!Subtarget->is16Bit()">,
	AssemblerPredicate<"!Mode16Bit", "Not 16-bit mode">;			AssemblerPredicate<"!Mode16Bit", "Not 16-bit mode">;
	def In32BitMode : Predicate<"Subtarget->is32Bit()">,			def In32BitMode : Predicate<"Subtarget->is32Bit()">,
	AssemblerPredicate<"Mode32Bit", "32-bit mode">;			AssemblerPredicate<"Mode32Bit", "32-bit mode">;
	def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;			def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
	def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;			def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
				def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
				"Subtarget->getFrameLowering()->hasFP(*MF)">;
	def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;			def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;
	def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;			def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;
	def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;			def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
	def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;			def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
	def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;			def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
	def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;			def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
	def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"			def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
	"TM.getCodeModel() != CodeModel::Kernel">;			"TM.getCodeModel() != CodeModel::Kernel">;
	def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"			def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
	"TM.getCodeModel() == CodeModel::Kernel">;			"TM.getCodeModel() == CodeModel::Kernel">;
	def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;			def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
	def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;			def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
	def OptForSize : Predicate<"OptForSize">;			def OptForSize : Predicate<"OptForSize">;
				def OptForMinSize : Predicate<"OptForMinSize">;
	def OptForSpeed : Predicate<"!OptForSize">;			def OptForSpeed : Predicate<"!OptForSize">;
	def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;			def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
	def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;			def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
	def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;			def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
	def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;			def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
	def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;			def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	▲ Show 20 Lines • Show All 2,212 Lines • Show Last 20 Lines

test/CodeGen/X86/materialize-one.ll

This file was deleted.

	; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK32
	; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK64

	define i32 @one32() optsize {
	entry:
	ret i32 1

	; CHECK32-LABEL: one32
	; CHECK32: xorl %eax, %eax
	; CHECK32-NEXT: incl %eax
	; CHECK32-NEXT: ret

	; FIXME: Figure out the best approach in 64-bit mode.
	; CHECK64-LABEL: one32
	; CHECK64: movl $1, %eax
	; CHECK64-NEXT: retq
	}

	define i32 @minus_one32() optsize {
	entry:
	ret i32 -1

	; CHECK32-LABEL: minus_one32
	; CHECK32: xorl %eax, %eax
	; CHECK32-NEXT: decl %eax
	; CHECK32-NEXT: ret
	}

	define i16 @one16() optsize {
	entry:
	ret i16 1

	; CHECK32-LABEL: one16
	; CHECK32: xorl %eax, %eax
	; CHECK32-NEXT: incl %eax
	; CHECK32-NEXT: retl
	}

	define i16 @minus_one16() optsize {
	entry:
	ret i16 -1

	; CHECK32-LABEL: minus_one16
	; CHECK32: xorl %eax, %eax
	; CHECK32-NEXT: decl %eax
	; CHECK32-NEXT: retl
	}

	define i32 @test_rematerialization() optsize {
	entry:
	; Materialize -1 (thiscall forces it into %ecx).
	tail call x86_thiscallcc void @f(i32 -1)

	; Clobber all registers except %esp, leaving nowhere to store the -1 besides
	; spilling it to the stack.
	tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()

	; -1 should be re-materialized here instead of getting spilled above.
	ret i32 -1

	; CHECK32-LABEL: test_rematerialization
	; CHECK32: xorl %ecx, %ecx
	; CHECK32-NEXT: decl %ecx
	; CHECK32: calll
	; CHECK32: xorl %eax, %eax
	; CHECK32-NEXT: decl %eax
	; CHECK32-NOT: %eax
	; CHECK32: retl
	}

	define i32 @test_rematerialization2(i32 %x) optsize {
	entry:
	; Materialize -1 (thiscall forces it into %ecx).
	tail call x86_thiscallcc void @f(i32 -1)

	; Clobber all registers except %esp, leaving nowhere to store the -1 besides
	; spilling it to the stack.
	tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()

	; Define eflags.
	%a = icmp ne i32 %x, 123
	%b = zext i1 %a to i32
	; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
	; It must therefore not use the xor-dec lowering.
	%c = select i1 %a, i32 %b, i32 -1
	ret i32 %c

	; CHECK32-LABEL: test_rematerialization2
	; CHECK32: xorl %ecx, %ecx
	; CHECK32-NEXT: decl %ecx
	; CHECK32: calll
	; CHECK32: cmpl
	; CHECK32: setne
	; CHECK32-NOT: xorl
	; CHECK32: movl $-1
	; CHECK32: cmov
	; CHECK32: retl
	}

	declare x86_thiscallcc void @f(i32)

test/CodeGen/X86/materialize.ll

This file was added.

				; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK32
				; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK64
				; RUN: llc -mtriple=x86_64-pc-win32 -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECKWIN64

				define i32 @one32_nooptsize() {
				entry:
				ret i32 1

				; When not optimizing for size, use mov.
				; CHECK32-LABEL: one32_nooptsize:
				; CHECK32: movl $1, %eax
				; CHECK32-NEXT: retl
				; CHECK64-LABEL: one32_nooptsize:
				; CHECK64: movl $1, %eax
				; CHECK64-NEXT: retq
				}

				define i32 @one32() optsize {
				entry:
				ret i32 1

				; CHECK32-LABEL: one32:
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: incl %eax
				; CHECK32-NEXT: retl

				; FIXME: Figure out the best approach in 64-bit mode.
				; CHECK64-LABEL: one32:
				; CHECK64: movl $1, %eax
				; CHECK64-NEXT: retq
				}

				define i32 @one32_minsize() minsize {
				entry:
				ret i32 1

				; On 32-bit, xor-inc is preferred over push-pop.
				; CHECK32-LABEL: one32_minsize:
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: incl %eax
				; CHECK32-NEXT: retl

				; FIXME: 64-bit doesn't realize it can materialize a 64-bit 1 and take the low half.
				; CHECK64-LABEL: one32_minsize:
				DavidKreitzer (inline comment, not done): The reason for deferring the use of xor-inc/dec on 64-bit was the possible REX prefixes, right? You'll have the same problem with the pop, though it will only cost one byte rather than two. Is that one-byte difference the reason you allow push/pop currently but avoid xor-inc/dec?
				hans (author, inline comment, not done): Exactly, even with REX, push/pop is always a win.
				; CHECK64: movl $1, %eax
				; CHECK64-NEXT: retq
				DavidKreitzer (inline comment, not done): Hmmm, pushl and popl are not valid instructions in 64-bit mode. They should instead be pushq and popq, and the CFA adjustments should be +/-8. It looks like this is just a problem in the test, as the code to generate them looks fine.
				hans (author, inline comment, not done): Hmm, it's not just the test. It's selecting the MOV32ImmSExti8 instruction. I'll fix that. And I now realize that the instruction selector doesn't realize it can use MOV64ImmSExti8 to get a 32-bit constant. I tried to add a pattern for that, but failed so far.. :-/
				DavidKreitzer (inline comment, not done): Ah, okay, thanks for the explanation. So maybe the right thing to do is to allow MOV32ImmSExti8 to be selected on both 32-bit and 64-bit. Then in ExpandMOVImmSExti8, base the push/pop size not on the opcode but on the target. (Just maybe assert that MOV64ImmSExti8 doesn't occur on 32-bit.)
				hans (author, inline comment, not done): That sounds good to me. The tricky part is that we'll have to widen the target register of the pop, but I think that's doable.
				}

				define i64 @one64_minsize() minsize {
				entry:
				ret i64 1
				; On 64-bit we don't do xor-inc yet, so push-pop it is.
				; CHECK64-LABEL: one64_minsize:
				; CHECK64: pushq $1
				; CHECK64: .cfi_adjust_cfa_offset 8
				; CHECK64: popq %rax
				; CHECK64: .cfi_adjust_cfa_offset -8
				; CHECK64-NEXT: retq

				; On Win64 we can't adjust the stack unless there's a frame pointer.
				; CHECKWIN64-LABEL: one64_minsize:
				; CHECKWIN64: movl $1, %eax
				; CHECKWIN64-NEXT: retq
				}

				define i32 @minus_one32() optsize {
				entry:
				ret i32 -1

				; CHECK32-LABEL: minus_one32:
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: decl %eax
				; CHECK32-NEXT: retl
				}

				define i32 @minus_one32_minsize() minsize {
				entry:
				ret i32 -1

				; xor-dec is preferred over push-pop.
				; CHECK32-LABEL: minus_one32_minsize:
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: decl %eax
				; CHECK32-NEXT: retl
				}

				define i16 @one16() optsize {
				entry:
				ret i16 1

				; CHECK32-LABEL: one16:
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: incl %eax
				; CHECK32-NEXT: retl
				}

				define i16 @minus_one16() optsize {
				entry:
				ret i16 -1

				; CHECK32-LABEL: minus_one16:
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: decl %eax
				; CHECK32-NEXT: retl
				}

				define i32 @minus_five32() minsize {
				entry:
				ret i32 -5

				; CHECK32-LABEL: minus_five32:
				; CHECK32: pushl $-5
				; CHECK32: popl %eax
				; CHECK32: retl
				}

				define i64 @minus_five64() minsize {
				entry:
				ret i64 -5

				; CHECK64-LABEL: minus_five64:
				; CHECK64: pushq $-5
				; CHECK64: .cfi_adjust_cfa_offset 8
				; CHECK64: popq %rax
				; CHECK64: .cfi_adjust_cfa_offset -8
				; CHECK64: retq
				}

				define i32 @rematerialize_minus_one() optsize {
				entry:
				; Materialize -1 (thiscall forces it into %ecx).
				tail call x86_thiscallcc void @f(i32 -1)

				; Clobber all registers except %esp, leaving nowhere to store the -1 besides
				; spilling it to the stack.
				tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()

				; -1 should be re-materialized here instead of getting spilled above.
				ret i32 -1

				; CHECK32-LABEL: rematerialize_minus_one
				; CHECK32: xorl %ecx, %ecx
				; CHECK32-NEXT: decl %ecx
				; CHECK32: calll
				; CHECK32: xorl %eax, %eax
				; CHECK32-NEXT: decl %eax
				; CHECK32-NOT: %eax
				; CHECK32: retl
				}

				define i32 @rematerialize_minus_one_eflags(i32 %x) optsize {
				entry:
				; Materialize -1 (thiscall forces it into %ecx).
				tail call x86_thiscallcc void @f(i32 -1)

				; Clobber all registers except %esp, leaving nowhere to store the -1 besides
				; spilling it to the stack.
				tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()

				; Define eflags.
				%a = icmp ne i32 %x, 123
				%b = zext i1 %a to i32
				; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
				; It must therefore not use the xor-dec lowering.
				%c = select i1 %a, i32 %b, i32 -1
				ret i32 %c

				; CHECK32-LABEL: rematerialize_minus_one_eflags
				; CHECK32: xorl %ecx, %ecx
				; CHECK32-NEXT: decl %ecx
				; CHECK32: calll
				; CHECK32: cmpl
				; CHECK32: setne
				; CHECK32-NOT: xorl
				; CHECK32: movl $-1
				; CHECK32: cmov
				; CHECK32: retl
				}

				declare x86_thiscallcc void @f(i32)

test/CodeGen/X86/powi.ll

	Show All 23 Lines
	; CHECK-NEXT: jmp			; CHECK-NEXT: jmp
	%ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]			%ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]
	ret double %ret			ret double %ret
	}			}

	define double @pow_wrapper_minsize(double %a) minsize {			define double @pow_wrapper_minsize(double %a) minsize {
	; CHECK-LABEL: pow_wrapper_minsize:			; CHECK-LABEL: pow_wrapper_minsize:
	; CHECK: # BB#0:			; CHECK: # BB#0:
	; CHECK-NEXT: movl $15, %edi			; CHECK-NEXT: movl $128, %edi
	; CHECK-NEXT: jmp			; CHECK-NEXT: jmp
	%ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]			%ret = tail call double @llvm.powi.f64(double %a, i32 128) nounwind ; <double> [#uses=1]
	ret double %ret			ret double %ret
	}			}

	declare double @llvm.powi.f64(double, i32) nounwind readonly			declare double @llvm.powi.f64(double, i32) nounwind readonly

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Use push-pop for materializing small constants under 'minsize'
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 43085

lib/Target/X86/X86ISelDAGToDAG.cpp

lib/Target/X86/X86InstrCompiler.td

lib/Target/X86/X86InstrInfo.h

lib/Target/X86/X86InstrInfo.cpp

lib/Target/X86/X86InstrInfo.td

test/CodeGen/X86/materialize-one.ll

test/CodeGen/X86/materialize.ll

test/CodeGen/X86/powi.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Use push-pop for materializing small constants under 'minsize'
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 43085

lib/Target/X86/X86ISelDAGToDAG.cpp

lib/Target/X86/X86InstrCompiler.td

lib/Target/X86/X86InstrInfo.h

lib/Target/X86/X86InstrInfo.cpp

lib/Target/X86/X86InstrInfo.td

test/CodeGen/X86/materialize-one.ll

test/CodeGen/X86/materialize.ll

test/CodeGen/X86/powi.ll

[X86] Use push-pop for materializing small constants under 'minsize'
ClosedPublic