Diff 12297

lib/Target/X86/X86CodeEmitter.cpp

	Show First 20 Lines • Show All 1,002 Lines • ▼ Show 20 Lines

	template<class CodeEmitter>			template<class CodeEmitter>
	void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,			void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
	const MCInstrDesc *Desc) {			const MCInstrDesc *Desc) {
	DEBUG(dbgs() << MI);			DEBUG(dbgs() << MI);

	// If this is a pseudo instruction, lower it.			// If this is a pseudo instruction, lower it.
	switch (Desc->getOpcode()) {			switch (Desc->getOpcode()) {
	case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break;			case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break;
	case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break;			case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break;
	case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break;			case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break;
	case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break;			case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break;
	case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break;			case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break;
	case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break;			case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break;
	case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break;			case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break;
	case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break;			case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break;
	case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break;			case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break;
	case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break;			case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break;
	case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break;			case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break;
	case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break;			case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break;
	case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break;			case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break;
	case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break;			case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break;
	case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break;			case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break;
	case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break;			case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break;
	case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break;			case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break;
				case X86::RELEASE_MOV8mi: Desc = UpdateOp(MI, II, X86::MOV8mi); break;
				case X86::RELEASE_MOV16mi: Desc = UpdateOp(MI, II, X86::MOV16mi); break;
				case X86::RELEASE_MOV32mi: Desc = UpdateOp(MI, II, X86::MOV32mi); break;
				case X86::RELEASE_MOV64mi32: Desc = UpdateOp(MI, II, X86::MOV64mi32); break;
				case X86::RELEASE_ADD8mi: Desc = UpdateOp(MI, II, X86::ADD8mi); break;
				case X86::RELEASE_ADD32mi: Desc = UpdateOp(MI, II, X86::ADD32mi); break;
				case X86::RELEASE_ADD64mi32: Desc = UpdateOp(MI, II, X86::ADD64mi32); break;
				case X86::RELEASE_AND8mi: Desc = UpdateOp(MI, II, X86::AND8mi); break;
				case X86::RELEASE_AND32mi: Desc = UpdateOp(MI, II, X86::AND32mi); break;
				case X86::RELEASE_AND64mi32: Desc = UpdateOp(MI, II, X86::AND64mi32); break;
				case X86::RELEASE_OR8mi: Desc = UpdateOp(MI, II, X86::OR8mi); break;
				case X86::RELEASE_OR32mi: Desc = UpdateOp(MI, II, X86::OR32mi); break;
				case X86::RELEASE_OR64mi32: Desc = UpdateOp(MI, II, X86::OR64mi32); break;
				case X86::RELEASE_XOR8mi: Desc = UpdateOp(MI, II, X86::XOR8mi); break;
				case X86::RELEASE_XOR32mi: Desc = UpdateOp(MI, II, X86::XOR32mi); break;
				case X86::RELEASE_XOR64mi32: Desc = UpdateOp(MI, II, X86::XOR64mi32); break;
				case X86::RELEASE_INC8m: Desc = UpdateOp(MI, II, X86::INC8m); break;
				case X86::RELEASE_INC16m: Desc = UpdateOp(MI, II, X86::INC16m); break;
				case X86::RELEASE_INC32m: Desc = UpdateOp(MI, II, X86::INC32m); break;
				case X86::RELEASE_INC64m: Desc = UpdateOp(MI, II, X86::INC64m); break;
				case X86::RELEASE_DEC8m: Desc = UpdateOp(MI, II, X86::DEC8m); break;
				case X86::RELEASE_DEC16m: Desc = UpdateOp(MI, II, X86::DEC16m); break;
				case X86::RELEASE_DEC32m: Desc = UpdateOp(MI, II, X86::DEC32m); break;
				case X86::RELEASE_DEC64m: Desc = UpdateOp(MI, II, X86::DEC64m); break;
	}			}


	MCE.processDebugLoc(MI.getDebugLoc(), true);			MCE.processDebugLoc(MI.getDebugLoc(), true);

	unsigned Opcode = Desc->Opcode;			unsigned Opcode = Desc->Opcode;

	// If this is a two-address instruction, skip one of the register operands.			// If this is a two-address instruction, skip one of the register operands.
	▲ Show 20 Lines • Show All 467 Lines • Show Last 20 Lines

lib/Target/X86/X86InstrCompiler.td

Show First 20 Lines • Show All 745 Lines • ▼ Show 20 Lines	def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
itin>;		itin>;
}		}
}		}

defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",		defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,		IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>,
TB, LOCK;		TB, LOCK;

def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),		/* The following multiclass tries to make sure that in code like
"#ACQUIRE_MOV PSEUDO!",		* x.store (immediate op x.load(acquire), release)
[(set GR8:$dst, (atomic_load_8 addr:$src))]>;		* an operation directly on memory is generated instead of wasting a register.
def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),		* It is not automatic as atomic_store/load are only lowered to MOV instructions
"#ACQUIRE_MOV PSEUDO!",		* extremely late to prevent them from being accidentally reordered in the backend
[(set GR16:$dst, (atomic_load_16 addr:$src))]>;		* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),		*/
"#ACQUIRE_MOV PSEUDO!",		multiclass RELEASE_BINOP_MI<string op> {
[(set GR32:$dst, (atomic_load_32 addr:$src))]>;		def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),		"#RELEASE_BINOP PSEUDO!",
"#ACQUIRE_MOV PSEUDO!",		[(atomic_store_8 addr:$dst, (!cast<PatFrag>(op)
[(set GR64:$dst, (atomic_load_64 addr:$src))]>;		(atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
		// NAME#16 is not generated as 16-bit arithmetic instructions are considered
		// costly and avoided as far as possible by this backend anyway
		def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
		"#RELEASE_BINOP PSEUDO!",
		[(atomic_store_32 addr:$dst, (!cast<PatFrag>(op)
		(atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
		def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
		"#RELEASE_BINOP PSEUDO!",
		[(atomic_store_64 addr:$dst, (!cast<PatFrag>(op)
		(atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
		}
		defm RELEASE_ADD : RELEASE_BINOP_MI<"add">;
		defm RELEASE_AND : RELEASE_BINOP_MI<"and">;
		defm RELEASE_OR : RELEASE_BINOP_MI<"or">;
		defm RELEASE_XOR : RELEASE_BINOP_MI<"xor">;
		// Note: we don't deal with sub, because subtractions of constants are
		// optimized into additions before this code can run

		multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
		def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
		"#RELEASE_UNOP PSEUDO!",
		[(atomic_store_8 addr:$dst, dag8)]>;
		def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
		"#RELEASE_UNOP PSEUDO!",
		[(atomic_store_16 addr:$dst, dag16)]>;
		def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
		"#RELEASE_UNOP PSEUDO!",
		[(atomic_store_32 addr:$dst, dag32)]>;
		def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
		"#RELEASE_UNOP PSEUDO!",
		[(atomic_store_64 addr:$dst, dag64)]>;
		}

		defm RELEASE_INC : RELEASE_UNOP<
		(add (atomic_load_8 addr:$dst), (i8 1)),
		(add (atomic_load_16 addr:$dst), (i16 1)),
		(add (atomic_load_32 addr:$dst), (i32 1)),
		(add (atomic_load_64 addr:$dst), (i64 1))>;
		defm RELEASE_DEC : RELEASE_UNOP<
		(add (atomic_load_8 addr:$dst), (i8 -1)),
		(add (atomic_load_16 addr:$dst), (i16 -1)),
		(add (atomic_load_32 addr:$dst), (i32 -1)),
		(add (atomic_load_64 addr:$dst), (i64 -1))>;
		/*
		TODO: These don't work because the type inference of TableGen fails.
		TODO: find a way to fix it.
		defm RELEASE_NEG : RELEASE_UNOP<
		(ineg (atomic_load_8 addr:$dst)),
		(ineg (atomic_load_16 addr:$dst)),
		(ineg (atomic_load_32 addr:$dst)),
		(ineg (atomic_load_64 addr:$dst))>;
		defm RELEASE_NOT : RELEASE_UNOP<
		(not (atomic_load_8 addr:$dst)),
		(not (atomic_load_16 addr:$dst)),
		(not (atomic_load_32 addr:$dst)),
		(not (atomic_load_64 addr:$dst))>;
		*/

		def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
		"#RELEASE_MOV PSEUDO !",
		[(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
		def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
		"#RELEASE_MOV PSEUDO !",
		[(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
		def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
		"#RELEASE_MOV PSEUDO !",
		[(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
		def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
		"#RELEASE_MOV PSEUDO !",
		[(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;

def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),		def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
"#RELEASE_MOV PSEUDO!",		"#RELEASE_MOV PSEUDO!",
[(atomic_store_8 addr:$dst, GR8 :$src)]>;		[(atomic_store_8 addr:$dst, GR8 :$src)]>;
def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),		def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
"#RELEASE_MOV PSEUDO!",		"#RELEASE_MOV PSEUDO!",
[(atomic_store_16 addr:$dst, GR16:$src)]>;		[(atomic_store_16 addr:$dst, GR16:$src)]>;
def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),		def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
"#RELEASE_MOV PSEUDO!",		"#RELEASE_MOV PSEUDO!",
[(atomic_store_32 addr:$dst, GR32:$src)]>;		[(atomic_store_32 addr:$dst, GR32:$src)]>;
def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),		def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
"#RELEASE_MOV PSEUDO!",		"#RELEASE_MOV PSEUDO!",
[(atomic_store_64 addr:$dst, GR64:$src)]>;		[(atomic_store_64 addr:$dst, GR64:$src)]>;

		def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
		"#ACQUIRE_MOV PSEUDO!",
		[(set GR8:$dst, (atomic_load_8 addr:$src))]>;
		def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
		"#ACQUIRE_MOV PSEUDO!",
		[(set GR16:$dst, (atomic_load_16 addr:$src))]>;
		def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
		"#ACQUIRE_MOV PSEUDO!",
		[(set GR32:$dst, (atomic_load_32 addr:$src))]>;
		def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
		"#ACQUIRE_MOV PSEUDO!",
		[(set GR64:$dst, (atomic_load_64 addr:$src))]>;
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Conditional Move Pseudo Instructions.		// Conditional Move Pseudo Instructions.
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//


// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after		// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after
// instruction selection into a branch sequence.		// instruction selection into a branch sequence.
let Uses = [EFLAGS], usesCustomInserter = 1 in {		let Uses = [EFLAGS], usesCustomInserter = 1 in {
def CMOV_FR32 : I<0, Pseudo,		def CMOV_FR32 : I<0, Pseudo,
(outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),		(outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
"#CMOV_FR32 PSEUDO!",		"#CMOV_FR32 PSEUDO!",
[(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,		[(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
EFLAGS))]>;		EFLAGS))]>;
▲ Show 20 Lines • Show All 988 Lines • Show Last 20 Lines

lib/Target/X86/X86MCInstLower.cpp

Show First 20 Lines • Show All 577 Lines • ▼ Show 20 Lines	ReSimplify:
case X86::JL_4: OutMI.setOpcode(X86::JL_1); break;		case X86::JL_4: OutMI.setOpcode(X86::JL_1); break;
case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break;		case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break;
case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;		case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;
case X86::JG_4: OutMI.setOpcode(X86::JG_1); break;		case X86::JG_4: OutMI.setOpcode(X86::JG_1); break;

// Atomic load and store require a separate pseudo-inst because Acquire		// Atomic load and store require a separate pseudo-inst because Acquire
// implies mayStore and Release implies mayLoad; fix these to regular MOV		// implies mayStore and Release implies mayLoad; fix these to regular MOV
// instructions here		// instructions here
case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;		case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify;
case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;		case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify;
case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;		case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify;
case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;		case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify;
case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;		case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify;
case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;		case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify;
case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;		case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify;
case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;		case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify;
		case X86::RELEASE_MOV8mi: OutMI.setOpcode(X86::MOV8mi); goto ReSimplify;
		case X86::RELEASE_MOV16mi: OutMI.setOpcode(X86::MOV16mi); goto ReSimplify;
		case X86::RELEASE_MOV32mi: OutMI.setOpcode(X86::MOV32mi); goto ReSimplify;
		case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
		case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify;
		case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify;
		case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
		case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify;
		case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify;
		case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
		case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify;
		case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify;
		case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify;
		case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify;
		case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify;
		case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;
		case X86::RELEASE_INC8m: OutMI.setOpcode(X86::INC8m); goto ReSimplify;
		case X86::RELEASE_INC16m: OutMI.setOpcode(X86::INC16m); goto ReSimplify;
		case X86::RELEASE_INC32m: OutMI.setOpcode(X86::INC32m); goto ReSimplify;
		case X86::RELEASE_INC64m: OutMI.setOpcode(X86::INC64m); goto ReSimplify;
		case X86::RELEASE_DEC8m: OutMI.setOpcode(X86::DEC8m); goto ReSimplify;
		case X86::RELEASE_DEC16m: OutMI.setOpcode(X86::DEC16m); goto ReSimplify;
		case X86::RELEASE_DEC32m: OutMI.setOpcode(X86::DEC32m); goto ReSimplify;
		case X86::RELEASE_DEC64m: OutMI.setOpcode(X86::DEC64m); goto ReSimplify;

// We don't currently select the correct instruction form for instructions		// We don't currently select the correct instruction form for instructions
// which have a short %eax, etc. form. Handle this by custom lowering, for		// which have a short %eax, etc. form. Handle this by custom lowering, for
// now.		// now.
//		//
// Note, we are currently not handling the following instructions:		// Note, we are currently not handling the following instructions:
// MOV64ao8, MOV64o8a		// MOV64ao8, MOV64o8a
// XCHG16ar, XCHG32ar, XCHG64ar		// XCHG16ar, XCHG32ar, XCHG64ar
▲ Show 20 Lines • Show All 481 Lines • Show Last 20 Lines

test/CodeGen/X86/atomic_mi.ll

This file was added.

				; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s --check-prefix X64
				; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s --check-prefix X32

				; This file checks that atomic (non-seq_cst) stores of immediate values are
				; done in one mov instruction and not 2. More precisely, it makes sure that the
				; immediate is not first copied uselessly into a register.

				; Similarly, it checks that a binary operation of an immediate with an atomic
				; variable that is stored back in that variable is done as a single instruction.
				; For example: x.store(42 + x.load(memory_order_acquire), memory_order_release)
				; should be just an add instruction, instead of loading x into a register, doing
				; an add and storing the result back.
				; The binary operations supported are currently add, and, or, xor.
				; sub is not supported because it is translated into an addition of the
				; negated immediate.
				; Finally, we also check the same kind of pattern for inc/dec

				; seq_cst stores are left as (lock) xchgl, but we try to check every other
				; attribute at least once.

				; Please note that these operations do not require the lock prefix: only
				; sequentially consistent stores require this kind of protection on X86.
				; And even for seq_cst operations, llvm uses the xchg instruction which has
				; an implicit lock prefix, so making it explicit is not required.

				define void @store_atomic_imm_8(i8* %p) {
				; X64-LABEL: store_atomic_imm_8
				; X64: movb
				; X64-NOT: movb
				; X32-LABEL: store_atomic_imm_8
				; X32: movb
				; X32-NOT: movb
				store atomic i8 42, i8* %p release, align 1
				ret void
				}

				define void @store_atomic_imm_16(i16* %p) {
				; X64-LABEL: store_atomic_imm_16
				; X64: movw
				; X64-NOT: movw
				; X32-LABEL: store_atomic_imm_16
				; X32: movw
				; X32-NOT: movw
				store atomic i16 42, i16* %p monotonic, align 2
				ret void
				}

				define void @store_atomic_imm_32(i32* %p) {
				; X64-LABEL: store_atomic_imm_32
				; X64: movl
				; X64-NOT: movl
				; On 32 bits, there is an extra movl for each of those functions: the pointer
				; argument is passed on the stack and must first be loaded into a register.
				; X32-LABEL: store_atomic_imm_32
				; X32: movl 4(%esp), %eax
				; X32: movl
				; X32-NOT: movl
				store atomic i32 42, i32* %p release, align 4
				ret void
				}

				define void @store_atomic_imm_64(i64* %p) {
				; X64-LABEL: store_atomic_imm_64
				; X64: movq
				; X64-NOT: movq
				; These are implemented with a CAS loop on 32 bit architectures, and thus
				; cannot be optimized in the same way as the others.
				; X32-LABEL: store_atomic_imm_64
				; X32: cmpxchg8b
				store atomic i64 42, i64* %p release, align 8
				ret void
				}

				; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov;
				; even on X64, one must use movabsq, which can only target a register.
				define void @store_atomic_imm_64_big(i64* %p) {
				; X64-LABEL: store_atomic_imm_64_big
				; X64: movabsq
				; X64: movq
				store atomic i64 100000000000, i64* %p monotonic, align 8
				ret void
				}

				; It would be incorrect to replace a lock xchgl by a movl
				define void @store_atomic_imm_32_seq_cst(i32* %p) {
				; X64-LABEL: store_atomic_imm_32_seq_cst
				; X64: xchgl
				; X32-LABEL: store_atomic_imm_32_seq_cst
				; X32: xchgl
				store atomic i32 42, i32* %p seq_cst, align 4
				ret void
				}

				; ----- ADD -----

				define void @add_8(i8* %p) {
				; X64-LABEL: add_8
				; X64-NOT: lock
				; X64: addb
				; X64-NOT: movb
				; X32-LABEL: add_8
				; X32-NOT: lock
				; X32: addb
				; X32-NOT: movb
				%1 = load atomic i8* %p seq_cst, align 1
				%2 = add i8 %1, 2
				store atomic i8 %2, i8* %p release, align 1
				ret void
				}

				define void @add_16(i16* %p) {
				; Currently the transformation is not done on 16 bit accesses, as the backend
				; treats 16 bit arithmetic as expensive on X86/X86_64.
				; X64-LABEL: add_16
				; X64-NOT: addw
				; X32-LABEL: add_16
				; X32-NOT: addw
				%1 = load atomic i16* %p acquire, align 2
				%2 = add i16 %1, 2
				store atomic i16 %2, i16* %p release, align 2
				ret void
				}

				define void @add_32(i32* %p) {
				; X64-LABEL: add_32
				; X64-NOT: lock
				; X64: addl
				; X64-NOT: movl
				; X32-LABEL: add_32
				; X32-NOT: lock
				; X32: addl
				; X32-NOT: movl
				%1 = load atomic i32* %p acquire, align 4
				%2 = add i32 %1, 2
				store atomic i32 %2, i32* %p monotonic, align 4
				ret void
				}

				define void @add_64(i64* %p) {
				; X64-LABEL: add_64
				; X64-NOT: lock
				; X64: addq
				; X64-NOT: movq
				; We do not check X86-32 as it cannot do 'addq'.
				; X32-LABEL: add_64
				%1 = load atomic i64* %p acquire, align 8
				%2 = add i64 %1, 2
				store atomic i64 %2, i64* %p release, align 8
				ret void
				}

				define void @add_32_seq_cst(i32* %p) {
				; X64-LABEL: add_32_seq_cst
				; X64: xchgl
				; X32-LABEL: add_32_seq_cst
				; X32: xchgl
				%1 = load atomic i32* %p monotonic, align 4
				%2 = add i32 %1, 2
				store atomic i32 %2, i32* %p seq_cst, align 4
				ret void
				}

				; ----- AND -----

				define void @and_8(i8* %p) {
				; X64-LABEL: and_8
				; X64-NOT: lock
				; X64: andb
				; X64-NOT: movb
				; X32-LABEL: and_8
				; X32-NOT: lock
				; X32: andb
				; X32-NOT: movb
				%1 = load atomic i8* %p monotonic, align 1
				%2 = and i8 %1, 2
				store atomic i8 %2, i8* %p release, align 1
				ret void
				}

				define void @and_16(i16* %p) {
				; Currently the transformation is not done on 16 bit accesses, as the backend
				; treats 16 bit arithmetic as expensive on X86/X86_64.
				; X64-LABEL: and_16
				; X64-NOT: andw
				; X32-LABEL: and_16
				; X32-NOT: andw
				%1 = load atomic i16* %p acquire, align 2
				%2 = and i16 %1, 2
				store atomic i16 %2, i16* %p release, align 2
				ret void
				}

				define void @and_32(i32* %p) {
				; X64-LABEL: and_32
				; X64-NOT: lock
				; X64: andl
				; X64-NOT: movl
				; X32-LABEL: and_32
				; X32-NOT: lock
				; X32: andl
				; X32-NOT: movl
				%1 = load atomic i32* %p acquire, align 4
				%2 = and i32 %1, 2
				store atomic i32 %2, i32* %p release, align 4
				ret void
				}

				define void @and_64(i64* %p) {
				; X64-LABEL: and_64
				; X64-NOT: lock
				; X64: andq
				; X64-NOT: movq
				; We do not check X86-32 as it cannot do 'andq'.
				; X32-LABEL: and_64
				%1 = load atomic i64* %p acquire, align 8
				%2 = and i64 %1, 2
				store atomic i64 %2, i64* %p release, align 8
				ret void
				}

				define void @and_32_seq_cst(i32* %p) {
				; X64-LABEL: and_32_seq_cst
				; X64: xchgl
				; X32-LABEL: and_32_seq_cst
				reames (unsubmitted inline comment, not done): Shouldn't there be a lock prefix on this one to enforce the StoreLoad barrier required by cst? cst is w.r.t. all addresses (unlike every other ordering mode...)
				; X32: xchgl
				%1 = load atomic i32* %p monotonic, align 4
				%2 = and i32 %1, 2
				store atomic i32 %2, i32* %p seq_cst, align 4
				ret void
				}

				; ----- OR -----

				define void @or_8(i8* %p) {
				; X64-LABEL: or_8
				; X64-NOT: lock
				; X64: orb
				; X64-NOT: movb
				; X32-LABEL: or_8
				; X32-NOT: lock
				; X32: orb
				; X32-NOT: movb
				%1 = load atomic i8* %p acquire, align 1
				%2 = or i8 %1, 2
				store atomic i8 %2, i8* %p release, align 1
				ret void
				}

				define void @or_16(i16* %p) {
				; As with add_16 and and_16, the transformation is not done on 16 bit
				; accesses (the backend treats 16 bit arithmetic as expensive on
				; X86/X86_64), so no 'orw' may be emitted on either target.
				; X64-LABEL: or_16
				; X64-NOT: orw
				; X32-LABEL: or_16
				; X32-NOT: orw
				%1 = load atomic i16* %p acquire, align 2
				%2 = or i16 %1, 2
				store atomic i16 %2, i16* %p release, align 2
				ret void
				}

				define void @or_32(i32* %p) {
				; X64-LABEL: or_32
				; X64-NOT: lock
				; X64: orl
				; X64-NOT: movl
				; X32-LABEL: or_32
				; X32-NOT: lock
				; X32: orl
				; X32-NOT: movl
				%1 = load atomic i32* %p acquire, align 4
				%2 = or i32 %1, 2
				store atomic i32 %2, i32* %p release, align 4
				ret void
				}

				define void @or_64(i64* %p) {
				; X64-LABEL: or_64
				; X64-NOT: lock
				; X64: orq
				; X64-NOT: movq
				; We do not check X86-32 as it cannot do 'orq'.
				; X32-LABEL: or_64
				%1 = load atomic i64* %p acquire, align 8
				%2 = or i64 %1, 2
				store atomic i64 %2, i64* %p release, align 8
				ret void
				}

				define void @or_32_seq_cst(i32* %p) {
				; X64-LABEL: or_32_seq_cst
				; X64: xchgl
				; X32-LABEL: or_32_seq_cst
				; X32: xchgl
				%1 = load atomic i32* %p monotonic, align 4
				%2 = or i32 %1, 2
				reames (unsubmitted inline comment, not done): Same here.
				store atomic i32 %2, i32* %p seq_cst, align 4
				ret void
				}

				; ----- XOR -----

				define void @xor_8(i8* %p) {
				; X64-LABEL: xor_8
				; X64-NOT: lock
				; X64: xorb
				; X64-NOT: movb
				; X32-LABEL: xor_8
				; X32-NOT: lock
				; X32: xorb
				; X32-NOT: movb
				%1 = load atomic i8* %p acquire, align 1
				%2 = xor i8 %1, 2
				store atomic i8 %2, i8* %p release, align 1
				ret void
				}

				define void @xor_16(i16* %p) {
				; As with add_16 and and_16, the transformation is not done on 16 bit
				; accesses (the backend treats 16 bit arithmetic as expensive on
				; X86/X86_64), so no 'xorw' may be emitted on either target.
				; X64-LABEL: xor_16
				; X64-NOT: xorw
				; X32-LABEL: xor_16
				; X32-NOT: xorw
				%1 = load atomic i16* %p acquire, align 2
				%2 = xor i16 %1, 2
				store atomic i16 %2, i16* %p release, align 2
				ret void
				}

				define void @xor_32(i32* %p) {
				; X64-LABEL: xor_32
				; X64-NOT: lock
				; X64: xorl
				; X64-NOT: movl
				; X32-LABEL: xor_32
				; X32-NOT: lock
				; X32: xorl
				; X32-NOT: movl
				%1 = load atomic i32* %p acquire, align 4
				%2 = xor i32 %1, 2
				store atomic i32 %2, i32* %p release, align 4
				ret void
				}

				define void @xor_64(i64* %p) {
				; X64-LABEL: xor_64
				; X64-NOT: lock
				; X64: xorq
				; X64-NOT: movq
				; We do not check X86-32 as it cannot do 'xorq'.
				; X32-LABEL: xor_64
				%1 = load atomic i64* %p acquire, align 8
				%2 = xor i64 %1, 2
				store atomic i64 %2, i64* %p release, align 8
				ret void
				}

				define void @xor_32_seq_cst(i32* %p) {
				; X64-LABEL: xor_32_seq_cst
				; X64: xchgl
				; X32-LABEL: xor_32_seq_cst
				reames (unsubmitted inline comment, not done): Again, lock prefix?
				; X32: xchgl
				%1 = load atomic i32* %p monotonic, align 4
				%2 = xor i32 %1, 2
				store atomic i32 %2, i32* %p seq_cst, align 4
				ret void
				}

				; ----- INC -----

				define void @inc_8(i8* %p) {
				; X64-LABEL: inc_8
				; X64-NOT: lock
				; X64: incb
				; X64-NOT: movb
				; X32-LABEL: inc_8
				; X32-NOT: lock
				; X32: incb
				; X32-NOT: movb
				%1 = load atomic i8* %p seq_cst, align 1
				%2 = add i8 %1, 1
				store atomic i8 %2, i8* %p release, align 1
				ret void
				}

				define void @inc_16(i16* %p) {
				; Currently the transformation is not done on 16 bit accesses, as the backend
				; treats 16 bit arithmetic as expensive on X86/X86_64.
				; X64-LABEL: inc_16
				; X64-NOT: incw
				; X32-LABEL: inc_16
				; X32-NOT: incw
				%1 = load atomic i16* %p acquire, align 2
				%2 = add i16 %1, 1
				store atomic i16 %2, i16* %p release, align 2
				ret void
				}

				define void @inc_32(i32* %p) {
				; X64-LABEL: inc_32
				; X64-NOT: lock
				; X64: incl
				; X64-NOT: movl
				; X32-LABEL: inc_32
				; X32-NOT: lock
				; X32: incl
				; X32-NOT: movl
				%1 = load atomic i32* %p acquire, align 4
				%2 = add i32 %1, 1
				store atomic i32 %2, i32* %p monotonic, align 4
				ret void
				}

				define void @inc_64(i64* %p) {
				; X64-LABEL: inc_64
				; X64-NOT: lock
				; X64: incq
				; X64-NOT: movq
				; We do not check X86-32 as it cannot do 'incq'.
				; X32-LABEL: inc_64
				%1 = load atomic i64* %p acquire, align 8
				%2 = add i64 %1, 1
				store atomic i64 %2, i64* %p release, align 8
				ret void
				}

				define void @inc_32_seq_cst(i32* %p) {
				; X64-LABEL: inc_32_seq_cst
				; X64: xchgl
				; X32-LABEL: inc_32_seq_cst
				; X32: xchgl
				%1 = load atomic i32* %p monotonic, align 4
				%2 = add i32 %1, 1
				store atomic i32 %2, i32* %p seq_cst, align 4
				ret void
				}

				; ----- DEC -----

				define void @dec_8(i8* %p) {
				; X64-LABEL: dec_8
				; X64-NOT: lock
				; X64: decb
				; X64-NOT: movb
				; X32-LABEL: dec_8
				; X32-NOT: lock
				; X32: decb
				; X32-NOT: movb
				%1 = load atomic i8* %p seq_cst, align 1
				%2 = sub i8 %1, 1
				store atomic i8 %2, i8* %p release, align 1
				ret void
				}

				define void @dec_16(i16* %p) {
				; Currently the transformation is not done on 16 bit accesses, as the backend
				; treats 16 bit arithmetic as expensive on X86/X86_64.
				; X64-LABEL: dec_16
				; X64-NOT: decw
				; X32-LABEL: dec_16
				; X32-NOT: decw
				%1 = load atomic i16* %p acquire, align 2
				%2 = sub i16 %1, 1
				store atomic i16 %2, i16* %p release, align 2
				ret void
				}

				define void @dec_32(i32* %p) {
				; X64-LABEL: dec_32
				; X64-NOT: lock
				; X64: decl
				; X64-NOT: movl
				; X32-LABEL: dec_32
				; X32-NOT: lock
				; X32: decl
				; X32-NOT: movl
				%1 = load atomic i32* %p acquire, align 4
				%2 = sub i32 %1, 1
				store atomic i32 %2, i32* %p monotonic, align 4
				ret void
				}

				define void @dec_64(i64* %p) {
				; X64-LABEL: dec_64
				; X64-NOT: lock
				; X64: decq
				; X64-NOT: movq
				; We do not check X86-32 as it cannot do 'decq'.
				; X32-LABEL: dec_64
				%1 = load atomic i64* %p acquire, align 8
				%2 = sub i64 %1, 1
				store atomic i64 %2, i64* %p release, align 8
				ret void
				}

				define void @dec_32_seq_cst(i32* %p) {
				; X64-LABEL: dec_32_seq_cst
				; X64: xchgl
				; X32-LABEL: dec_32_seq_cst
				; X32: xchgl
				%1 = load atomic i32* %p monotonic, align 4
				%2 = sub i32 %1, 1
				store atomic i32 %2, i32* %p seq_cst, align 4
				ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Allow atomic operations using immediates to avoid using a register
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 12297

lib/Target/X86/X86CodeEmitter.cpp

lib/Target/X86/X86InstrCompiler.td

lib/Target/X86/X86MCInstLower.cpp

test/CodeGen/X86/atomic_mi.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Allow atomic operations using immediates to avoid using a register
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 12297

lib/Target/X86/X86CodeEmitter.cpp

lib/Target/X86/X86InstrCompiler.td

lib/Target/X86/X86MCInstLower.cpp

test/CodeGen/X86/atomic_mi.ll

[X86] Allow atomic operations using immediates to avoid using a register
ClosedPublic