Diff 411941

llvm/include/llvm/CodeGen/TargetLowering.h

Show First 20 Lines • Show All 247 Lines • ▼ Show 20 Lines	public:
/// w.r.t. what they should expand to.		/// w.r.t. what they should expand to.
enum class AtomicExpansionKind {		enum class AtomicExpansionKind {
None, // Don't expand the instruction.		None, // Don't expand the instruction.
LLSC, // Expand the instruction into loadlinked/storeconditional; used		LLSC, // Expand the instruction into loadlinked/storeconditional; used
// by ARM/AArch64.		// by ARM/AArch64.
LLOnly, // Expand the (load) instruction into just a load-linked, which has		LLOnly, // Expand the (load) instruction into just a load-linked, which has
// greater atomic guarantees than a normal load.		// greater atomic guarantees than a normal load.
CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.		CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.		MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
		BitTestIntrinsic, // Use a target-specific intrinsic for special bit
		// operations; used by X86.
};		};

/// Enum that specifies when a multiplication should be expanded.		/// Enum that specifies when a multiplication should be expanded.
enum class MulExpansionKind {		enum class MulExpansionKind {
Always, // Always expand the instruction.		Always, // Always expand the instruction.
OnlyLegalOrCustom, // Only expand when the resulting instructions are legal		OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
// or custom.		// or custom.
};		};
▲ Show 20 Lines • Show All 1,681 Lines • ▼ Show 20 Lines	public:
virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,		virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,
AtomicRMWInst *AI,		AtomicRMWInst *AI,
Value AlignedAddr, Value Incr,		Value AlignedAddr, Value Incr,
Value Mask, Value ShiftAmt,		Value Mask, Value ShiftAmt,
AtomicOrdering Ord) const {		AtomicOrdering Ord) const {
llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");		llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
}		}

		/// Perform a bit test atomicrmw using a target-specific intrinsic. This
		/// represents the combined bit test intrinsic which will be lowered at a late
		/// stage by the backend.
		///
		/// AtomicExpandPass calls this hook for an atomicrmw whose expansion kind is
		/// AtomicExpansionKind::BitTestIntrinsic. The default implementation is
		/// unreachable, so a target that returns that expansion kind must override
		/// this hook.
		virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
		llvm_unreachable(
		"Bit test atomicrmw expansion unimplemented on this target");
		}

/// Perform a masked cmpxchg using a target-specific intrinsic. This		/// Perform a masked cmpxchg using a target-specific intrinsic. This
/// represents the core LL/SC loop which will be lowered at a late stage by		/// represents the core LL/SC loop which will be lowered at a late stage by
/// the backend.		/// the backend.
virtual Value *emitMaskedAtomicCmpXchgIntrinsic(		virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
IRBuilderBase &Builder, AtomicCmpXchgInst CI, Value AlignedAddr,		IRBuilderBase &Builder, AtomicCmpXchgInst CI, Value AlignedAddr,
Value CmpVal, Value NewVal, Value *Mask, AtomicOrdering Ord) const {		Value CmpVal, Value NewVal, Value *Mask, AtomicOrdering Ord) const {
llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");		llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
}		}
▲ Show 20 Lines • Show All 2,870 Lines • Show Last 20 Lines

llvm/include/llvm/IR/IntrinsicsX86.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 56 Lines • ▼ Show 20 Lines
	}			}

	// Read processor ID.			// Read processor ID.
	let TargetPrefix = "x86" in {			let TargetPrefix = "x86" in {
	def int_x86_rdpid : GCCBuiltin<"__builtin_ia32_rdpid">,			def int_x86_rdpid : GCCBuiltin<"__builtin_ia32_rdpid">,
	Intrinsic<[llvm_i32_ty], [], []>;			Intrinsic<[llvm_i32_ty], [], []>;
	}			}

				// Lock bit test.
				let TargetPrefix = "x86" in {
				def int_x86_atomic_bts : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
				craig.topperUnsubmitted Done Reply Inline Actions Probably better as int_x86_atomic_bittest? craig.topper: Probably better as int_x86_atomic_bittest?
				[ImmArg<ArgIndex<1>>]>;
				def int_x86_atomic_btc : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
				[ImmArg<ArgIndex<1>>]>;
				def int_x86_atomic_btr : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i8_ty],
				[ImmArg<ArgIndex<1>>]>;
				}

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// CET SS			// CET SS
	let TargetPrefix = "x86" in {			let TargetPrefix = "x86" in {
	def int_x86_incsspd : GCCBuiltin<"__builtin_ia32_incsspd">,			def int_x86_incsspd : GCCBuiltin<"__builtin_ia32_incsspd">,
	Intrinsic<[], [llvm_i32_ty], []>;			Intrinsic<[], [llvm_i32_ty], []>;
	def int_x86_incsspq : GCCBuiltin<"__builtin_ia32_incsspq">,			def int_x86_incsspq : GCCBuiltin<"__builtin_ia32_incsspq">,
	Intrinsic<[], [llvm_i64_ty], []>;			Intrinsic<[], [llvm_i64_ty], []>;
	def int_x86_rdsspd : GCCBuiltin<"__builtin_ia32_rdsspd">,			def int_x86_rdsspd : GCCBuiltin<"__builtin_ia32_rdsspd">,
	▲ Show 20 Lines • Show All 5,796 Lines • Show Last 20 Lines

llvm/lib/CodeGen/AtomicExpandPass.cpp

Show First 20 Lines • Show All 615 Lines • ▼ Show 20 Lines	if (ValueSize < MinCASSize) {
expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);		expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
}		}
return true;		return true;
}		}
case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {		case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
expandAtomicRMWToMaskedIntrinsic(AI);		expandAtomicRMWToMaskedIntrinsic(AI);
return true;		return true;
}		}
		case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
		TLI->emitBitTestAtomicRMWIntrinsic(AI);
		return true;
		}
default:		default:
llvm_unreachable("Unhandled case in tryExpandAtomicRMW");		llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}		}
}		}

namespace {		namespace {

struct PartwordMaskValues {		struct PartwordMaskValues {
▲ Show 20 Lines • Show All 1,280 Lines • Show Last 20 Lines

llvm/lib/Target/X86/X86ISelLowering.h

Show First 20 Lines • Show All 781 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {

/// LOCK-prefixed arithmetic read-modify-write instructions.		/// LOCK-prefixed arithmetic read-modify-write instructions.
/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)		/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
LADD,		LADD,
LSUB,		LSUB,
LOR,		LOR,
LXOR,		LXOR,
LAND,		LAND,
		LBTS,
		LBTC,
		LBTR,

// Load, scalar_to_vector, and zero extend.		// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,		VZEXT_LOAD,

// extract_vector_elt, store.		// extract_vector_elt, store.
VEXTRACT_STORE,		VEXTRACT_STORE,

// scalar broadcast from memory.		// scalar broadcast from memory.
▲ Show 20 Lines • Show All 837 Lines • ▼ Show 20 Lines	private:

const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;		const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

TargetLoweringBase::AtomicExpansionKind		TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;		shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;		bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicExpansionKind		TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;		shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
		TargetLoweringBase::AtomicExpansionKind
		shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
		void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

LoadInst *		LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;		lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;		bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;		bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;

bool needsCmpXchgNb(Type *MemType) const;		bool needsCmpXchgNb(Type *MemType) const;
▲ Show 20 Lines • Show All 130 Lines • Show Last 20 Lines

llvm/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,436 Lines • ▼ Show 20 Lines	if (!IntrData) {
case Intrinsic::x86_aesencwide256kl:		case Intrinsic::x86_aesencwide256kl:
case Intrinsic::x86_aesdecwide256kl:		case Intrinsic::x86_aesdecwide256kl:
Info.opc = ISD::INTRINSIC_W_CHAIN;		Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(0);		Info.ptrVal = I.getArgOperand(0);
Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);		Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
Info.align = Align(1);		Info.align = Align(1);
Info.flags \|= MachineMemOperand::MOLoad;		Info.flags \|= MachineMemOperand::MOLoad;
return true;		return true;
		case Intrinsic::x86_atomic_bts:
		case Intrinsic::x86_atomic_btc:
		case Intrinsic::x86_atomic_btr: {
		Info.opc = ISD::INTRINSIC_W_CHAIN;
		Info.ptrVal = I.getArgOperand(0);
		unsigned Size = I.getType()->getScalarSizeInBits();
		craig.topperUnsubmitted Not Done Reply Inline Actions Does something check natural alignment before we create the intrinsic? craig.topper: Does something check natural alignment before we create the intrinsic?
		pengfeiAuthorUnsubmitted Done Reply Inline Actions Yes, it's checked by `atomicSizeSupported` in AtomicExpandPass.cpp pengfei: Yes, it's checked by `atomicSizeSupported` in AtomicExpandPass.cpp
		Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);
		craig.topperUnsubmitted Not Done Reply Inline Actions Does this need MOVolatile? craig.topper: Does this need MOVolatile?
		pengfeiAuthorUnsubmitted Done Reply Inline Actions Maybe not. `MOVolatile` is usually set when `isVolatile()` returns true for load and store instructions. We don't have such info in a target intrinsic. But it seems we never worried about it, and there are few places where it is set in X86 code. So I guess X86 is fine with or without `MOVolatile`? pengfei: Maybe not. `MOVolatile` is usually set when `isVolatile()` returns true for load and store…
		craig.topperUnsubmitted Not Done Reply Inline Actions This information is used to create a MachineMemOperand. I think that MachineMemOperand needs to know this access can't undergo certain optimizations because it represents an atomic access. I don't think this interface can create an atomic MachineMemOperand, but it can create a volatile one. craig.topper: This information is used to create a MachineMemOperand. I think that MachineMemOperand needs to…
		pengfeiAuthorUnsubmitted Done Reply Inline Actions `volatile` is a memory attribute that is not exclusive to atomics. https://godbolt.org/z/1o9Y7j5xe We have ignored it on all existing target memory intrinsics, so I think we don't need any special handling here. pengfei: `volatile` is a memory attribute that is not exclusive to atomics. https://godbolt.
		craig.topperUnsubmitted Not Done Reply Inline Actions Do the LOCK_BTS/BTR/BTC show up in MIR with "monotonic"(or other atomic ordering) in their memory operand printing in MIR? If not you have a bug waiting to happen. I think "volatile" is stronger than any of the atomic orderings. So it works in place of them. RISCV uses MOVolatile in getTgtMemIntrinsic for riscv_masked_atomicrmw* craig.topper: Do the LOCK_BTS/BTR/BTC show up in MIR with "monotonic"(or other atomic ordering) in their…
		craig.topperUnsubmitted Not Done Reply Inline Actions The important thing is that the isUnordered() method on the MachineMemOperand must return false if the original atomicrmw was ordered. craig.topper: The important thing is that the isUnordered() method on the MachineMemOperand must return false…
		pengfeiAuthorUnsubmitted Done Reply Inline Actions I see. Yeah, I had the same worry. That's why I didn't use a target intrinsic at the beginning. I'm glad it solves my problems. Thanks Craig! pengfei: I see. Yeah, I had the same worry. That's why I didn't use a target intrinsic at…
		Info.align = Align(Size);
		Info.flags \|= MachineMemOperand::MOLoad \| MachineMemOperand::MOStore \|
		MachineMemOperand::MOVolatile;
		return true;
		}
}		}
return false;		return false;
}		}

switch (IntrData->Type) {		switch (IntrData->Type) {
case TRUNCATE_TO_MEM_VI8:		case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:		case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {		case TRUNCATE_TO_MEM_VI32: {
▲ Show 20 Lines • Show All 22,052 Lines • ▼ Show 20 Lines	case Intrinsic::x86_testui: {
SDLoc dl(Op);		SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);		SDValue Chain = Op.getOperand(0);
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);		SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
SDValue Operation = DAG.getNode(X86ISD::TESTUI, dl, VTs, Chain);		SDValue Operation = DAG.getNode(X86ISD::TESTUI, dl, VTs, Chain);
SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);		SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,		return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
Operation.getValue(1));		Operation.getValue(1));
}		}
		case Intrinsic::x86_atomic_bts:
		case Intrinsic::x86_atomic_btc:
		case Intrinsic::x86_atomic_btr: {
		SDLoc DL(Op);
		MVT VT = Op.getSimpleValueType();
		SDValue Chain = Op.getOperand(0);
		SDValue Op1 = Op.getOperand(2);
		SDValue Op2 = Op.getOperand(3);
		unsigned Opc = IntNo == Intrinsic::x86_atomic_bts ? X86ISD::LBTS
		: IntNo == Intrinsic::x86_atomic_btc ? X86ISD::LBTC
		: X86ISD::LBTR;
		SDValue Size = DAG.getConstant(VT.getScalarSizeInBits(), DL, MVT::i32);
		MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
		SDValue Res =
		DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::i32, MVT::Other),
		{Chain, Op1, Op2, Size}, VT, MMO);
		Chain = Res.getValue(1);
		Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT);
		unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
		if (Imm)
		Res = DAG.getNode(ISD::SHL, DL, VT, Res,
		DAG.getShiftAmountConstant(Imm, VT, DL));
		return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain);
		}
}		}
return SDValue();		return SDValue();
}		}

SDLoc dl(Op);		SDLoc dl(Op);
switch(IntrData->Type) {		switch(IntrData->Type) {
default: llvm_unreachable("Unknown Intrinsic Type");		default: llvm_unreachable("Unknown Intrinsic Type");
case RDSEED:		case RDSEED:
▲ Show 20 Lines • Show All 2,906 Lines • ▼ Show 20 Lines	if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
(Subtarget.hasSSE1() \|\| Subtarget.hasX87()))		(Subtarget.hasSSE1() \|\| Subtarget.hasX87()))
return AtomicExpansionKind::None;		return AtomicExpansionKind::None;

return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg		return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::None;		: AtomicExpansionKind::None;
}		}

TargetLowering::AtomicExpansionKind		TargetLowering::AtomicExpansionKind
		X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
		// Decide how an atomicrmw or/and/xor is expanded: None when the result is
		// unused, BitTestIntrinsic when the result feeds only a matching single-bit
		// AND, and CmpXChg in every other case.

		// If the atomicrmw's result isn't actually used, we can just add a "lock"
		// prefix to a normal instruction for these operations.
		if (AI->use_empty())
		return AtomicExpansionKind::None;

		// If the atomicrmw's result is used only by an AND with a single-bit
		// constant, we may use a bts/btr/btc instruction for these operations.
		// AI has at least one use at this point (use_empty() was handled above), and
		// the hasOneUse() check below guarantees that I is its only user.
		auto *C1 = dyn_cast<ConstantInt>(AI->getValOperand());
		Instruction *I = AI->user_back();
		// Require a constant rmw operand and a sole user that is an AND located in
		// the same basic block as the atomicrmw.
		if (!C1 \|\| !AI->hasOneUse() \|\| I->getOpcode() != Instruction::And \|\|
		AI->getParent() != I->getParent())
		return AtomicExpansionKind::CmpXChg;
		// The user must be an AND with a constant that has exactly one bit set.
		// Only operand 1 of the AND is inspected for that constant. 8-bit values are
		// rejected as well (presumably because there is no 8-bit bts/btr/btc form —
		// TODO confirm).
		auto *C2 = dyn_cast<ConstantInt>(I->getOperand(1));
		unsigned Bits = AI->getType()->getPrimitiveSizeInBits();
		if (!C2 \|\| Bits == 8 \|\| !isPowerOf2_64(C2->getZExtValue()))
		return AtomicExpansionKind::CmpXChg;

		// For "and" the rmw operand must be the bitwise complement of the tested
		// bit, i.e. the rmw clears exactly the bit that the AND then tests.
		if (AI->getOperation() == AtomicRMWInst::And)
		return ~C1->getValue() == C2->getValue()
		? AtomicExpansionKind::BitTestIntrinsic
		: AtomicExpansionKind::CmpXChg;

		// For "or"/"xor" the rmw operand must be the tested bit itself.
		// NOTE(review): this compares the ConstantInt pointers, not their values;
		// presumably it relies on equal constants being the same object — confirm.
		return C1 == C2 ? AtomicExpansionKind::BitTestIntrinsic
		: AtomicExpansionKind::CmpXChg;
		}

		void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
		IRBuilder<> Builder(AI);
		Intrinsic::ID IID = Intrinsic::not_intrinsic;
		switch (AI->getOperation()) {
		default:
		llvm_unreachable("Unknown atomic operation");
		case AtomicRMWInst::Or:
		IID = Intrinsic::x86_atomic_bts;
		LuoYuankeUnsubmitted Not Done Reply Inline Actions Use enum { BIT_SET, BIT_CLEAR, BIT_RESET} to replace {0, 1, 2}? LuoYuanke: Use enum { BIT_SET, BIT_CLEAR, BIT_RESET} to replace {0, 1, 2}?
		pengfeiAuthorUnsubmitted Done Reply Inline Actions There's no other user, so I just add a comment here. pengfei: There's no other user, so I just add a comment here.
		break;
		case AtomicRMWInst::Xor:
		IID = Intrinsic::x86_atomic_btc;
		break;
		case AtomicRMWInst::And:
		IID = Intrinsic::x86_atomic_btr;
		break;
		}
		Instruction *I = AI->user_back();
		LuoYuankeUnsubmitted Not Done Reply Inline Actions Add comments that there is only 1 user checked in shouldExpandLogicAtomicRMWInIR()? LuoYuanke: Add comments that there is only 1 user checked in shouldExpandLogicAtomicRMWInIR()?
		pengfeiAuthorUnsubmitted Done Reply Inline Actions It is there, at line 30466. pengfei: It is there, at line 30466.
		LLVMContext &Ctx = AI->getContext();
		unsigned Imm =
		countTrailingZeros(cast<ConstantInt>(I->getOperand(1))->getZExtValue());
		Function *BitTest =
		Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
		craig.topperUnsubmitted Done Reply Inline Actions Does `AI->getContext()` work? craig.topper: Does `AI->getContext()` work?
		Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(),
		Type::getInt8PtrTy(Ctx));
		Value *Result = Builder.CreateCall(BitTest, {Addr, Builder.getInt8(Imm)});
		I->replaceAllUsesWith(Result);
		I->eraseFromParent();
		AI->eraseFromParent();
		craig.topperUnsubmitted Done Reply Inline Actions CreateBitCast -> createPointerCast craig.topper: CreateBitCast -> createPointerCast
		craig.topperUnsubmitted Not Done Reply Inline Actions Probably doesn't really matter, but you should pass the address space to getInt8PtrTy. craig.topper: Probably doesn't really matter, but you should pass the address space to getInt8PtrTy.
		pengfeiAuthorUnsubmitted Done Reply Inline Actions I didn't see us pass an address space when using `CreatePointerCast`; I guess the address space is kept unchanged? pengfei: I didn't see us pass an address space when using `CreatePointerCast`; I guess the address space…
		}

		craig.topperUnsubmitted Done Reply Inline Actions ConstantInt::get(Type::getInt8Ty(Ctx), Imm) -> Builder.getInt8(Imm) craig.topper: ConstantInt::get(Type::getInt8Ty(Ctx), Imm) -> Builder.getInt8(Imm)
		TargetLowering::AtomicExpansionKind
		craig.topperUnsubmitted Not Done Reply Inline Actions Builder.getInt32(SCR) craig.topper: Builder.getInt32(SCR)
X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {		X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;		unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
Type *MemType = AI->getType();		Type *MemType = AI->getType();

// If the operand is too big, we must see if cmpxchg8/16b is available		// If the operand is too big, we must see if cmpxchg8/16b is available
// and default to library calls otherwise.		// and default to library calls otherwise.
if (MemType->getPrimitiveSizeInBits() > NativeWidth) {		if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg		return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::None;		: AtomicExpansionKind::None;
}		}

AtomicRMWInst::BinOp Op = AI->getOperation();		AtomicRMWInst::BinOp Op = AI->getOperation();
switch (Op) {		switch (Op) {
default:		default:
llvm_unreachable("Unknown atomic operation");		llvm_unreachable("Unknown atomic operation");
case AtomicRMWInst::Xchg:		case AtomicRMWInst::Xchg:
case AtomicRMWInst::Add:		case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:		case AtomicRMWInst::Sub:
// It's better to use xadd, xsub or xchg for these in all cases.		// It's better to use xadd, xsub or xchg for these in all cases.
return AtomicExpansionKind::None;		return AtomicExpansionKind::None;
case AtomicRMWInst::Or:		case AtomicRMWInst::Or:
case AtomicRMWInst::And:		case AtomicRMWInst::And:
case AtomicRMWInst::Xor:		case AtomicRMWInst::Xor:
// If the atomicrmw's result isn't actually used, we can just add a "lock"		return shouldExpandLogicAtomicRMWInIR(AI);
// prefix to a normal instruction for these operations.
return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::None;
case AtomicRMWInst::Nand:		case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:		case AtomicRMWInst::Max:
case AtomicRMWInst::Min:		case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:		case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:		case AtomicRMWInst::UMin:
case AtomicRMWInst::FAdd:		case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:		case AtomicRMWInst::FSub:
// These always require a non-trivial set of data operations on x86. We must		// These always require a non-trivial set of data operations on x86. We must
▲ Show 20 Lines • Show All 2,464 Lines • ▼ Show 20 Lines	#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
NODE_NAME_CASE(LCMPXCHG8_DAG)		NODE_NAME_CASE(LCMPXCHG8_DAG)
NODE_NAME_CASE(LCMPXCHG16_DAG)		NODE_NAME_CASE(LCMPXCHG16_DAG)
NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)		NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
NODE_NAME_CASE(LADD)		NODE_NAME_CASE(LADD)
NODE_NAME_CASE(LSUB)		NODE_NAME_CASE(LSUB)
NODE_NAME_CASE(LOR)		NODE_NAME_CASE(LOR)
NODE_NAME_CASE(LXOR)		NODE_NAME_CASE(LXOR)
NODE_NAME_CASE(LAND)		NODE_NAME_CASE(LAND)
		NODE_NAME_CASE(LBTS)
		NODE_NAME_CASE(LBTC)
		NODE_NAME_CASE(LBTR)
NODE_NAME_CASE(VZEXT_MOVL)		NODE_NAME_CASE(VZEXT_MOVL)
NODE_NAME_CASE(VZEXT_LOAD)		NODE_NAME_CASE(VZEXT_LOAD)
NODE_NAME_CASE(VEXTRACT_STORE)		NODE_NAME_CASE(VEXTRACT_STORE)
NODE_NAME_CASE(VTRUNC)		NODE_NAME_CASE(VTRUNC)
NODE_NAME_CASE(VTRUNCS)		NODE_NAME_CASE(VTRUNCS)
NODE_NAME_CASE(VTRUNCUS)		NODE_NAME_CASE(VTRUNCUS)
NODE_NAME_CASE(VMTRUNC)		NODE_NAME_CASE(VMTRUNC)
NODE_NAME_CASE(VMTRUNCS)		NODE_NAME_CASE(VMTRUNCS)
▲ Show 20 Lines • Show All 22,592 Lines • Show Last 20 Lines

llvm/lib/Target/X86/X86InstrCompiler.td

Show First 20 Lines • Show All 833 Lines • ▼ Show 20 Lines	let Predicates = [UseIncDec] in {
def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>;		def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>;
def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>;		def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>;
def : Pat<(X86lock_sub addr:$dst, (i8 -1)), (LOCK_INC8m addr:$dst)>;		def : Pat<(X86lock_sub addr:$dst, (i8 -1)), (LOCK_INC8m addr:$dst)>;
def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>;		def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>;
def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>;		def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>;
def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;		def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
}		}

		// Atomic bit test.
		def X86LBTest : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
		SDTCisVT<2, i8>, SDTCisVT<3, i32>]>;
		def x86bts : SDNode<"X86ISD::LBTS", X86LBTest,
		[SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
		def x86btc : SDNode<"X86ISD::LBTC", X86LBTest,
		craig.topperUnsubmitted Done Reply Inline Actions Why no SDNPMayLoad? craig.topper: Why no SDNPMayLoad?
		[SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
		LuoYuankeUnsubmitted Done Reply Inline Actions Does LOG mean logic? Rename to ATOMIC_OP? SCR means "set, clear reset"? Maybe add comments for it, so that it is well understood. LuoYuanke: Does LOG mean logic? Rename to ATOMIC_OP? SCR means "set, clear reset"? Maybe add comments for…
		craig.topperUnsubmitted Done Reply Inline Actions Can we use 3 separate intrinsics? I'm not sure this made up encoding is saving much. craig.topper: Can we use 3 separate intrinsics? I'm not sure this made up encoding is saving much.
		def x86btr : SDNode<"X86ISD::LBTR", X86LBTest,
		[SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;

		multiclass ATOMIC_LOGIC_OP<Format Form, string s> {
		let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
		SchedRW = [WriteBitTestSetRegRMW] in {
		def 16m : Ii8<0xBA, Form, (outs), (ins i16mem:$src1, i8imm:$src2),
		!strconcat(s, "{w}\t{$src2, $src1\|$src1, $src2}"),
		[(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 16)))]>,
		OpSize16, TB, LOCK;
		def 32m : Ii8<0xBA, Form, (outs), (ins i32mem:$src1, i8imm:$src2),
		!strconcat(s, "{l}\t{$src2, $src1\|$src1, $src2}"),
		[(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 32)))]>,
		OpSize32, TB, LOCK;
		def 64m : RIi8<0xBA, Form, (outs), (ins i64mem:$src1, i8imm:$src2),
		!strconcat(s, "{q}\t{$src2, $src1\|$src1, $src2}"),
		[(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 64)))]>,
		TB, LOCK;
		}
		}

		defm LOCK_BTS : ATOMIC_LOGIC_OP<MRM5m, "bts">;
		defm LOCK_BTC : ATOMIC_LOGIC_OP<MRM7m, "btc">;
		defm LOCK_BTR : ATOMIC_LOGIC_OP<MRM6m, "btr">;

// Atomic compare and swap.		// Atomic compare and swap.
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,		multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
string mnemonic, SDPatternOperator frag> {		string mnemonic, SDPatternOperator frag> {
let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {		let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
let Defs = [AL, EFLAGS], Uses = [AL] in		let Defs = [AL, EFLAGS], Uses = [AL] in
def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),		def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
!strconcat(mnemonic, "{b}\t{$swap, $ptr\|$ptr, $swap}"),		!strconcat(mnemonic, "{b}\t{$swap, $ptr\|$ptr, $swap}"),
[(frag addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;		[(frag addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
▲ Show 20 Lines • Show All 1,371 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/atomic-bit-test.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown \| FileCheck %s --check-prefix=X86		; RUN: llc < %s -mtriple=i686-unknown-unknown \| FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown \| FileCheck %s --check-prefix=X64		; RUN: llc < %s -mtriple=x86_64-unknown-unknown \| FileCheck %s --check-prefix=X64

@v16 = dso_local global i16 0, align 2		@v16 = dso_local global i16 0, align 2
@v32 = dso_local global i32 0, align 4		@v32 = dso_local global i32 0, align 4
@v64 = dso_local global i64 0, align 8		@v64 = dso_local global i64 0, align 8

define i16 @bts1() nounwind {		define i16 @bts1() nounwind {
; X86-LABEL: bts1:		; X86-LABEL: bts1:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movzwl v16, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btsw $0, v16
; X86-NEXT: .LBB0_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl $1, %ecx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: lock cmpxchgw %cx, v16
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB0_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: andl $1, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax		; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: bts1:		; X64-LABEL: bts1:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movzwl v16(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btsw $0, v16(%rip)
; X64-NEXT: .LBB0_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: orl $1, %ecx
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB0_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andl $1, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax		; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw or i16* @v16, i16 1 monotonic, align 2		%0 = atomicrmw or i16* @v16, i16 1 monotonic, align 2
%and = and i16 %0, 1		%and = and i16 %0, 1
ret i16 %and		ret i16 %and
}		}

define i16 @bts2() nounwind {		define i16 @bts2() nounwind {
; X86-LABEL: bts2:		; X86-LABEL: bts2:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movzwl v16, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btsw $1, v16
; X86-NEXT: .LBB1_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: addl %eax, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl $2, %ecx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: lock cmpxchgw %cx, v16
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB1_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: andl $2, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax		; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: bts2:		; X64-LABEL: bts2:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movzwl v16(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btsw $1, v16(%rip)
; X64-NEXT: .LBB1_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: addl %eax, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: orl $2, %ecx
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB1_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andl $2, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax		; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw or i16* @v16, i16 2 monotonic, align 2		%0 = atomicrmw or i16* @v16, i16 2 monotonic, align 2
%and = and i16 %0, 2		%and = and i16 %0, 2
ret i16 %and		ret i16 %and
}		}

define i16 @bts15() nounwind {		define i16 @bts15() nounwind {
; X86-LABEL: bts15:		; X86-LABEL: bts15:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movzwl v16, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btsw $15, v16
; X86-NEXT: .LBB2_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: shll $15, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl $32768, %ecx # imm = 0x8000
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: lock cmpxchgw %cx, v16
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB2_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: andl $32768, %eax # imm = 0x8000
; X86-NEXT: # kill: def $ax killed $ax killed $eax		; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: bts15:		; X64-LABEL: bts15:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movzwl v16(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btsw $15, v16(%rip)
; X64-NEXT: .LBB2_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: shll $15, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: orl $32768, %ecx # imm = 0x8000
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB2_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andl $32768, %eax # imm = 0x8000
; X64-NEXT: # kill: def $ax killed $ax killed $eax		; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw or i16* @v16, i16 32768 monotonic, align 2		%0 = atomicrmw or i16* @v16, i16 32768 monotonic, align 2
%and = and i16 %0, 32768		%and = and i16 %0, 32768
ret i16 %and		ret i16 %and
}		}

define i32 @bts31() nounwind {		define i32 @bts31() nounwind {
; X86-LABEL: bts31:		; X86-LABEL: bts31:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movl v32, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btsl $31, v32
; X86-NEXT: .LBB3_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: shll $31, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl $-2147483648, %ecx # imm = 0x80000000
; X86-NEXT: lock cmpxchgl %ecx, v32
; X86-NEXT: jne .LBB3_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: bts31:		; X64-LABEL: bts31:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movl v32(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btsl $31, v32(%rip)
; X64-NEXT: .LBB3_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: shll $31, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: orl $-2147483648, %ecx # imm = 0x80000000
; X64-NEXT: lock cmpxchgl %ecx, v32(%rip)
; X64-NEXT: jne .LBB3_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw or i32* @v32, i32 2147483648 monotonic, align 4		%0 = atomicrmw or i32* @v32, i32 2147483648 monotonic, align 4
%and = and i32 %0, 2147483648		%and = and i32 %0, 2147483648
ret i32 %and		ret i32 %and
}		}

define i64 @bts63() nounwind {		define i64 @bts63() nounwind {
Show All 16 Lines
; X86-NEXT: andl %esi, %edx		; X86-NEXT: andl %esi, %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: popl %ebx		; X86-NEXT: popl %ebx
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: bts63:		; X64-LABEL: bts63:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: movq v64(%rip), %rax		; X64-NEXT: lock btsq $63, v64(%rip)
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: setb %al
; X64-NEXT: .LBB4_1: # %atomicrmw.start		; X64-NEXT: shlq $63, %rax
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: orq %rcx, %rdx
; X64-NEXT: lock cmpxchgq %rdx, v64(%rip)
; X64-NEXT: jne .LBB4_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andq %rcx, %rax
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw or i64* @v64, i64 -9223372036854775808 monotonic, align 8		%0 = atomicrmw or i64* @v64, i64 -9223372036854775808 monotonic, align 8
%and = and i64 %0, -9223372036854775808		%and = and i64 %0, -9223372036854775808
ret i64 %and		ret i64 %and
}		}

define i16 @btc1() nounwind {		define i16 @btc1() nounwind {
; X86-LABEL: btc1:		; X86-LABEL: btc1:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movzwl v16, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btcw $0, v16
; X86-NEXT: .LBB5_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: xorl $1, %ecx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: lock cmpxchgw %cx, v16
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB5_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: andl $1, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax		; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: btc1:		; X64-LABEL: btc1:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movzwl v16(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btcw $0, v16(%rip)
; X64-NEXT: .LBB5_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: xorl $1, %ecx
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB5_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andl $1, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax		; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw xor i16* @v16, i16 1 monotonic, align 2		%0 = atomicrmw xor i16* @v16, i16 1 monotonic, align 2
%and = and i16 %0, 1		%and = and i16 %0, 1
ret i16 %and		ret i16 %and
}		}

define i16 @btc2() nounwind {		define i16 @btc2() nounwind {
; X86-LABEL: btc2:		; X86-LABEL: btc2:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movzwl v16, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btcw $1, v16
; X86-NEXT: .LBB6_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: addl %eax, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: xorl $2, %ecx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: lock cmpxchgw %cx, v16
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB6_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: andl $2, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax		; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: btc2:		; X64-LABEL: btc2:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movzwl v16(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btcw $1, v16(%rip)
; X64-NEXT: .LBB6_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: addl %eax, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: xorl $2, %ecx
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB6_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andl $2, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax		; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw xor i16* @v16, i16 2 monotonic, align 2		%0 = atomicrmw xor i16* @v16, i16 2 monotonic, align 2
%and = and i16 %0, 2		%and = and i16 %0, 2
ret i16 %and		ret i16 %and
}		}

define i16 @btc15() nounwind {		define i16 @btc15() nounwind {
; X86-LABEL: btc15:		; X86-LABEL: btc15:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movzwl v16, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btcw $15, v16
; X86-NEXT: .LBB7_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: shll $15, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: xorl $32768, %ecx # imm = 0x8000
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: lock cmpxchgw %cx, v16
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB7_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: andl $32768, %eax # imm = 0x8000
; X86-NEXT: # kill: def $ax killed $ax killed $eax		; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: btc15:		; X64-LABEL: btc15:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movzwl v16(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btcw $15, v16(%rip)
; X64-NEXT: .LBB7_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: shll $15, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: xorl $32768, %ecx # imm = 0x8000
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB7_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andl $32768, %eax # imm = 0x8000
; X64-NEXT: # kill: def $ax killed $ax killed $eax		; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw xor i16* @v16, i16 32768 monotonic, align 2		%0 = atomicrmw xor i16* @v16, i16 32768 monotonic, align 2
%and = and i16 %0, 32768		%and = and i16 %0, 32768
ret i16 %and		ret i16 %and
}		}

define i32 @btc31() nounwind {		define i32 @btc31() nounwind {
; X86-LABEL: btc31:		; X86-LABEL: btc31:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movl v32, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btcl $31, v32
; X86-NEXT: .LBB8_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: shll $31, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: xorl $-2147483648, %ecx # imm = 0x80000000
; X86-NEXT: lock cmpxchgl %ecx, v32
; X86-NEXT: jne .LBB8_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: btc31:		; X64-LABEL: btc31:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movl v32(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btcl $31, v32(%rip)
; X64-NEXT: .LBB8_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: shll $31, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: xorl $-2147483648, %ecx # imm = 0x80000000
; X64-NEXT: lock cmpxchgl %ecx, v32(%rip)
; X64-NEXT: jne .LBB8_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw xor i32* @v32, i32 2147483648 monotonic, align 4		%0 = atomicrmw xor i32* @v32, i32 2147483648 monotonic, align 4
%and = and i32 %0, 2147483648		%and = and i32 %0, 2147483648
ret i32 %and		ret i32 %and
}		}

define i64 @btc63() nounwind {		define i64 @btc63() nounwind {
Show All 16 Lines
; X86-NEXT: andl %esi, %edx		; X86-NEXT: andl %esi, %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: popl %ebx		; X86-NEXT: popl %ebx
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: btc63:		; X64-LABEL: btc63:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: movq v64(%rip), %rax		; X64-NEXT: lock btcq $63, v64(%rip)
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: setb %al
; X64-NEXT: .LBB9_1: # %atomicrmw.start		; X64-NEXT: shlq $63, %rax
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: xorq %rcx, %rdx
; X64-NEXT: lock cmpxchgq %rdx, v64(%rip)
; X64-NEXT: jne .LBB9_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andq %rcx, %rax
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw xor i64* @v64, i64 -9223372036854775808 monotonic, align 8		%0 = atomicrmw xor i64* @v64, i64 -9223372036854775808 monotonic, align 8
%and = and i64 %0, -9223372036854775808		%and = and i64 %0, -9223372036854775808
ret i64 %and		ret i64 %and
}		}

define i16 @btr1() nounwind {		define i16 @btr1() nounwind {
; X86-LABEL: btr1:		; X86-LABEL: btr1:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movzwl v16, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btrw $0, v16
; X86-NEXT: .LBB10_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $65534, %ecx # imm = 0xFFFE
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: lock cmpxchgw %cx, v16
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB10_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: andl $1, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax		; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: btr1:		; X64-LABEL: btr1:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movzwl v16(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btrw $0, v16(%rip)
; X64-NEXT: .LBB10_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $65534, %ecx # imm = 0xFFFE
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB10_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andl $1, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax		; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw and i16* @v16, i16 -2 monotonic, align 2		%0 = atomicrmw and i16* @v16, i16 -2 monotonic, align 2
%and = and i16 %0, 1		%and = and i16 %0, 1
ret i16 %and		ret i16 %and
}		}

define i16 @btr2() nounwind {		define i16 @btr2() nounwind {
; X86-LABEL: btr2:		; X86-LABEL: btr2:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movzwl v16, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btrw $1, v16
; X86-NEXT: .LBB11_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: addl %eax, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $65533, %ecx # imm = 0xFFFD
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: lock cmpxchgw %cx, v16
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB11_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: andl $2, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax		; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: btr2:		; X64-LABEL: btr2:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movzwl v16(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btrw $1, v16(%rip)
; X64-NEXT: .LBB11_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: addl %eax, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $65533, %ecx # imm = 0xFFFD
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB11_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andl $2, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax		; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw and i16* @v16, i16 -3 monotonic, align 2		%0 = atomicrmw and i16* @v16, i16 -3 monotonic, align 2
%and = and i16 %0, 2		%and = and i16 %0, 2
ret i16 %and		ret i16 %and
}		}

define i16 @btr15() nounwind {		define i16 @btr15() nounwind {
; X86-LABEL: btr15:		; X86-LABEL: btr15:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movzwl v16, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btrw $15, v16
; X86-NEXT: .LBB12_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: shll $15, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: lock cmpxchgw %cx, v16
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB12_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: andl $32768, %eax # imm = 0x8000
; X86-NEXT: # kill: def $ax killed $ax killed $eax		; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: btr15:		; X64-LABEL: btr15:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movzwl v16(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btrw $15, v16(%rip)
; X64-NEXT: .LBB12_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: shll $15, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $32767, %ecx # imm = 0x7FFF
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
; X64-NEXT: # kill: def $ax killed $ax def $eax
; X64-NEXT: jne .LBB12_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andl $32768, %eax # imm = 0x8000
; X64-NEXT: # kill: def $ax killed $ax killed $eax		; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw and i16* @v16, i16 32767 monotonic, align 2		%0 = atomicrmw and i16* @v16, i16 32767 monotonic, align 2
%and = and i16 %0, 32768		%and = and i16 %0, 32768
ret i16 %and		ret i16 %and
}		}

define i32 @btr31() nounwind {		define i32 @btr31() nounwind {
; X86-LABEL: btr31:		; X86-LABEL: btr31:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movl v32, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btrl $31, v32
; X86-NEXT: .LBB13_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: shll $31, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
; X86-NEXT: lock cmpxchgl %ecx, v32
; X86-NEXT: jne .LBB13_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: andl $-2147483648, %eax # imm = 0x80000000
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: btr31:		; X64-LABEL: btr31:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movl v32(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btrl $31, v32(%rip)
; X64-NEXT: .LBB13_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: shll $31, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
; X64-NEXT: lock cmpxchgl %ecx, v32(%rip)
; X64-NEXT: jne .LBB13_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw and i32* @v32, i32 2147483647 monotonic, align 4		%0 = atomicrmw and i32* @v32, i32 2147483647 monotonic, align 4
%and = and i32 %0, 2147483648		%and = and i32 %0, 2147483648
ret i32 %and		ret i32 %and
}		}

define i64 @btr63() nounwind {		define i64 @btr63() nounwind {
Show All 22 Lines
; X86-NEXT: andl %esi, %edx		; X86-NEXT: andl %esi, %edx
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: popl %edi		; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx		; X86-NEXT: popl %ebx
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: btr63:		; X64-LABEL: btr63:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: movq v64(%rip), %rax		; X64-NEXT: lock btrq $63, v64(%rip)
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: setb %al
; X64-NEXT: .LBB14_1: # %atomicrmw.start		; X64-NEXT: shlq $63, %rax
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: andq %rcx, %rdx
; X64-NEXT: lock cmpxchgq %rdx, v64(%rip)
; X64-NEXT: jne .LBB14_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: incq %rcx
; X64-NEXT: andq %rcx, %rax
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw and i64* @v64, i64 9223372036854775807 monotonic, align 8		%0 = atomicrmw and i64* @v64, i64 9223372036854775807 monotonic, align 8
%and = and i64 %0, -9223372036854775808		%and = and i64 %0, -9223372036854775808
ret i64 %and		ret i64 %and
}		}

define i16 @multi_use1() nounwind {		define i16 @multi_use1() nounwind {
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines	entry:
%2 = xor i16 %0, 2		%2 = xor i16 %0, 2
%3 = or i16 %1, %2		%3 = or i16 %1, %2
ret i16 %3		ret i16 %3
}		}

define i16 @multi_use2() nounwind {		define i16 @multi_use2() nounwind {
; X86-LABEL: multi_use2:		; X86-LABEL: multi_use2:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movzwl v16, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btsw $0, v16
; X86-NEXT: .LBB16_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl $1, %ecx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: lock cmpxchgw %cx, v16
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: jne .LBB16_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: andl $1, %eax
; X86-NEXT: leal (%eax,%eax,2), %eax		; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax		; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: multi_use2:		; X64-LABEL: multi_use2:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movzwl v16(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btsw $0, v16(%rip)
; X64-NEXT: .LBB16_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: orl $1, %ecx
; X64-NEXT: # kill: def $ax killed $ax killed $rax
; X64-NEXT: lock cmpxchgw %cx, v16(%rip)
; X64-NEXT: # kill: def $ax killed $ax def $rax
; X64-NEXT: jne .LBB16_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: andl $1, %eax
; X64-NEXT: leal (%rax,%rax,2), %eax		; X64-NEXT: leal (%rax,%rax,2), %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax		; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
entry:		entry:
%0 = atomicrmw or i16* @v16, i16 1 monotonic, align 2		%0 = atomicrmw or i16* @v16, i16 1 monotonic, align 2
%1 = and i16 %0, 1		%1 = and i16 %0, 1
%2 = shl i16 %1, 1		%2 = shl i16 %1, 1
%3 = or i16 %1, %2		%3 = or i16 %1, %2
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines	3:
ret i16 %and		ret i16 %and
}		}

declare void @foo()		declare void @foo()

define void @no_and_cmp0_fold() nounwind {		define void @no_and_cmp0_fold() nounwind {
; X86-LABEL: no_and_cmp0_fold:		; X86-LABEL: no_and_cmp0_fold:
; X86: # %bb.0: # %entry		; X86: # %bb.0: # %entry
; X86-NEXT: movl v32, %eax		; X86-NEXT: lock btsl $3, v32
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB18_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl $8, %ecx
; X86-NEXT: lock cmpxchgl %ecx, v32
; X86-NEXT: jne .LBB18_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: testb %al, %al		; X86-NEXT: testb %al, %al
; X86-NEXT: je .LBB18_3		; X86-NEXT: je .LBB18_1
; X86-NEXT: # %bb.4: # %if.end		; X86-NEXT: # %bb.2: # %if.end
; X86-NEXT: retl		; X86-NEXT: retl
; X86-NEXT: .LBB18_3: # %if.then		; X86-NEXT: .LBB18_1: # %if.then
;		;
; X64-LABEL: no_and_cmp0_fold:		; X64-LABEL: no_and_cmp0_fold:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: movl v32(%rip), %eax		; X64-NEXT: lock btsl $3, v32(%rip)
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB18_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: orl $8, %ecx
; X64-NEXT: lock cmpxchgl %ecx, v32(%rip)
; X64-NEXT: jne .LBB18_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testb %al, %al		; X64-NEXT: testb %al, %al
; X64-NEXT: je .LBB18_3		; X64-NEXT: je .LBB18_1
; X64-NEXT: # %bb.4: # %if.end		; X64-NEXT: # %bb.2: # %if.end
; X64-NEXT: retq		; X64-NEXT: retq
; X64-NEXT: .LBB18_3: # %if.then		; X64-NEXT: .LBB18_1: # %if.then
entry:		entry:
%0 = atomicrmw or i32* @v32, i32 8 monotonic, align 4		%0 = atomicrmw or i32* @v32, i32 8 monotonic, align 4
%and = and i32 %0, 8		%and = and i32 %0, 8
%tobool = icmp ne i32 %and, 0		%tobool = icmp ne i32 %and, 0
br i1 undef, label %if.then, label %if.end		br i1 undef, label %if.then, label %if.end

if.then: ; preds = %entry		if.then: ; preds = %entry
unreachable		unreachable

if.end: ; preds = %entry		if.end: ; preds = %entry
%or.cond8 = select i1 %tobool, i1 undef, i1 false		%or.cond8 = select i1 %tobool, i1 undef, i1 false
ret void		ret void
}		}

define i32 @split_hoist_and(i32 %0) nounwind {		define i32 @split_hoist_and(i32 %0) nounwind {
; X86-LABEL: split_hoist_and:		; X86-LABEL: split_hoist_and:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl v32, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .p2align 4, 0x90		; X86-NEXT: lock btsl $3, v32
; X86-NEXT: .LBB19_1: # %atomicrmw.start		; X86-NEXT: setb %al
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: shll $3, %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: orl $8, %edx
; X86-NEXT: lock cmpxchgl %edx, v32
; X86-NEXT: jne .LBB19_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: testl %ecx, %ecx		; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: andl $8, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: split_hoist_and:		; X64-LABEL: split_hoist_and:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl v32(%rip), %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: .p2align 4, 0x90		; X64-NEXT: lock btsl $3, v32(%rip)
; X64-NEXT: .LBB19_1: # %atomicrmw.start		; X64-NEXT: setb %al
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: shll $3, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: orl $8, %ecx
; X64-NEXT: lock cmpxchgl %ecx, v32(%rip)
; X64-NEXT: jne .LBB19_1
; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: testl %edi, %edi		; X64-NEXT: testl %edi, %edi
; X64-NEXT: andl $8, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%2 = atomicrmw or i32* @v32, i32 8 monotonic, align 4		%2 = atomicrmw or i32* @v32, i32 8 monotonic, align 4
%3 = tail call i32 @llvm.ctlz.i32(i32 %0, i1 false)		%3 = tail call i32 @llvm.ctlz.i32(i32 %0, i1 false)
%4 = and i32 %2, 8		%4 = and i32 %2, 8
ret i32 %4		ret i32 %4
}		}

declare i32 @llvm.ctlz.i32(i32, i1)		declare i32 @llvm.ctlz.i32(i32, i1)

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Use bit test instructions to optimize some logic atomic operations
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 411941

llvm/include/llvm/CodeGen/TargetLowering.h

llvm/include/llvm/IR/IntrinsicsX86.td

llvm/lib/CodeGen/AtomicExpandPass.cpp

llvm/lib/Target/X86/X86ISelLowering.h

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/lib/Target/X86/X86InstrCompiler.td

llvm/test/CodeGen/X86/atomic-bit-test.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Use bit test instructions to optimize some logic atomic operations
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 411941

llvm/include/llvm/CodeGen/TargetLowering.h

llvm/include/llvm/IR/IntrinsicsX86.td

llvm/lib/CodeGen/AtomicExpandPass.cpp

llvm/lib/Target/X86/X86ISelLowering.h

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/lib/Target/X86/X86InstrCompiler.td

llvm/test/CodeGen/X86/atomic-bit-test.ll

[X86] Use bit test instructions to optimize some logic atomic operations
ClosedPublic