Diff 167881

lib/AsmParser/LLParser.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,719 Lines • ▼ Show 20 Lines	int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {

if (Ordering == AtomicOrdering::Unordered)		if (Ordering == AtomicOrdering::Unordered)
return TokError("atomicrmw cannot be unordered");		return TokError("atomicrmw cannot be unordered");
if (!Ptr->getType()->isPointerTy())		if (!Ptr->getType()->isPointerTy())
return Error(PtrLoc, "atomicrmw operand must be a pointer");		return Error(PtrLoc, "atomicrmw operand must be a pointer");
if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())		if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
return Error(ValLoc, "atomicrmw value and pointer type do not match");		return Error(ValLoc, "atomicrmw value and pointer type do not match");

if (!Val->getType()->isIntegerTy()) {		if (Operation != AtomicRMWInst::Xchg && !Val->getType()->isIntegerTy()) {
return Error(ValLoc, "atomicrmw " +		return Error(ValLoc, "atomicrmw " +
AtomicRMWInst::getOperationName(Operation) +		AtomicRMWInst::getOperationName(Operation) +
" operand must be an integer");		" operand must be an integer");
}		}

		if (Operation == AtomicRMWInst::Xchg &&
		!Val->getType()->isIntegerTy() &&
		!Val->getType()->isFloatingPointTy()) {
		return Error(ValLoc, "atomicrmw " +
		AtomicRMWInst::getOperationName(Operation) +
		" operand must be an integer or floating point type");
		}

unsigned Size = Val->getType()->getPrimitiveSizeInBits();		unsigned Size = Val->getType()->getPrimitiveSizeInBits();
if (Size < 8 \|\| (Size & (Size - 1)))		if (Size < 8 \|\| (Size & (Size - 1)))
return Error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"		return Error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
" integer");		" integer");

AtomicRMWInst *RMWI =		AtomicRMWInst *RMWI =
new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID);		new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID);
RMWI->setVolatile(isVolatile);		RMWI->setVolatile(isVolatile);
▲ Show 20 Lines • Show All 1,465 Lines • Show Last 20 Lines

lib/CodeGen/AtomicExpandPass.cpp

Show First 20 Lines • Show All 489 Lines • ▼ Show 20 Lines	bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
// Now we have an appropriate swap instruction, lower it as usual.		// Now we have an appropriate swap instruction, lower it as usual.
return tryExpandAtomicRMW(AI);		return tryExpandAtomicRMW(AI);
}		}

static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,		static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
Value Loaded, Value NewVal,		Value Loaded, Value NewVal,
AtomicOrdering MemOpOrder,		AtomicOrdering MemOpOrder,
Value &Success, Value &NewLoaded) {		Value &Success, Value &NewLoaded) {
		Type *OrigTy = NewVal->getType();

		bool NeedBitcast = OrigTy->isFloatingPointTy();
		jyknightUnsubmitted Not Done Reply Inline Actions Add a comment here that this code can go away if the cmpxchg instruction adds support for floating point types. jyknight: Add a comment here that this code can go away if the cmpxchg instruction adds support for…
		arsenmAuthorUnsubmitted Done Reply Inline Actions I think there might need to be a separate fcmpxchg instruction for that, unless you mean there will also be a version that treats the FP type here as integer in memory arsenm: I think there might need to be a separate fcmpxchg instruction for that, unless you mean there…
		jyknightUnsubmitted Not Done Reply Inline Actions No -- the intent is not to compare for floating-point equality à la fcmp, but rather just for bit equality (e.g. NaNs are equal to each other when they have the same bit representation, and unequal if they do not). jyknight: No -- the intent is not to compare for floating-point equality à la fcmp, but rather just for bit…
		jfbUnsubmitted Not Done Reply Inline Actions This is what various ISAs and programming languages do, so I support what James says :) jfb: This is what various ISAs and programming languages do, so I support what James says :)
		arsenmAuthorUnsubmitted Done Reply Inline Actions AMDGPU does also have fcmpxchg, which does an FP compare, with NaNs failing, etc. arsenm: AMDGPU does also have fcmpxchg, which does an FP compare, with NaNs failing, etc.
		jfbUnsubmitted Not Done Reply Inline Actions Off topic... but wat?!? Once you have a NaN your fcmpxchg infloops? Count me confused. jfb: Off topic... but wat?!? Once you have a NaN your fcmpxchg infloops? Count me confused.
		arsenmAuthorUnsubmitted Done Reply Inline Actions It doesn't let you put the NaN in? As long as the memory was initialized with something not-NaN before I think it works? Otherwise you're stuck with NaNs forever. I haven't tried using it, but that's my interpretation of the manual. arsenm: It doesn't let you put the NaN in? As long as the memory was initialized with something not-NaN…
		asbUnsubmitted Not Done Reply Inline Actions atomicrmw xchg with FP types makes sense - the semantics are unambiguous. Is it really worth the potential confusion of what fp cmpxchg means vs just sticking with bitcast + integer cmpxchg? asb: atomicrmw xchg with FP types makes sense - the semantics are unambiguous. Is it really worth…
		arsenmAuthorUnsubmitted Done Reply Inline Actions To be clear, I think fcmpxchg is its own operation, separate from the current cmpxchg inst arsenm: To be clear, I think fcmpxchg is its own operation, separate from the current cmpxchg inst
		jyknightUnsubmitted Not Done Reply Inline Actions I hope we never support "fcmpxchg" -- that is, using a floating point semantic comparison. I'm having trouble imagining when that could ever be a useful operation. Whether or not we support bitwise cmpxchg with FP types I'm pretty agnostic to. If there's some reason why it's useful to do so, we should. If there isn't, maybe we should or maybe we shouldn't. But -- to the point of this thread: there's now a comment here that this code should be removed if we do so, which is all I really wanted. :) jyknight: I hope we never support "fcmpxchg" -- that is, using a floating point semantic comparison. I'm…
		if (NeedBitcast) {
		IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
		unsigned AS = Addr->getType()->getPointerAddressSpace();
		Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
		NewVal = Builder.CreateBitCast(NewVal, IntTy);
		Loaded = Builder.CreateBitCast(Loaded, IntTy);
		}

Value* Pair = Builder.CreateAtomicCmpXchg(		Value* Pair = Builder.CreateAtomicCmpXchg(
Addr, Loaded, NewVal, MemOpOrder,		Addr, Loaded, NewVal, MemOpOrder,
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));		AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
Success = Builder.CreateExtractValue(Pair, 1, "success");		Success = Builder.CreateExtractValue(Pair, 1, "success");
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");		NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

		if (NeedBitcast)
		NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}		}

/// Emit IR to implement the given atomicrmw operation on values in registers,		/// Emit IR to implement the given atomicrmw operation on values in registers,
/// returning the new value.		/// returning the new value.
static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,		static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
Value Loaded, Value Inc) {		Value Loaded, Value Inc) {
Value *NewVal;		Value *NewVal;
switch (Op) {		switch (Op) {
▲ Show 20 Lines • Show All 1,230 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Show First 20 Lines • Show All 4,722 Lines • ▼ Show 20 Lines	case ISD::SCALAR_TO_VECTOR: {
for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I)		for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I)
NewElts.push_back(Undef);		NewElts.push_back(Undef);

SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts);		SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts);
SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);		SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat);
Results.push_back(CvtVec);		Results.push_back(CvtVec);
break;		break;
}		}
		case ISD::ATOMIC_SWAP: {
		AtomicSDNode *AM = cast<AtomicSDNode>(Node);
		SDLoc SL(Node);
		SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal());
		assert(NVT.getSizeInBits() == OVT.getSizeInBits() &&
		"unexpected promotion type");
		assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() &&
		"unexpected atomic_swap with illegal type");

		SDValue NewAtomic
		= DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT,
		DAG.getVTList(NVT, MVT::Other),
		{ AM->getChain(), AM->getBasePtr(), CastVal },
		AM->getMemOperand());
		Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic));
		Results.push_back(NewAtomic.getValue(1));
		break;
		}
}		}

// Replace the original node with the legalized result.		// Replace the original node with the legalized result.
if (!Results.empty()) {		if (!Results.empty()) {
LLVM_DEBUG(dbgs() << "Successfully promoted node\n");		LLVM_DEBUG(dbgs() << "Successfully promoted node\n");
ReplaceNode(Node, Results.data());		ReplaceNode(Node, Results.data());
} else		} else
LLVM_DEBUG(dbgs() << "Could not promote node\n");		LLVM_DEBUG(dbgs() << "Could not promote node\n");
▲ Show 20 Lines • Show All 64 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

Show First 20 Lines • Show All 98 Lines • ▼ Show 20 Lines	#endif
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;		case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;		case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break;		case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break;
case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;		case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;		case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;		case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;		case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;		case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
		case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;		case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;		case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
case ISD::SINT_TO_FP:		case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;		case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;		case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;		case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
}		}

▲ Show 20 Lines • Show All 1,812 Lines • ▼ Show 20 Lines	switch (N->getOpcode()) {
case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;		case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break;		case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break;
case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break;		case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break;
case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break;		case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break;

case ISD::SINT_TO_FP:		case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;		case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;		case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;
		case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
}		}

if (R.getNode())		if (R.getNode())
SetPromotedFloat(SDValue(N, ResNo), R);		SetPromotedFloat(SDValue(N, ResNo), R);
}		}

// Bitcast from i16 to f16: convert the i16 to a f32 value instead.		// Bitcast from i16 to f16: convert the i16 to a f32 value instead.
// At this point, it is not possible to determine if the bitcast value is		// At this point, it is not possible to determine if the bitcast value is
▲ Show 20 Lines • Show All 217 Lines • ▼ Show 20 Lines	return DAG.getNode(
DAG.getNode(ISD::FP_ROUND, DL, VT, NV, DAG.getIntPtrConstant(0, DL)));		DAG.getNode(ISD::FP_ROUND, DL, VT, NV, DAG.getIntPtrConstant(0, DL)));
}		}

SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {		SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),		return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
N->getValueType(0)));		N->getValueType(0)));
}		}

		/// Legalize the floating-point result of an ATOMIC_SWAP node by performing
		/// the swap on an integer version of the value operand instead.
		///
		/// The value operand is passed through BitConvertToInteger, a new
		/// ATOMIC_SWAP is built on the resulting integer type with the original
		/// chain, base pointer and memory operand, and the integer result is then
		/// converted to NFPVT (the type VT is transformed to) using the opcode
		/// returned by GetPromotionOpcode. Uses of the chain result of the old node
		/// are redirected to the chain result of the new node.
		///
		/// \param N the ATOMIC_SWAP node whose result 0 is being legalized.
		/// \returns the converted value result of the new atomic node.
		///
		/// NOTE(review): the result conversion always goes through
		/// GetPromotionOpcode(VT, NFPVT). This diff dispatches to this helper from
		/// both the soften and the promote float-result switches; confirm that
		/// GetPromotionOpcode accepts the type pair produced on the soften path.
		SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
		EVT VT = N->getValueType(0);
		EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
		jfbUnsubmitted Not Done Reply Inline Actions I don't get why only FP16 is accessible here. jfb: I don't get why only FP16 is accessible here.
		arsenmAuthorUnsubmitted Not Done Reply Inline Actions We don't support any other cases where a narrower FP type is illegal, but a wider one is. That would require a generalized version of FP16_TO_FP/FP_TO_FP16. There aren't any in tree targets that support f64 but not f32 for example. I've removed this assertion, so this will fail in GetPromotionOpcode for any other type like any other legalization would arsenm: We don't support any other cases where a narrower FP type is illegal, but a wider one is. That…

		AtomicSDNode *AM = cast<AtomicSDNode>(N);
		SDLoc SL(N);

		// Integer form of the swapped-in value, as produced by BitConvertToInteger.
		SDValue CastVal = BitConvertToInteger(AM->getVal());
		EVT CastVT = CastVal.getValueType();

		// Rebuild the swap on CastVT. The chain, base pointer and memory operand
		// are taken unchanged from the original node.
		SDValue NewAtomic
		= DAG.getAtomic(ISD::ATOMIC_SWAP, SL, CastVT,
		DAG.getVTList(CastVT, MVT::Other),
		{ AM->getChain(), AM->getBasePtr(), CastVal },
		AM->getMemOperand());

		// Convert the integer result of the new node to NFPVT.
		SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT,
		NewAtomic);
		// Legalize the chain result by replacing uses of the old value chain with the
		// new one
		ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1));

		return ResultCast;

		}

lib/CodeGen/SelectionDAG/LegalizeTypes.h

Show First 20 Lines • Show All 612 Lines • ▼ Show 20 Lines	private:
SDValue PromoteFloatRes_FMAD(SDNode *N);		SDValue PromoteFloatRes_FMAD(SDNode *N);
SDValue PromoteFloatRes_FPOWI(SDNode *N);		SDValue PromoteFloatRes_FPOWI(SDNode *N);
SDValue PromoteFloatRes_FP_ROUND(SDNode *N);		SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
SDValue PromoteFloatRes_LOAD(SDNode *N);		SDValue PromoteFloatRes_LOAD(SDNode *N);
SDValue PromoteFloatRes_SELECT(SDNode *N);		SDValue PromoteFloatRes_SELECT(SDNode *N);
SDValue PromoteFloatRes_SELECT_CC(SDNode *N);		SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
SDValue PromoteFloatRes_UnaryOp(SDNode *N);		SDValue PromoteFloatRes_UnaryOp(SDNode *N);
SDValue PromoteFloatRes_UNDEF(SDNode *N);		SDValue PromoteFloatRes_UNDEF(SDNode *N);
		SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N);
SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);		SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);

bool PromoteFloatOperand(SDNode *N, unsigned OpNo);		bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);		SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);		SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);		SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);		SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);		SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
▲ Show 20 Lines • Show All 296 Lines • Show Last 20 Lines

lib/CodeGen/TargetLoweringBase.cpp

Show First 20 Lines • Show All 577 Lines • ▼ Show 20 Lines	void TargetLoweringBase::initActions() {
memset(LoadExtActions, 0, sizeof(LoadExtActions));		memset(LoadExtActions, 0, sizeof(LoadExtActions));
memset(TruncStoreActions, 0, sizeof(TruncStoreActions));		memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
memset(IndexedModeActions, 0, sizeof(IndexedModeActions));		memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
memset(CondCodeActions, 0, sizeof(CondCodeActions));		memset(CondCodeActions, 0, sizeof(CondCodeActions));
std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr);		std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr);
std::fill(std::begin(TargetDAGCombineArray),		std::fill(std::begin(TargetDAGCombineArray),
std::end(TargetDAGCombineArray), 0);		std::end(TargetDAGCombineArray), 0);

		for (MVT VT : MVT::fp_valuetypes()) {
		MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
		if (IntVT.isValid()) {
		setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
		AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
		}
		}

// Set default actions for various operations.		// Set default actions for various operations.
for (MVT VT : MVT::all_valuetypes()) {		for (MVT VT : MVT::all_valuetypes()) {
// Default all indexed load / store to expand.		// Default all indexed load / store to expand.
for (unsigned IM = (unsigned)ISD::PRE_INC;		for (unsigned IM = (unsigned)ISD::PRE_INC;
IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {		IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
setIndexedLoadAction(IM, VT, Expand);		setIndexedLoadAction(IM, VT, Expand);
setIndexedStoreAction(IM, VT, Expand);		setIndexedStoreAction(IM, VT, Expand);
}		}
▲ Show 20 Lines • Show All 1,266 Lines • Show Last 20 Lines

lib/IR/Verifier.cpp

Show First 20 Lines • Show All 3,346 Lines • ▼ Show 20 Lines	void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
Assert(RMWI.getOrdering() != AtomicOrdering::NotAtomic,		Assert(RMWI.getOrdering() != AtomicOrdering::NotAtomic,
"atomicrmw instructions must be atomic.", &RMWI);		"atomicrmw instructions must be atomic.", &RMWI);
Assert(RMWI.getOrdering() != AtomicOrdering::Unordered,		Assert(RMWI.getOrdering() != AtomicOrdering::Unordered,
"atomicrmw instructions cannot be unordered.", &RMWI);		"atomicrmw instructions cannot be unordered.", &RMWI);
auto Op = RMWI.getOperation();		auto Op = RMWI.getOperation();
PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());		PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
Assert(PTy, "First atomicrmw operand must be a pointer.", &RMWI);		Assert(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
Type *ElTy = PTy->getElementType();		Type *ElTy = PTy->getElementType();
		if (Op == AtomicRMWInst::Xchg) {
		Assert(ElTy->isIntegerTy() \|\| ElTy->isFloatingPointTy(), "atomicrmw " +
		AtomicRMWInst::getOperationName(Op) +
		" operand must have integer or floating point type!",
		&RMWI, ElTy);
		} else {
Assert(ElTy->isIntegerTy(), "atomicrmw " +		Assert(ElTy->isIntegerTy(), "atomicrmw " +
AtomicRMWInst::getOperationName(Op) +		AtomicRMWInst::getOperationName(Op) +
" operand must have integer type!",		" operand must have integer type!",
&RMWI, ElTy);		&RMWI, ElTy);
		}
checkAtomicMemAccessSize(ElTy, &RMWI);		checkAtomicMemAccessSize(ElTy, &RMWI);
Assert(ElTy == RMWI.getOperand(1)->getType(),		Assert(ElTy == RMWI.getOperand(1)->getType(),
"Argument value type does not match pointer operand type!", &RMWI,		"Argument value type does not match pointer operand type!", &RMWI,
ElTy);		ElTy);
Assert(AtomicRMWInst::FIRST_BINOP <= Op && Op <= AtomicRMWInst::LAST_BINOP,		Assert(AtomicRMWInst::FIRST_BINOP <= Op && Op <= AtomicRMWInst::LAST_BINOP,
"Invalid binary operation!", &RMWI);		"Invalid binary operation!", &RMWI);
visitInstruction(RMWI);		visitInstruction(RMWI);
}		}
▲ Show 20 Lines • Show All 1,775 Lines • Show Last 20 Lines

lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 11,433 Lines • ▼ Show 20 Lines	return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");		Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}		}

Type *Tys[] = { Addr->getType() };		Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int =		Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;		IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);		Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);

return Builder.CreateTruncOrBitCast(		Type *EltTy = cast<PointerType>(Addr->getType())->getElementType();
Builder.CreateCall(Ldxr, Addr),
cast<PointerType>(Addr->getType())->getElementType());		const DataLayout &DL = M->getDataLayout();
		IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(EltTy));
		Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);

		return Builder.CreateBitCast(Trunc, EltTy);
}		}

void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(		void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilder<> &Builder) const {		IRBuilder<> &Builder) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();		Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));		Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
}		}

Show All 18 Lines	if (Val->getType()->getPrimitiveSizeInBits() == 128) {
return Builder.CreateCall(Stxr, {Lo, Hi, Addr});		return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
}		}

Intrinsic::ID Int =		Intrinsic::ID Int =
IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;		IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
Type *Tys[] = { Addr->getType() };		Type *Tys[] = { Addr->getType() };
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);		Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);

		const DataLayout &DL = M->getDataLayout();
		IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
		Val = Builder.CreateBitCast(Val, IntValTy);

return Builder.CreateCall(Stxr,		return Builder.CreateCall(Stxr,
{Builder.CreateZExtOrBitCast(		{Builder.CreateZExtOrBitCast(
Val, Stxr->getFunctionType()->getParamType(0)),		Val, Stxr->getFunctionType()->getParamType(0)),
Addr});		Addr});
}		}

bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(		bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {		Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
▲ Show 20 Lines • Show All 136 Lines • Show Last 20 Lines

test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll

This file was added.

				; RUN: not llvm-as -disable-output %s 2>&1 \| FileCheck %s

				; CHECK: error: atomicrmw xchg operand must be an integer or floating point type
				; Negative test: the value operand of the xchg below is a pointer (i32*),
				; which is neither an integer nor a floating point type, so assembling this
				; function is expected to fail with the diagnostic checked above.
				define void @f(i32** %ptr) {
				atomicrmw xchg i32** %ptr, i32* null seq_cst
				ret void
				}

test/Bitcode/compatibility.ll

Show First 20 Lines • Show All 755 Lines • ▼ Show 20 Lines	define void @atomics(i32* %word) {
; CHECK: store atomic i32 23, i32* %word monotonic, align 4		; CHECK: store atomic i32 23, i32* %word monotonic, align 4
store atomic volatile i32 24, i32* %word monotonic, align 4		store atomic volatile i32 24, i32* %word monotonic, align 4
; CHECK: store atomic volatile i32 24, i32* %word monotonic, align 4		; CHECK: store atomic volatile i32 24, i32* %word monotonic, align 4
store atomic volatile i32 25, i32* %word syncscope("singlethread") monotonic, align 4		store atomic volatile i32 25, i32* %word syncscope("singlethread") monotonic, align 4
; CHECK: store atomic volatile i32 25, i32* %word syncscope("singlethread") monotonic, align 4		; CHECK: store atomic volatile i32 25, i32* %word syncscope("singlethread") monotonic, align 4
ret void		ret void
}		}

		; atomicrmw xchg with a float value operand. The check line inside the body
		; expects the instruction to be printed back unchanged apart from the
		; constant, which is written as 1.0 here and expected as 1.000000e+00.
		define void @fp_atomics(float* %word) {
		; CHECK: %atomicrmw.xchg = atomicrmw xchg float* %word, float 1.000000e+00 monotonic
		%atomicrmw.xchg = atomicrmw xchg float* %word, float 1.0 monotonic
		ret void
		}

;; Fast Math Flags		;; Fast Math Flags
define void @fastmathflags(float %op1, float %op2) {		define void @fastmathflags(float %op1, float %op2) {
%f.nnan = fadd nnan float %op1, %op2		%f.nnan = fadd nnan float %op1, %op2
; CHECK: %f.nnan = fadd nnan float %op1, %op2		; CHECK: %f.nnan = fadd nnan float %op1, %op2
%f.ninf = fadd ninf float %op1, %op2		%f.ninf = fadd ninf float %op1, %op2
; CHECK: %f.ninf = fadd ninf float %op1, %op2		; CHECK: %f.ninf = fadd ninf float %op1, %op2
%f.nsz = fadd nsz float %op1, %op2		%f.nsz = fadd nsz float %op1, %op2
; CHECK: %f.nsz = fadd nsz float %op1, %op2		; CHECK: %f.nsz = fadd nsz float %op1, %op2
▲ Show 20 Lines • Show All 947 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/flat_atomics.ll

	Show First 20 Lines • Show All 697 Lines • ▼ Show 20 Lines
	; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}			; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
	define amdgpu_kernel void @atomic_xchg_i32_offset(i32* %out, i32 %in) {			define amdgpu_kernel void @atomic_xchg_i32_offset(i32* %out, i32 %in) {
	entry:			entry:
	%gep = getelementptr i32, i32* %out, i32 4			%gep = getelementptr i32, i32* %out, i32 4
	%val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst			%val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
	ret void			ret void
	}			}

				; Float variant of the xchg-with-offset test: the exchanged value is a float
				; and the address is 4 float elements past %out. Both check patterns expect a
				; flat_atomic_swap; only the GFX9 pattern expects the 16-byte offset to appear
				; on the instruction itself.
				; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
				; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
				; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
				define amdgpu_kernel void @atomic_xchg_f32_offset(float* %out, float %in) {
				entry:
				%gep = getelementptr float, float* %out, i32 4
				%val = atomicrmw volatile xchg float* %gep, float %in seq_cst
				ret void
				}

	; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:			; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
	; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}			; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
	; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}			; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
	; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]			; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
	define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {			define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32* %out, i32* %out2, i32 %in) {
	entry:			entry:
	%gep = getelementptr i32, i32* %out, i32 4			%gep = getelementptr i32, i32* %out, i32 4
	%val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst			%val = atomicrmw volatile xchg i32* %gep, i32 %in seq_cst
	▲ Show 20 Lines • Show All 323 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/flat_atomics_i64.ll

	Show First 20 Lines • Show All 644 Lines • ▼ Show 20 Lines
	; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}			; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
	define amdgpu_kernel void @atomic_xchg_i64_offset(i64* %out, i64 %in) {			define amdgpu_kernel void @atomic_xchg_i64_offset(i64* %out, i64 %in) {
	entry:			entry:
	%gep = getelementptr i64, i64* %out, i64 4			%gep = getelementptr i64, i64* %out, i64 4
	%tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst			%tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
	ret void			ret void
	}			}

				; Double variant of the xchg-with-offset test: the exchanged value is a double
				; and the address is 4 double elements past %out. The check expects the 64-bit
				; swap instruction flat_atomic_swap_x2 with nothing following the data
				; register pair on that line.
				; GCN-LABEL: {{^}}atomic_xchg_f64_offset:
				; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
				define amdgpu_kernel void @atomic_xchg_f64_offset(double* %out, double %in) {
				entry:
				%gep = getelementptr double, double* %out, i64 4
				%tmp0 = atomicrmw volatile xchg double* %gep, double %in seq_cst
				ret void
				}

	; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:			; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
	; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}			; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
	; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]			; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
	define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {			define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
	entry:			entry:
	%gep = getelementptr i64, i64* %out, i64 4			%gep = getelementptr i64, i64* %out, i64 4
	%tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst			%tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
	store i64 %tmp0, i64* %out2			store i64 %tmp0, i64* %out2
	▲ Show 20 Lines • Show All 315 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/global_atomics.ll

	Show First 20 Lines • Show All 833 Lines • ▼ Show 20 Lines
	; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}			; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
	define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {			define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
	entry:			entry:
	%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4			%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
	%val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst			%val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
	ret void			ret void
	}			}

				; Float variant of the global (addrspace 1) xchg-with-offset test: the
				; exchanged value is a float and the address is 4 float elements past %out.
				; The SIVI pattern expects buffer_atomic_swap and the GFX9 pattern expects
				; global_atomic_swap, each carrying the 16-byte offset.
				; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
				; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

				; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
				define amdgpu_kernel void @atomic_xchg_f32_offset(float addrspace(1)* %out, float %in) {
				entry:
				%gep = getelementptr float, float addrspace(1)* %out, i64 4
				%val = atomicrmw volatile xchg float addrspace(1)* %gep, float %in seq_cst
				ret void
				}

	; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:			; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
	; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}			; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
	; SIVI: buffer_store_dword [[RET]]			; SIVI: buffer_store_dword [[RET]]

	; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}			; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16 glc{{$}}
	define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {			define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
	entry:			entry:
	%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4			%gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
	▲ Show 20 Lines • Show All 377 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/global_atomics_i64.ll

	Show First 20 Lines • Show All 777 Lines • ▼ Show 20 Lines
	; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}			; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
	define amdgpu_kernel void @atomic_xchg_i64_offset(i64 addrspace(1)* %out, i64 %in) {			define amdgpu_kernel void @atomic_xchg_i64_offset(i64 addrspace(1)* %out, i64 %in) {
	entry:			entry:
	%gep = getelementptr i64, i64 addrspace(1)* %out, i64 4			%gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
	%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst			%tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %gep, i64 %in seq_cst
	ret void			ret void
	}			}

				; GCN-LABEL: {{^}}atomic_xchg_f64_offset:
				; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}

				; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
				define amdgpu_kernel void @atomic_xchg_f64_offset(double addrspace(1)* %out, double %in) {
				entry:
				%gep = getelementptr double, double addrspace(1)* %out, i64 4
				%tmp0 = atomicrmw volatile xchg double addrspace(1)* %gep, double %in seq_cst
				ret void
				}

	; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:			; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
	; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}			; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
	; CIVI: buffer_store_dwordx2 [[RET]]			; CIVI: buffer_store_dwordx2 [[RET]]

	; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}			; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32 glc{{$}}
	define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {			define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
	entry:			entry:
	%gep = getelementptr i64, i64 addrspace(1)* %out, i64 4			%gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
	▲ Show 20 Lines • Show All 386 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/local-atomics.ll

	Show All 30 Lines
	; GCN: s_endpgm			; GCN: s_endpgm
	define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {			define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
	%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4			%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
	%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst			%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
	store i32 %result, i32 addrspace(1)* %out, align 4			store i32 %result, i32 addrspace(1)* %out, align 4
	ret void			ret void
	}			}

				; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_f32_offset:
				; SICIVI: s_mov_b32 m0
				; GFX9-NOT: m0

				; EG: LDS_WRXCHG_RET *
				; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
				; GCN: s_endpgm
				define amdgpu_kernel void @lds_atomic_xchg_ret_f32_offset(float addrspace(1)* %out, float addrspace(3)* %ptr) nounwind {
				%gep = getelementptr float, float addrspace(3)* %ptr, i32 4
				%result = atomicrmw xchg float addrspace(3)* %gep, float 4.0 seq_cst
				store float %result, float addrspace(1)* %out, align 4
				ret void
				}

	; XXX - Is it really necessary to load 4 into VGPR?			; XXX - Is it really necessary to load 4 into VGPR?
	; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:			; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
	; EG: LDS_ADD_RET *			; EG: LDS_ADD_RET *

	; SICIVI-DAG: s_mov_b32 m0			; SICIVI-DAG: s_mov_b32 m0
	; GFX9-NOT: m0			; GFX9-NOT: m0

	; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],			; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]],
	▲ Show 20 Lines • Show All 670 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/local-atomics64.ll

	Show All 21 Lines
	; GCN: s_endpgm			; GCN: s_endpgm
	define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {			define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
	%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4			%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
	%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst			%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
	store i64 %result, i64 addrspace(1)* %out, align 8			store i64 %result, i64 addrspace(1)* %out, align 8
	ret void			ret void
	}			}

				; GCN-LABEL: {{^}}lds_atomic_xchg_ret_f64_offset:
				; SICIVI: s_mov_b32 m0
				; GFX9-NOT: m0

				; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
				; GCN: s_endpgm
				define amdgpu_kernel void @lds_atomic_xchg_ret_f64_offset(double addrspace(1)* %out, double addrspace(3)* %ptr) nounwind {
				%gep = getelementptr double, double addrspace(3)* %ptr, i32 4
				%result = atomicrmw xchg double addrspace(3)* %gep, double 4.0 seq_cst
				store double %result, double addrspace(1)* %out, align 8
				ret void
				}

	; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:			; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
	; SICIVI: s_mov_b32 m0			; SICIVI: s_mov_b32 m0
	; GFX9-NOT: m0			; GFX9-NOT: m0

	; GCN: ds_add_rtn_u64			; GCN: ds_add_rtn_u64
	; GCN: s_endpgm			; GCN: s_endpgm
	define amdgpu_kernel void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {			define amdgpu_kernel void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
	%result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst			%result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
	▲ Show 20 Lines • Show All 578 Lines • Show Last 20 Lines

test/CodeGen/X86/atomic128.ll

	Show First 20 Lines • Show All 354 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: lock cmpxchg16b (%rdi)			; CHECK-NEXT: lock cmpxchg16b (%rdi)
	; CHECK-NEXT: jne LBB13_1			; CHECK-NEXT: jne LBB13_1
	; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end			; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
	; CHECK-NEXT: popq %rbx			; CHECK-NEXT: popq %rbx
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	store atomic i128 %in, i128* %p unordered, align 16			store atomic i128 %in, i128* %p unordered, align 16
	ret void			ret void
	}			}


				@fsc128 = external global fp128

				define void @atomic_fetch_swapf128(fp128 %x) nounwind {
				; CHECK-LABEL: atomic_fetch_swapf128:
				; CHECK: ## %bb.0:
				; CHECK-NEXT: pushq %rbx
				; CHECK-NEXT: movq %rsi, %rcx
				; CHECK-NEXT: movq %rdi, %rbx
				; CHECK-NEXT: movq _fsc128@{{.*}}(%rip), %rsi
				; CHECK-NEXT: movq (%rsi), %rax
				; CHECK-NEXT: movq 8(%rsi), %rdx
				; CHECK-NEXT: .p2align 4, 0x90
				; CHECK-NEXT: LBB14_1: ## %atomicrmw.start
				; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
				; CHECK-NEXT: lock cmpxchg16b (%rsi)
				; CHECK-NEXT: jne LBB14_1
				; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
				; CHECK-NEXT: popq %rbx
				; CHECK-NEXT: retq
				%t1 = atomicrmw xchg fp128* @fsc128, fp128 %x acquire
				ret void
				}

test/CodeGen/X86/atomic16.ll

	; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=corei7 -verify-machineinstrs -show-mc-encoding \| FileCheck %s --check-prefix X64			; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=corei7 -verify-machineinstrs -show-mc-encoding \| FileCheck %s --check-prefix X64
	; RUN: llc < %s -O0 -mtriple=i386-unknown-unknown -mcpu=corei7 -verify-machineinstrs \| FileCheck %s --check-prefix X32			; RUN: llc < %s -O0 -mtriple=i386-unknown-unknown -mcpu=corei7 -verify-machineinstrs \| FileCheck %s --check-prefix X32

	@sc16 = external global i16			@sc16 = external global i16
				@fsc16 = external global half

	define void @atomic_fetch_add16() nounwind {			define void @atomic_fetch_add16() nounwind {
	; X64-LABEL: atomic_fetch_add16			; X64-LABEL: atomic_fetch_add16
	; X32-LABEL: atomic_fetch_add16			; X32-LABEL: atomic_fetch_add16
	entry:			entry:
	; 32-bit			; 32-bit
	%t1 = atomicrmw add i16* @sc16, i16 1 acquire			%t1 = atomicrmw add i16* @sc16, i16 1 acquire
	; X64: lock			; X64: lock
	▲ Show 20 Lines • Show All 255 Lines • ▼ Show 20 Lines
	; X64-NOT: lock			; X64-NOT: lock
	; X64: xchgw			; X64: xchgw
	; X32-NOT: lock			; X32-NOT: lock
	; X32: xchgw			; X32: xchgw
	ret void			ret void
	; X64: ret			; X64: ret
	; X32: ret			; X32: ret
	}			}

				define void @atomic_fetch_swapf16(half %x) nounwind {
				%t1 = atomicrmw xchg half* @fsc16, half %x acquire
				; X64-NOT: lock
				; X64: xchgw
				; X32-NOT: lock
				; X32: xchgw
				ret void
				; X64: ret
				; X32: ret
				}

test/CodeGen/X86/atomic32.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=corei7 -verify-machineinstrs \| FileCheck %s -check-prefixes=X64,X64-CMOV		; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=corei7 -verify-machineinstrs \| FileCheck %s -check-prefixes=X64,X64-CMOV
; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -verify-machineinstrs \| FileCheck %s -check-prefixes=X86,X86-CMOV		; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -verify-machineinstrs \| FileCheck %s -check-prefixes=X86,X86-CMOV
; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -mattr=-cmov,-sse -verify-machineinstrs \| FileCheck %s --check-prefixes=X86,X86-NOCMOV		; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -mattr=-cmov,-sse -verify-machineinstrs \| FileCheck %s --check-prefixes=X86,X86-NOCMOV

@sc32 = external global i32		@sc32 = external global i32
		@fsc32 = external global float

define void @atomic_fetch_add32() nounwind {		define void @atomic_fetch_add32() nounwind {
; X64-LABEL: atomic_fetch_add32:		; X64-LABEL: atomic_fetch_add32:
; X64: # %bb.0: # %entry		; X64: # %bb.0: # %entry
; X64-NEXT: lock incl {{.*}}(%rip)		; X64-NEXT: lock incl {{.*}}(%rip)
; X64-NEXT: lock addl $3, {{.*}}(%rip)		; X64-NEXT: lock addl $3, {{.*}}(%rip)
; X64-NEXT: movl $5, %eax		; X64-NEXT: movl $5, %eax
; X64-NEXT: lock xaddl %eax, {{.*}}(%rip)		; X64-NEXT: lock xaddl %eax, {{.*}}(%rip)
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines	; X86-NEXT: retl
ret void		ret void
}		}

define void @atomic_fetch_and32() nounwind {		define void @atomic_fetch_and32() nounwind {
; X64-LABEL: atomic_fetch_and32:		; X64-LABEL: atomic_fetch_and32:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: lock andl $3, {{.*}}(%rip)		; X64-NEXT: lock andl $3, {{.*}}(%rip)
; X64-NEXT: movl sc32, %eax		; X64-NEXT: movl sc32, %eax
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB2_1: # %atomicrmw.start		; X64-NEXT: .LBB2_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx		; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $5, %ecx		; X64-NEXT: andl $5, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)		; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
; X64-NEXT: sete %dl		; X64-NEXT: sete %dl
; X64-NEXT: testb $1, %dl		; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, %ecx		; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB2_2		; X64-NEXT: jne .LBB2_2
; X64-NEXT: jmp .LBB2_1		; X64-NEXT: jmp .LBB2_1
; X64-NEXT: .LBB2_2: # %atomicrmw.end		; X64-NEXT: .LBB2_2: # %atomicrmw.end
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: lock andl %eax, {{.*}}(%rip)		; X64-NEXT: lock andl %eax, {{.*}}(%rip)
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X86-LABEL: atomic_fetch_and32:		; X86-LABEL: atomic_fetch_and32:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: subl $8, %esp		; X86-NEXT: subl $8, %esp
; X86-NEXT: lock andl $3, sc32		; X86-NEXT: lock andl $3, sc32
; X86-NEXT: movl sc32, %eax		; X86-NEXT: movl sc32, %eax
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: .LBB2_1: # %atomicrmw.start		; X86-NEXT: .LBB2_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %ecx		; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $5, %ecx		; X86-NEXT: andl $5, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32		; X86-NEXT: lock cmpxchgl %ecx, sc32
; X86-NEXT: sete %dl		; X86-NEXT: sete %dl
; X86-NEXT: testb $1, %dl		; X86-NEXT: testb $1, %dl
; X86-NEXT: movl %eax, %ecx		; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill		; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: jne .LBB2_2		; X86-NEXT: jne .LBB2_2
; X86-NEXT: jmp .LBB2_1		; X86-NEXT: jmp .LBB2_1
; X86-NEXT: .LBB2_2: # %atomicrmw.end		; X86-NEXT: .LBB2_2: # %atomicrmw.end
; X86-NEXT: movl (%esp), %eax # 4-byte Reload		; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: lock andl %eax, sc32		; X86-NEXT: lock andl %eax, sc32
; X86-NEXT: addl $8, %esp		; X86-NEXT: addl $8, %esp
; X86-NEXT: retl		; X86-NEXT: retl
%t1 = atomicrmw and i32* @sc32, i32 3 acquire		%t1 = atomicrmw and i32* @sc32, i32 3 acquire
%t2 = atomicrmw and i32* @sc32, i32 5 acquire		%t2 = atomicrmw and i32* @sc32, i32 5 acquire
%t3 = atomicrmw and i32* @sc32, i32 %t2 acquire		%t3 = atomicrmw and i32* @sc32, i32 %t2 acquire
ret void		ret void
}		}

define void @atomic_fetch_or32() nounwind {		define void @atomic_fetch_or32() nounwind {
; X64-LABEL: atomic_fetch_or32:		; X64-LABEL: atomic_fetch_or32:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: lock orl $3, {{.*}}(%rip)		; X64-NEXT: lock orl $3, {{.*}}(%rip)
; X64-NEXT: movl sc32, %eax		; X64-NEXT: movl sc32, %eax
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB3_1: # %atomicrmw.start		; X64-NEXT: .LBB3_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx		; X64-NEXT: movl %eax, %ecx
; X64-NEXT: orl $5, %ecx		; X64-NEXT: orl $5, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)		; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
; X64-NEXT: sete %dl		; X64-NEXT: sete %dl
; X64-NEXT: testb $1, %dl		; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, %ecx		; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB3_2		; X64-NEXT: jne .LBB3_2
; X64-NEXT: jmp .LBB3_1		; X64-NEXT: jmp .LBB3_1
; X64-NEXT: .LBB3_2: # %atomicrmw.end		; X64-NEXT: .LBB3_2: # %atomicrmw.end
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: lock orl %eax, {{.*}}(%rip)		; X64-NEXT: lock orl %eax, {{.*}}(%rip)
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X86-LABEL: atomic_fetch_or32:		; X86-LABEL: atomic_fetch_or32:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: subl $8, %esp		; X86-NEXT: subl $8, %esp
; X86-NEXT: lock orl $3, sc32		; X86-NEXT: lock orl $3, sc32
; X86-NEXT: movl sc32, %eax		; X86-NEXT: movl sc32, %eax
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: .LBB3_1: # %atomicrmw.start		; X86-NEXT: .LBB3_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %ecx		; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl $5, %ecx		; X86-NEXT: orl $5, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32		; X86-NEXT: lock cmpxchgl %ecx, sc32
; X86-NEXT: sete %dl		; X86-NEXT: sete %dl
; X86-NEXT: testb $1, %dl		; X86-NEXT: testb $1, %dl
; X86-NEXT: movl %eax, %ecx		; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill		; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: jne .LBB3_2		; X86-NEXT: jne .LBB3_2
; X86-NEXT: jmp .LBB3_1		; X86-NEXT: jmp .LBB3_1
; X86-NEXT: .LBB3_2: # %atomicrmw.end		; X86-NEXT: .LBB3_2: # %atomicrmw.end
; X86-NEXT: movl (%esp), %eax # 4-byte Reload		; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: lock orl %eax, sc32		; X86-NEXT: lock orl %eax, sc32
; X86-NEXT: addl $8, %esp		; X86-NEXT: addl $8, %esp
; X86-NEXT: retl		; X86-NEXT: retl
%t1 = atomicrmw or i32* @sc32, i32 3 acquire		%t1 = atomicrmw or i32* @sc32, i32 3 acquire
%t2 = atomicrmw or i32* @sc32, i32 5 acquire		%t2 = atomicrmw or i32* @sc32, i32 5 acquire
%t3 = atomicrmw or i32* @sc32, i32 %t2 acquire		%t3 = atomicrmw or i32* @sc32, i32 %t2 acquire
ret void		ret void
}		}

define void @atomic_fetch_xor32() nounwind {		define void @atomic_fetch_xor32() nounwind {
; X64-LABEL: atomic_fetch_xor32:		; X64-LABEL: atomic_fetch_xor32:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: lock xorl $3, {{.*}}(%rip)		; X64-NEXT: lock xorl $3, {{.*}}(%rip)
; X64-NEXT: movl sc32, %eax		; X64-NEXT: movl sc32, %eax
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB4_1: # %atomicrmw.start		; X64-NEXT: .LBB4_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx		; X64-NEXT: movl %eax, %ecx
; X64-NEXT: xorl $5, %ecx		; X64-NEXT: xorl $5, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)		; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
; X64-NEXT: sete %dl		; X64-NEXT: sete %dl
; X64-NEXT: testb $1, %dl		; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, %ecx		; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB4_2		; X64-NEXT: jne .LBB4_2
; X64-NEXT: jmp .LBB4_1		; X64-NEXT: jmp .LBB4_1
; X64-NEXT: .LBB4_2: # %atomicrmw.end		; X64-NEXT: .LBB4_2: # %atomicrmw.end
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: lock xorl %eax, {{.*}}(%rip)		; X64-NEXT: lock xorl %eax, {{.*}}(%rip)
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X86-LABEL: atomic_fetch_xor32:		; X86-LABEL: atomic_fetch_xor32:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: subl $8, %esp		; X86-NEXT: subl $8, %esp
; X86-NEXT: lock xorl $3, sc32		; X86-NEXT: lock xorl $3, sc32
; X86-NEXT: movl sc32, %eax		; X86-NEXT: movl sc32, %eax
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: .LBB4_1: # %atomicrmw.start		; X86-NEXT: .LBB4_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %ecx		; X86-NEXT: movl %eax, %ecx
; X86-NEXT: xorl $5, %ecx		; X86-NEXT: xorl $5, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32		; X86-NEXT: lock cmpxchgl %ecx, sc32
; X86-NEXT: sete %dl		; X86-NEXT: sete %dl
; X86-NEXT: testb $1, %dl		; X86-NEXT: testb $1, %dl
; X86-NEXT: movl %eax, %ecx		; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill		; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: jne .LBB4_2		; X86-NEXT: jne .LBB4_2
; X86-NEXT: jmp .LBB4_1		; X86-NEXT: jmp .LBB4_1
; X86-NEXT: .LBB4_2: # %atomicrmw.end		; X86-NEXT: .LBB4_2: # %atomicrmw.end
; X86-NEXT: movl (%esp), %eax # 4-byte Reload		; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: lock xorl %eax, sc32		; X86-NEXT: lock xorl %eax, sc32
; X86-NEXT: addl $8, %esp		; X86-NEXT: addl $8, %esp
; X86-NEXT: retl		; X86-NEXT: retl
%t1 = atomicrmw xor i32* @sc32, i32 3 acquire		%t1 = atomicrmw xor i32* @sc32, i32 3 acquire
%t2 = atomicrmw xor i32* @sc32, i32 5 acquire		%t2 = atomicrmw xor i32* @sc32, i32 5 acquire
%t3 = atomicrmw xor i32* @sc32, i32 %t2 acquire		%t3 = atomicrmw xor i32* @sc32, i32 %t2 acquire
ret void		ret void
}		}

define void @atomic_fetch_nand32(i32 %x) nounwind {		define void @atomic_fetch_nand32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_nand32:		; X64-LABEL: atomic_fetch_nand32:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl sc32, %eax		; X64-NEXT: movl sc32, %eax
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB5_1: # %atomicrmw.start		; X64-NEXT: .LBB5_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx		; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %edx # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; X64-NEXT: andl %edx, %ecx		; X64-NEXT: andl %edx, %ecx
; X64-NEXT: notl %ecx		; X64-NEXT: notl %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)		; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
; X64-NEXT: sete %sil		; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil		; X64-NEXT: testb $1, %sil
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB5_2		; X64-NEXT: jne .LBB5_2
; X64-NEXT: jmp .LBB5_1		; X64-NEXT: jmp .LBB5_1
; X64-NEXT: .LBB5_2: # %atomicrmw.end		; X64-NEXT: .LBB5_2: # %atomicrmw.end
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X86-LABEL: atomic_fetch_nand32:		; X86-LABEL: atomic_fetch_nand32:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %ebx		; X86-NEXT: pushl %ebx
; X86-NEXT: subl $8, %esp		; X86-NEXT: subl $8, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl sc32, %ecx		; X86-NEXT: movl sc32, %ecx
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill		; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT: .LBB5_1: # %atomicrmw.start		; X86-NEXT: .LBB5_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl (%esp), %eax # 4-byte Reload		; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %ecx		; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload		; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: andl %edx, %ecx		; X86-NEXT: andl %edx, %ecx
; X86-NEXT: notl %ecx		; X86-NEXT: notl %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32		; X86-NEXT: lock cmpxchgl %ecx, sc32
; X86-NEXT: sete %bl		; X86-NEXT: sete %bl
; X86-NEXT: testb $1, %bl		; X86-NEXT: testb $1, %bl
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill		; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: jne .LBB5_2		; X86-NEXT: jne .LBB5_2
; X86-NEXT: jmp .LBB5_1		; X86-NEXT: jmp .LBB5_1
; X86-NEXT: .LBB5_2: # %atomicrmw.end		; X86-NEXT: .LBB5_2: # %atomicrmw.end
; X86-NEXT: addl $8, %esp		; X86-NEXT: addl $8, %esp
; X86-NEXT: popl %ebx		; X86-NEXT: popl %ebx
; X86-NEXT: retl		; X86-NEXT: retl
%t1 = atomicrmw nand i32* @sc32, i32 %x acquire		%t1 = atomicrmw nand i32* @sc32, i32 %x acquire
ret void		ret void
}		}

define void @atomic_fetch_max32(i32 %x) nounwind {		define void @atomic_fetch_max32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_max32:		; X64-LABEL: atomic_fetch_max32:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl sc32, %eax		; X64-NEXT: movl sc32, %eax
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB6_1: # %atomicrmw.start		; X64-NEXT: .LBB6_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx		; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %edx # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; X64-NEXT: subl %edx, %ecx		; X64-NEXT: subl %edx, %ecx
; X64-NEXT: cmovgel %eax, %edx		; X64-NEXT: cmovgel %eax, %edx
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)		; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
; X64-NEXT: sete %sil		; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil		; X64-NEXT: testb $1, %sil
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB6_2		; X64-NEXT: jne .LBB6_2
; X64-NEXT: jmp .LBB6_1		; X64-NEXT: jmp .LBB6_1
; X64-NEXT: .LBB6_2: # %atomicrmw.end		; X64-NEXT: .LBB6_2: # %atomicrmw.end
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X86-CMOV-LABEL: atomic_fetch_max32:		; X86-CMOV-LABEL: atomic_fetch_max32:
; X86-CMOV: # %bb.0:		; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: pushl %ebx		; X86-CMOV-NEXT: pushl %ebx
; X86-CMOV-NEXT: subl $12, %esp		; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl sc32, %ecx		; X86-CMOV-NEXT: movl sc32, %ecx
; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB6_1: # %atomicrmw.start		; X86-CMOV-NEXT: .LBB6_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1		; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-CMOV-NEXT: movl %eax, %ecx		; X86-CMOV-NEXT: movl %eax, %ecx
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload		; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-CMOV-NEXT: subl %edx, %ecx		; X86-CMOV-NEXT: subl %edx, %ecx
; X86-CMOV-NEXT: cmovgel %eax, %edx		; X86-CMOV-NEXT: cmovgel %eax, %edx
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32		; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl		; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl		; X86-CMOV-NEXT: testb $1, %bl
; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB6_2		; X86-CMOV-NEXT: jne .LBB6_2
; X86-CMOV-NEXT: jmp .LBB6_1		; X86-CMOV-NEXT: jmp .LBB6_1
; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end		; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end
; X86-CMOV-NEXT: addl $12, %esp		; X86-CMOV-NEXT: addl $12, %esp
; X86-CMOV-NEXT: popl %ebx		; X86-CMOV-NEXT: popl %ebx
; X86-CMOV-NEXT: retl		; X86-CMOV-NEXT: retl
;		;
; X86-NOCMOV-LABEL: atomic_fetch_max32:		; X86-NOCMOV-LABEL: atomic_fetch_max32:
; X86-NOCMOV: # %bb.0:		; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: pushl %ebx		; X86-NOCMOV-NEXT: pushl %ebx
; X86-NOCMOV-NEXT: pushl %esi		; X86-NOCMOV-NEXT: pushl %esi
; X86-NOCMOV-NEXT: subl $24, %esp		; X86-NOCMOV-NEXT: subl $24, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl sc32, %ecx		; X86-NOCMOV-NEXT: movl sc32, %ecx
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB6_1: # %atomicrmw.start		; X86-NOCMOV-NEXT: .LBB6_1: # %atomicrmw.start
; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, %ecx		; X86-NOCMOV-NEXT: movl %eax, %ecx
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: subl %edx, %ecx		; X86-NOCMOV-NEXT: subl %edx, %ecx
; X86-NOCMOV-NEXT: movl %eax, %esi		; X86-NOCMOV-NEXT: movl %eax, %esi
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jge .LBB6_4		; X86-NOCMOV-NEXT: jge .LBB6_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start		; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1		; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB6_4: # %atomicrmw.start		; X86-NOCMOV-NEXT: .LBB6_4: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1		; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, %eax		; X86-NOCMOV-NEXT: movl %ecx, %eax
; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload		; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32		; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-NOCMOV-NEXT: sete %bl		; X86-NOCMOV-NEXT: sete %bl
; X86-NOCMOV-NEXT: testb $1, %bl		; X86-NOCMOV-NEXT: testb $1, %bl
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB6_2		; X86-NOCMOV-NEXT: jne .LBB6_2
; X86-NOCMOV-NEXT: jmp .LBB6_1		; X86-NOCMOV-NEXT: jmp .LBB6_1
; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end		; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end
; X86-NOCMOV-NEXT: addl $24, %esp		; X86-NOCMOV-NEXT: addl $24, %esp
; X86-NOCMOV-NEXT: popl %esi		; X86-NOCMOV-NEXT: popl %esi
; X86-NOCMOV-NEXT: popl %ebx		; X86-NOCMOV-NEXT: popl %ebx
; X86-NOCMOV-NEXT: retl		; X86-NOCMOV-NEXT: retl
%t1 = atomicrmw max i32* @sc32, i32 %x acquire		%t1 = atomicrmw max i32* @sc32, i32 %x acquire
ret void		ret void
}		}

define void @atomic_fetch_min32(i32 %x) nounwind {		define void @atomic_fetch_min32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_min32:		; X64-LABEL: atomic_fetch_min32:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl sc32, %eax		; X64-NEXT: movl sc32, %eax
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB7_1: # %atomicrmw.start		; X64-NEXT: .LBB7_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx		; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %edx # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; X64-NEXT: subl %edx, %ecx		; X64-NEXT: subl %edx, %ecx
; X64-NEXT: cmovlel %eax, %edx		; X64-NEXT: cmovlel %eax, %edx
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)		; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
; X64-NEXT: sete %sil		; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil		; X64-NEXT: testb $1, %sil
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB7_2		; X64-NEXT: jne .LBB7_2
; X64-NEXT: jmp .LBB7_1		; X64-NEXT: jmp .LBB7_1
; X64-NEXT: .LBB7_2: # %atomicrmw.end		; X64-NEXT: .LBB7_2: # %atomicrmw.end
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X86-CMOV-LABEL: atomic_fetch_min32:		; X86-CMOV-LABEL: atomic_fetch_min32:
; X86-CMOV: # %bb.0:		; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: pushl %ebx		; X86-CMOV-NEXT: pushl %ebx
; X86-CMOV-NEXT: subl $12, %esp		; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl sc32, %ecx		; X86-CMOV-NEXT: movl sc32, %ecx
; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB7_1: # %atomicrmw.start		; X86-CMOV-NEXT: .LBB7_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1		; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-CMOV-NEXT: movl %eax, %ecx		; X86-CMOV-NEXT: movl %eax, %ecx
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload		; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-CMOV-NEXT: subl %edx, %ecx		; X86-CMOV-NEXT: subl %edx, %ecx
; X86-CMOV-NEXT: cmovlel %eax, %edx		; X86-CMOV-NEXT: cmovlel %eax, %edx
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32		; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl		; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl		; X86-CMOV-NEXT: testb $1, %bl
; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB7_2		; X86-CMOV-NEXT: jne .LBB7_2
; X86-CMOV-NEXT: jmp .LBB7_1		; X86-CMOV-NEXT: jmp .LBB7_1
; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end		; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end
; X86-CMOV-NEXT: addl $12, %esp		; X86-CMOV-NEXT: addl $12, %esp
; X86-CMOV-NEXT: popl %ebx		; X86-CMOV-NEXT: popl %ebx
; X86-CMOV-NEXT: retl		; X86-CMOV-NEXT: retl
;		;
; X86-NOCMOV-LABEL: atomic_fetch_min32:		; X86-NOCMOV-LABEL: atomic_fetch_min32:
; X86-NOCMOV: # %bb.0:		; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: pushl %ebx		; X86-NOCMOV-NEXT: pushl %ebx
; X86-NOCMOV-NEXT: pushl %esi		; X86-NOCMOV-NEXT: pushl %esi
; X86-NOCMOV-NEXT: subl $24, %esp		; X86-NOCMOV-NEXT: subl $24, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl sc32, %ecx		; X86-NOCMOV-NEXT: movl sc32, %ecx
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB7_1: # %atomicrmw.start		; X86-NOCMOV-NEXT: .LBB7_1: # %atomicrmw.start
; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, %ecx		; X86-NOCMOV-NEXT: movl %eax, %ecx
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: subl %edx, %ecx		; X86-NOCMOV-NEXT: subl %edx, %ecx
; X86-NOCMOV-NEXT: movl %eax, %esi		; X86-NOCMOV-NEXT: movl %eax, %esi
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jle .LBB7_4		; X86-NOCMOV-NEXT: jle .LBB7_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start		; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1		; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB7_4: # %atomicrmw.start		; X86-NOCMOV-NEXT: .LBB7_4: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1		; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, %eax		; X86-NOCMOV-NEXT: movl %ecx, %eax
; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload		; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32		; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-NOCMOV-NEXT: sete %bl		; X86-NOCMOV-NEXT: sete %bl
; X86-NOCMOV-NEXT: testb $1, %bl		; X86-NOCMOV-NEXT: testb $1, %bl
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB7_2		; X86-NOCMOV-NEXT: jne .LBB7_2
; X86-NOCMOV-NEXT: jmp .LBB7_1		; X86-NOCMOV-NEXT: jmp .LBB7_1
; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end		; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end
; X86-NOCMOV-NEXT: addl $24, %esp		; X86-NOCMOV-NEXT: addl $24, %esp
; X86-NOCMOV-NEXT: popl %esi		; X86-NOCMOV-NEXT: popl %esi
; X86-NOCMOV-NEXT: popl %ebx		; X86-NOCMOV-NEXT: popl %ebx
; X86-NOCMOV-NEXT: retl		; X86-NOCMOV-NEXT: retl
%t1 = atomicrmw min i32* @sc32, i32 %x acquire		%t1 = atomicrmw min i32* @sc32, i32 %x acquire
ret void		ret void
}		}

define void @atomic_fetch_umax32(i32 %x) nounwind {		define void @atomic_fetch_umax32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_umax32:		; X64-LABEL: atomic_fetch_umax32:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl sc32, %eax		; X64-NEXT: movl sc32, %eax
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB8_1: # %atomicrmw.start		; X64-NEXT: .LBB8_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx		; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %edx # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; X64-NEXT: subl %edx, %ecx		; X64-NEXT: subl %edx, %ecx
; X64-NEXT: cmoval %eax, %edx		; X64-NEXT: cmoval %eax, %edx
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)		; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
; X64-NEXT: sete %sil		; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil		; X64-NEXT: testb $1, %sil
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB8_2		; X64-NEXT: jne .LBB8_2
; X64-NEXT: jmp .LBB8_1		; X64-NEXT: jmp .LBB8_1
; X64-NEXT: .LBB8_2: # %atomicrmw.end		; X64-NEXT: .LBB8_2: # %atomicrmw.end
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X86-CMOV-LABEL: atomic_fetch_umax32:		; X86-CMOV-LABEL: atomic_fetch_umax32:
; X86-CMOV: # %bb.0:		; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: pushl %ebx		; X86-CMOV-NEXT: pushl %ebx
; X86-CMOV-NEXT: subl $12, %esp		; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl sc32, %ecx		; X86-CMOV-NEXT: movl sc32, %ecx
; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB8_1: # %atomicrmw.start		; X86-CMOV-NEXT: .LBB8_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1		; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-CMOV-NEXT: movl %eax, %ecx		; X86-CMOV-NEXT: movl %eax, %ecx
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload		; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-CMOV-NEXT: subl %edx, %ecx		; X86-CMOV-NEXT: subl %edx, %ecx
; X86-CMOV-NEXT: cmoval %eax, %edx		; X86-CMOV-NEXT: cmoval %eax, %edx
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32		; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl		; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl		; X86-CMOV-NEXT: testb $1, %bl
; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB8_2		; X86-CMOV-NEXT: jne .LBB8_2
; X86-CMOV-NEXT: jmp .LBB8_1		; X86-CMOV-NEXT: jmp .LBB8_1
; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end		; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end
; X86-CMOV-NEXT: addl $12, %esp		; X86-CMOV-NEXT: addl $12, %esp
; X86-CMOV-NEXT: popl %ebx		; X86-CMOV-NEXT: popl %ebx
; X86-CMOV-NEXT: retl		; X86-CMOV-NEXT: retl
;		;
; X86-NOCMOV-LABEL: atomic_fetch_umax32:		; X86-NOCMOV-LABEL: atomic_fetch_umax32:
; X86-NOCMOV: # %bb.0:		; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: pushl %ebx		; X86-NOCMOV-NEXT: pushl %ebx
; X86-NOCMOV-NEXT: pushl %esi		; X86-NOCMOV-NEXT: pushl %esi
; X86-NOCMOV-NEXT: subl $24, %esp		; X86-NOCMOV-NEXT: subl $24, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl sc32, %ecx		; X86-NOCMOV-NEXT: movl sc32, %ecx
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB8_1: # %atomicrmw.start		; X86-NOCMOV-NEXT: .LBB8_1: # %atomicrmw.start
; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, %ecx		; X86-NOCMOV-NEXT: movl %eax, %ecx
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: subl %edx, %ecx		; X86-NOCMOV-NEXT: subl %edx, %ecx
; X86-NOCMOV-NEXT: movl %eax, %esi		; X86-NOCMOV-NEXT: movl %eax, %esi
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: ja .LBB8_4		; X86-NOCMOV-NEXT: ja .LBB8_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start		; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1		; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB8_4: # %atomicrmw.start		; X86-NOCMOV-NEXT: .LBB8_4: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1		; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, %eax		; X86-NOCMOV-NEXT: movl %ecx, %eax
; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload		; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32		; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-NOCMOV-NEXT: sete %bl		; X86-NOCMOV-NEXT: sete %bl
; X86-NOCMOV-NEXT: testb $1, %bl		; X86-NOCMOV-NEXT: testb $1, %bl
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB8_2		; X86-NOCMOV-NEXT: jne .LBB8_2
; X86-NOCMOV-NEXT: jmp .LBB8_1		; X86-NOCMOV-NEXT: jmp .LBB8_1
; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end		; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end
; X86-NOCMOV-NEXT: addl $24, %esp		; X86-NOCMOV-NEXT: addl $24, %esp
; X86-NOCMOV-NEXT: popl %esi		; X86-NOCMOV-NEXT: popl %esi
; X86-NOCMOV-NEXT: popl %ebx		; X86-NOCMOV-NEXT: popl %ebx
; X86-NOCMOV-NEXT: retl		; X86-NOCMOV-NEXT: retl
%t1 = atomicrmw umax i32* @sc32, i32 %x acquire		%t1 = atomicrmw umax i32* @sc32, i32 %x acquire
ret void		ret void
}		}

define void @atomic_fetch_umin32(i32 %x) nounwind {		define void @atomic_fetch_umin32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_umin32:		; X64-LABEL: atomic_fetch_umin32:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl sc32, %eax		; X64-NEXT: movl sc32, %eax
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB9_1: # %atomicrmw.start		; X64-NEXT: .LBB9_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1		; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx		; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %edx # 4-byte Reload		; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; X64-NEXT: subl %edx, %ecx		; X64-NEXT: subl %edx, %ecx
; X64-NEXT: cmovbel %eax, %edx		; X64-NEXT: cmovbel %eax, %edx
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)		; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
; X64-NEXT: sete %sil		; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil		; X64-NEXT: testb $1, %sil
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB9_2		; X64-NEXT: jne .LBB9_2
; X64-NEXT: jmp .LBB9_1		; X64-NEXT: jmp .LBB9_1
; X64-NEXT: .LBB9_2: # %atomicrmw.end		; X64-NEXT: .LBB9_2: # %atomicrmw.end
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X86-CMOV-LABEL: atomic_fetch_umin32:		; X86-CMOV-LABEL: atomic_fetch_umin32:
; X86-CMOV: # %bb.0:		; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: pushl %ebx		; X86-CMOV-NEXT: pushl %ebx
; X86-CMOV-NEXT: subl $12, %esp		; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl sc32, %ecx		; X86-CMOV-NEXT: movl sc32, %ecx
; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB9_1: # %atomicrmw.start		; X86-CMOV-NEXT: .LBB9_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1		; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-CMOV-NEXT: movl %eax, %ecx		; X86-CMOV-NEXT: movl %eax, %ecx
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload		; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-CMOV-NEXT: subl %edx, %ecx		; X86-CMOV-NEXT: subl %edx, %ecx
; X86-CMOV-NEXT: cmovbel %eax, %edx		; X86-CMOV-NEXT: cmovbel %eax, %edx
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32		; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl		; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl		; X86-CMOV-NEXT: testb $1, %bl
; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB9_2		; X86-CMOV-NEXT: jne .LBB9_2
; X86-CMOV-NEXT: jmp .LBB9_1		; X86-CMOV-NEXT: jmp .LBB9_1
; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end		; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end
; X86-CMOV-NEXT: addl $12, %esp		; X86-CMOV-NEXT: addl $12, %esp
; X86-CMOV-NEXT: popl %ebx		; X86-CMOV-NEXT: popl %ebx
; X86-CMOV-NEXT: retl		; X86-CMOV-NEXT: retl
;		;
; X86-NOCMOV-LABEL: atomic_fetch_umin32:		; X86-NOCMOV-LABEL: atomic_fetch_umin32:
; X86-NOCMOV: # %bb.0:		; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: pushl %ebx		; X86-NOCMOV-NEXT: pushl %ebx
; X86-NOCMOV-NEXT: pushl %esi		; X86-NOCMOV-NEXT: pushl %esi
; X86-NOCMOV-NEXT: subl $24, %esp		; X86-NOCMOV-NEXT: subl $24, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl sc32, %ecx		; X86-NOCMOV-NEXT: movl sc32, %ecx
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB9_1: # %atomicrmw.start		; X86-NOCMOV-NEXT: .LBB9_1: # %atomicrmw.start
; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1		; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, %ecx		; X86-NOCMOV-NEXT: movl %eax, %ecx
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: subl %edx, %ecx		; X86-NOCMOV-NEXT: subl %edx, %ecx
; X86-NOCMOV-NEXT: movl %eax, %esi		; X86-NOCMOV-NEXT: movl %eax, %esi
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jbe .LBB9_4		; X86-NOCMOV-NEXT: jbe .LBB9_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start		; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1		; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB9_4: # %atomicrmw.start		; X86-NOCMOV-NEXT: .LBB9_4: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1		; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload		; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, %eax		; X86-NOCMOV-NEXT: movl %ecx, %eax
; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload		; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32		; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-NOCMOV-NEXT: sete %bl		; X86-NOCMOV-NEXT: sete %bl
; X86-NOCMOV-NEXT: testb $1, %bl		; X86-NOCMOV-NEXT: testb $1, %bl
; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill		; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB9_2		; X86-NOCMOV-NEXT: jne .LBB9_2
; X86-NOCMOV-NEXT: jmp .LBB9_1		; X86-NOCMOV-NEXT: jmp .LBB9_1
; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end		; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end
; X86-NOCMOV-NEXT: addl $24, %esp		; X86-NOCMOV-NEXT: addl $24, %esp
; X86-NOCMOV-NEXT: popl %esi		; X86-NOCMOV-NEXT: popl %esi
; X86-NOCMOV-NEXT: popl %ebx		; X86-NOCMOV-NEXT: popl %ebx
; X86-NOCMOV-NEXT: retl		; X86-NOCMOV-NEXT: retl
%t1 = atomicrmw umin i32* @sc32, i32 %x acquire		%t1 = atomicrmw umin i32* @sc32, i32 %x acquire
ret void		ret void
}		}

define void @atomic_fetch_cmpxchg32() nounwind {		define void @atomic_fetch_cmpxchg32() nounwind {
; X64-LABEL: atomic_fetch_cmpxchg32:		; X64-LABEL: atomic_fetch_cmpxchg32:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: movl $1, %ecx		; X64-NEXT: movl $1, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)		; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X86-LABEL: atomic_fetch_cmpxchg32:		; X86-LABEL: atomic_fetch_cmpxchg32:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %eax		; X86-NEXT: pushl %eax
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: movl $1, %ecx		; X86-NEXT: movl $1, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32		; X86-NEXT: lock cmpxchgl %ecx, sc32
Show All 18 Lines	; X86-NEXT: retl
store atomic i32 %x, i32* @sc32 release, align 4		store atomic i32 %x, i32* @sc32 release, align 4
ret void		ret void
}		}

define void @atomic_fetch_swap32(i32 %x) nounwind {		define void @atomic_fetch_swap32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_swap32:		; X64-LABEL: atomic_fetch_swap32:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: xchgl %edi, {{.*}}(%rip)		; X64-NEXT: xchgl %edi, {{.*}}(%rip)
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill		; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X86-LABEL: atomic_fetch_swap32:		; X86-LABEL: atomic_fetch_swap32:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %eax		; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xchgl %eax, sc32		; X86-NEXT: xchgl %eax, sc32
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill		; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: popl %eax		; X86-NEXT: popl %eax
; X86-NEXT: retl		; X86-NEXT: retl
%t1 = atomicrmw xchg i32* @sc32, i32 %x acquire		%t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
ret void		ret void
}		}

		define void @atomic_fetch_swapf32(float %x) nounwind {
		; X64-LABEL: atomic_fetch_swapf32:
		; X64: # %bb.0:
		; X64-NEXT: movd %xmm0, %eax
		; X64-NEXT: xchgl %eax, {{.*}}(%rip)
		; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
		; X64-NEXT: retq
		;
		; X86-CMOV-LABEL: atomic_fetch_swapf32:
		; X86-CMOV: # %bb.0:
		; X86-CMOV-NEXT: pushl %eax
		; X86-CMOV-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
		; X86-CMOV-NEXT: movd %xmm0, %eax
		; X86-CMOV-NEXT: xchgl %eax, fsc32
		; X86-CMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
		; X86-CMOV-NEXT: popl %eax
		; X86-CMOV-NEXT: retl
		;
		; X86-NOCMOV-LABEL: atomic_fetch_swapf32:
		; X86-NOCMOV: # %bb.0:
		; X86-NOCMOV-NEXT: subl $8, %esp
		; X86-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
		; X86-NOCMOV-NEXT: fstps {{[0-9]+}}(%esp)
		; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X86-NOCMOV-NEXT: xchgl %eax, fsc32
		; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
		; X86-NOCMOV-NEXT: addl $8, %esp
		; X86-NOCMOV-NEXT: retl
		%t1 = atomicrmw xchg float* @fsc32, float %x acquire
		ret void
		}

test/CodeGen/X86/atomic64.ll

	; RUN: llc < %s -O0 -mtriple=x86_64-- -mcpu=corei7 -verify-machineinstrs \| FileCheck %s --check-prefix X64			; RUN: llc < %s -O0 -mtriple=x86_64-- -mcpu=corei7 -verify-machineinstrs \| FileCheck %s --check-prefix X64

	@sc64 = external global i64			@sc64 = external global i64
				@fsc64 = external global double

	define void @atomic_fetch_add64() nounwind {			define void @atomic_fetch_add64() nounwind {
	; X64-LABEL: atomic_fetch_add64:			; X64-LABEL: atomic_fetch_add64:
	; X32-LABEL: atomic_fetch_add64:			; X32-LABEL: atomic_fetch_add64:
	entry:			entry:
	%t1 = atomicrmw add i64* @sc64, i64 1 acquire			%t1 = atomicrmw add i64* @sc64, i64 1 acquire
	; X64: lock			; X64: lock
	; X64: incq			; X64: incq
	▲ Show 20 Lines • Show All 216 Lines • ▼ Show 20 Lines
	; X64-NOT: lock			; X64-NOT: lock
	; X64: xchgq			; X64: xchgq
	; X32: lock			; X32: lock
	; X32: xchg8b			; X32: xchg8b
	ret void			ret void
	; X64: ret			; X64: ret
	; X32: ret			; X32: ret
	}			}

				define void @atomic_fetch_swapf64(double %x) nounwind {
				; X64-LABEL: atomic_fetch_swapf64:
				; X32-LABEL: atomic_fetch_swapf64:
				%t1 = atomicrmw xchg double* @fsc64, double %x acquire
				; X64-NOT: lock
				; X64: xchgq
				; X32: lock
				; X32: xchg8b
				ret void
				; X64: ret
				; X32: ret
				}

This is an archive of the discontinued LLVM Phabricator instance.

Allow FP types for atomicrmw xchg
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 167881

lib/AsmParser/LLParser.cpp

lib/CodeGen/AtomicExpandPass.cpp

lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

lib/CodeGen/SelectionDAG/LegalizeTypes.h

lib/CodeGen/TargetLoweringBase.cpp

lib/IR/Verifier.cpp

lib/Target/AArch64/AArch64ISelLowering.cpp

test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll

test/Bitcode/compatibility.ll

test/CodeGen/AMDGPU/flat_atomics.ll

test/CodeGen/AMDGPU/flat_atomics_i64.ll

test/CodeGen/AMDGPU/global_atomics.ll

test/CodeGen/AMDGPU/global_atomics_i64.ll

test/CodeGen/AMDGPU/local-atomics.ll

test/CodeGen/AMDGPU/local-atomics64.ll

test/CodeGen/X86/atomic128.ll

test/CodeGen/X86/atomic16.ll

test/CodeGen/X86/atomic32.ll

test/CodeGen/X86/atomic64.ll

This is an archive of the discontinued LLVM Phabricator instance.

Allow FP types for atomicrmw xchg
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 167881

lib/AsmParser/LLParser.cpp

lib/CodeGen/AtomicExpandPass.cpp

lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

lib/CodeGen/SelectionDAG/LegalizeTypes.h

lib/CodeGen/TargetLoweringBase.cpp

lib/IR/Verifier.cpp

lib/Target/AArch64/AArch64ISelLowering.cpp

test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll

test/Bitcode/compatibility.ll

test/CodeGen/AMDGPU/flat_atomics.ll

test/CodeGen/AMDGPU/flat_atomics_i64.ll

test/CodeGen/AMDGPU/global_atomics.ll

test/CodeGen/AMDGPU/global_atomics_i64.ll

test/CodeGen/AMDGPU/local-atomics.ll

test/CodeGen/AMDGPU/local-atomics64.ll

test/CodeGen/X86/atomic128.ll

test/CodeGen/X86/atomic16.ll

test/CodeGen/X86/atomic32.ll

test/CodeGen/X86/atomic64.ll

Allow FP types for atomicrmw xchg
ClosedPublic