diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1197,13 +1197,16 @@
   let ScalarMemoryVT = i16;
 }
 
+// TODO: These need to be renamed to simple_store/simple_load and then split
+// into volatile/atomic/ordered flavors so that the respective transforms
+// can pick the right combination.
 def nonvolatile_load : PatFrag<(ops node:$ptr),
                                (load node:$ptr), [{
-  return !cast<LoadSDNode>(N)->isVolatile();
+  return cast<LoadSDNode>(N)->isSimple();
 }]>;
 def nonvolatile_store : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
-  return !cast<StoreSDNode>(N)->isVolatile();
+  return cast<StoreSDNode>(N)->isSimple();
 }]>;
 
 // nontemporal store fragments.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4898,8 +4898,8 @@
     return true;
   }
 
-  // Do not change the width of a volatile load.
-  if (LoadN->isVolatile())
+  // Do not change the width of a volatile or atomic load.
+  if (!LoadN->isSimple())
     return false;
 
   // Do not generate loads of non-round integer types since these can
@@ -4931,8 +4931,8 @@
   if (!MemVT.isRound())
     return false;
 
-  // Don't change the width of a volatile load.
-  if (LDST->isVolatile())
+  // Don't change the width of a volatile or atomic load.
+  if (!LDST->isSimple())
     return false;
 
   // Verify that we are actually reducing a load width here.
@@ -5519,7 +5519,7 @@
   unsigned MemBitSize = MemVT.getScalarSizeInBits();
   APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
   if (DAG.MaskedValueIsZero(N1, ExtBits) &&
-      ((!LegalOperations && !LN0->isVolatile()) ||
+      ((!LegalOperations && LN0->isSimple()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
     SDValue ExtLoad =
         DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
@@ -6613,7 +6613,7 @@
                                       Depth + 1);
   case ISD::LOAD: {
     auto L = cast<LoadSDNode>(Op.getNode());
-    if (L->isVolatile() || L->isIndexed())
+    if (!L->isSimple() || L->isIndexed())
       return None;
 
     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
@@ -6702,8 +6702,9 @@
   SDValue Chain;
   SmallVector<StoreSDNode *, 8> Stores;
   for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
+    // TODO: Allow unordered atomics when wider type is legal (see D66309)
     if (Store->getMemoryVT() != MVT::i8 ||
-        Store->isVolatile() || Store->isIndexed())
+        !Store->isSimple() || Store->isIndexed())
       return SDValue();
     Stores.push_back(Store);
     Chain = Store->getChain();
@@ -6914,7 +6915,8 @@
     return SDValue();
 
   LoadSDNode *L = P->Load;
-  assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
+  assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
+         !L->isIndexed() &&
          "Must be enforced by calculateByteProvider");
   assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
 
@@ -9244,8 +9246,9 @@
 
   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
-      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
-      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+      !N0.hasOneUse() || !LN0->isSimple() ||
+      !DstVT.isVector() || !DstVT.isPow2VectorType() ||
+      !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
     return SDValue();
 
   SmallVector<SDNode *, 4> SetCCs;
@@ -9446,7 +9449,8 @@
 
   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   EVT MemVT = LN0->getMemoryVT();
-  if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
+  if ((LegalOperations || !LN0->isSimple() ||
+       VT.isVector()) &&
       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
     return SDValue();
 
@@ -9471,7 +9475,7 @@
   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
       ((LegalOperations || VT.isVector() ||
-        cast<LoadSDNode>(N0)->isVolatile()) &&
+        !cast<LoadSDNode>(N0)->isSimple()) &&
        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
     return {};
 
@@ -10547,7 +10551,7 @@
   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
-      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
+      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
         N0.hasOneUse()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -10564,7 +10568,7 @@
   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
       N0.hasOneUse() &&
       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
-      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
@@ -10791,7 +10795,7 @@
   // after truncation.
   if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    if (!LN0->isVolatile() &&
+    if (LN0->isSimple() &&
         LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
       SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                        VT, LN0->getChain(), LN0->getBasePtr(),
@@ -11085,7 +11089,7 @@
       // memory accesses. We don't care if the original type was legal or not
       // as we assume software couldn't rely on the number of accesses of an
       // illegal type.
-      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+      ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
        TLI.isOperationLegal(ISD::LOAD, VT))) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
 
@@ -14013,11 +14017,12 @@
 }
 
 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
-  if (OptLevel == CodeGenOpt::None || LD->isVolatile())
+  if (OptLevel == CodeGenOpt::None || !LD->isSimple())
     return SDValue();
   SDValue Chain = LD->getOperand(0);
   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
-  if (!ST || ST->isVolatile())
+  // TODO: Relax this restriction for unordered atomics (see D66309)
+  if (!ST || !ST->isSimple())
     return SDValue();
 
   EVT LDType = LD->getValueType(0);
@@ -14116,7 +14121,8 @@
   // If load is not volatile and there are no uses of the loaded value (and
   // the updated indexed value in case of indexed loads), change uses of the
   // chain value into uses of the chain input (i.e. delete the dead load).
-  if (!LD->isVolatile()) {
+  // TODO: Allow this for unordered atomics (see D66309)
+  if (LD->isSimple()) {
     if (N->getValueType(1) == MVT::Other) {
       // Unindexed loads.
       if (!N->hasAnyUseOfValue(0)) {
@@ -14687,7 +14693,7 @@
     return false;
 
   LoadSDNode *LD = cast<LoadSDNode>(N);
-  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
+  if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
       !LD->getValueType(0).isInteger())
     return false;
 
@@ -14918,7 +14924,7 @@
 /// or code size.
 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
   StoreSDNode *ST = cast<StoreSDNode>(N);
-  if (ST->isVolatile())
+  if (!ST->isSimple())
     return SDValue();
 
   SDValue Chain = ST->getChain();
@@ -15374,14 +15380,16 @@
     // Loads must only have one use.
     if (!Ld->hasNUsesOfValue(1, 0))
       return;
-    // The memory operands must not be volatile/indexed.
-    if (Ld->isVolatile() || Ld->isIndexed())
+    // The memory operands must not be volatile/indexed/atomic.
+    // TODO: May be able to relax for unordered atomics (see D66309)
+    if (!Ld->isSimple() || Ld->isIndexed())
       return;
   }
   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                             int64_t &Offset) -> bool {
-    // The memory operands must not be volatile/indexed.
-    if (Other->isVolatile() || Other->isIndexed())
+    // The memory operands must not be volatile/indexed/atomic.
+    // TODO: May be able to relax for unordered atomics (see D66309)
+    if (!Other->isSimple() || Other->isIndexed())
       return false;
     // Don't mix temporal stores with non-temporal stores.
     if (St->isNonTemporal() != Other->isNonTemporal())
@@ -15401,8 +15409,10 @@
         // Loads must only have one use.
         if (!OtherLd->hasNUsesOfValue(1, 0))
           return false;
-        // The memory operands must not be volatile/indexed.
-        if (OtherLd->isVolatile() || OtherLd->isIndexed())
+        // The memory operands must not be volatile/indexed/atomic.
+        // TODO: May be able to relax for unordered atomics (see D66309)
+        if (!OtherLd->isSimple() ||
+            OtherLd->isIndexed())
           return false;
         // Don't mix temporal loads with non-temporal loads.
         if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
@@ -16145,7 +16155,7 @@
     case MVT::ppcf128:
       return SDValue();
     case MVT::f32:
-      if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
+      if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
           TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
         ;
         Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
@@ -16157,7 +16167,7 @@
       return SDValue();
     case MVT::f64:
       if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
-           !ST->isVolatile()) ||
+           ST->isSimple()) ||
           TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
         ;
         Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
@@ -16166,7 +16176,7 @@
                               Ptr, ST->getMemOperand());
       }
 
-      if (!ST->isVolatile() &&
+      if (ST->isSimple() &&
           TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
         // Many FP stores are not made apparent until after legalize, e.g. for
         // argument passing. Since this is so common, custom legalize the
@@ -16213,7 +16223,8 @@
   // memory accesses. We don't care if the original type was legal or not
   // as we assume software couldn't rely on the number of accesses of an
   // illegal type.
-  if (((!LegalOperations && !ST->isVolatile()) ||
+  // TODO: May be able to relax for unordered atomics (see D66309)
+  if (((!LegalOperations && ST->isSimple()) ||
        TLI.isOperationLegal(ISD::STORE, SVT)) &&
       TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT, DAG,
                                    *ST->getMemOperand())) {
@@ -16294,9 +16305,10 @@
 
   // If this is a load followed by a store to the same location, then the store
   // is dead/noop.
+  // TODO: Can relax for unordered atomics (see D66309)
   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
-        ST->isUnindexed() && !ST->isVolatile() &&
+        ST->isUnindexed() && ST->isSimple() &&
         // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
@@ -16305,9 +16317,10 @@
     }
   }
 
+  // TODO: Can relax for unordered atomics (see D66309)
   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
-    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
-        !ST1->isVolatile()) {
+    if (ST->isUnindexed() && ST->isSimple() &&
+        ST1->isUnindexed() && ST1->isSimple()) {
       if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
           ST->getMemoryVT() == ST1->getMemoryVT()) {
         // If this is a store followed by a store with the same value to the
@@ -16436,7 +16449,8 @@
         break;
       case ISD::STORE: {
         StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
-        if (ST->isVolatile() || ST->isIndexed())
+        // TODO: Can relax for unordered atomics (see D66309)
+        if (!ST->isSimple() || ST->isIndexed())
           continue;
         const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
         // If we store purely within object bounds just before its lifetime ends,
@@ -16745,7 +16759,7 @@
 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                                   SDValue EltNo,
                                                   LoadSDNode *OriginalLoad) {
-  assert(!OriginalLoad->isVolatile());
+  assert(OriginalLoad->isSimple());
 
   EVT ResultVT = EVE->getValueType(0);
   EVT VecEltVT = InVecVT.getVectorElementType();
@@ -17053,7 +17067,7 @@
       ISD::isNormalLoad(VecOp.getNode()) &&
       !Index->hasPredecessor(VecOp.getNode())) {
     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
-    if (VecLoad && !VecLoad->isVolatile())
+    if (VecLoad && VecLoad->isSimple())
       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
   }
@@ -17112,7 +17126,7 @@
 
   // Make sure we found a non-volatile load and the extractelement is
   // the only use.
-  if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
+  if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
     return SDValue();
 
   // If Idx was -1 above, Elt is going to be -1, so just return undef.
@@ -18258,7 +18272,8 @@
 
   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
-  if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
+  if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
+      !ExtIdx)
     return SDValue();
 
   // Allow targets to opt-out.
@@ -19831,7 +19846,9 @@
     // Token chains must be identical.
     if (LHS.getOperand(0) != RHS.getOperand(0) ||
         // Do not let this transformation reduce the number of volatile loads.
-        LLD->isVolatile() || RLD->isVolatile() ||
+        // Be conservative for atomics for the moment
+        // TODO: This does appear to be legal for unordered atomics (see D66309)
+        !LLD->isSimple() || !RLD->isSimple() ||
        // FIXME: If either is a pre/post inc/dec load,
        // we'd need to split out the address adjustment.
        LLD->isIndexed() || RLD->isIndexed() ||
@@ -20533,6 +20550,7 @@
 
 struct MemUseCharacteristics {
   bool IsVolatile;
+  bool IsAtomic;
   SDValue BasePtr;
   int64_t Offset;
   Optional<int64_t> NumBytes;
@@ -20548,18 +20566,20 @@
                          : (LSN->getAddressingMode() == ISD::PRE_DEC)
                              ? -1 * C->getSExtValue()
                              : 0;
-      return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
+      return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
+              Offset /*base offset*/,
              Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
              LSN->getMemOperand()};
    }
    if (const auto *LN = cast<LifetimeSDNode>(N))
-      return {false /*isVolatile*/, LN->getOperand(1),
+      return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
             (LN->hasOffset()) ? LN->getOffset() : 0,
             (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
                               : Optional<int64_t>(),
             (MachineMemOperand *)nullptr};
    // Default.
-    return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/,
+    return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
+            (int64_t)0 /*offset*/,
             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
   };
@@ -20575,6 +20595,11 @@
   if (MUC0.IsVolatile && MUC1.IsVolatile)
     return true;
 
+  // Be conservative about atomics for the moment
+  // TODO: This is way overconservative for unordered atomics (see D66309)
+  if (MUC0.IsAtomic && MUC1.IsAtomic)
+    return true;
+
   if (MUC0.MMO && MUC1.MMO) {
     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
@@ -20656,7 +20681,8 @@
   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
 
   // Get alias information for node.
-  const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();
+  // TODO: relax aliasing for unordered atomics (see D66309)
+  const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
 
   // Starting off.
   Chains.push_back(OriginalChain);
@@ -20672,8 +20698,9 @@
     case ISD::LOAD:
     case ISD::STORE: {
       // Get alias information for C.
+      // TODO: Relax aliasing for unordered atomics (see D66309)
       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
-                      !cast<LSBaseSDNode>(C.getNode())->isVolatile();
+                      cast<LSBaseSDNode>(C.getNode())->isSimple();
       if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
         // Look further up the chain.
         C = C.getOperand(0);
@@ -20828,7 +20855,8 @@
     // If the chain has more than one use, then we can't reorder the mem ops.
     if (!SDValue(Chain, 0)->hasOneUse())
       break;
-    if (Chain->isVolatile() || Chain->isIndexed())
+    // TODO: Relax for unordered atomics (see D66309)
+    if (!Chain->isSimple() || Chain->isIndexed())
       break;
 
     // Find the base pointer and offset for this memory node.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4772,7 +4772,7 @@
 
   int LdWidth = LdVT.getSizeInBits();
   int WidthDiff = WidenWidth - LdWidth;
-  unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads.
+  unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads.
 
   // Find the vector type that can load from.
   EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8973,7 +8973,7 @@
 
   // Loads don't have side effects, look through them.
   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
-    if (!Ld->isVolatile())
+    if (Ld->isUnordered())
       return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
   }
   return false;
@@ -9211,6 +9211,9 @@
                                       int Dist) const {
   if (LD->isVolatile() || Base->isVolatile())
     return false;
+  // TODO: probably too restrictive for atomics, revisit
+  if (!LD->isSimple())
+    return false;
   if (LD->isIndexed() || Base->isIndexed())
     return false;
   if (LD->getChain() != Base->getChain())
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3224,7 +3224,7 @@
       LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
       APInt bestMask;
       unsigned bestWidth = 0, bestOffset = 0;
-      if (!Lod->isVolatile() && Lod->isUnindexed()) {
+      if (Lod->isSimple() && Lod->isUnindexed()) {
        unsigned origWidth = N0.getValueSizeInBits();
        unsigned maskWidth = origWidth;
        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll
--- a/llvm/test/CodeGen/X86/atomic-unordered.ll
+++ b/llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -604,16 +604,11 @@
 
 ; Legal if wider type is also atomic (TODO)
 define void @widen_zero_init(i32* %p0, i32 %v1, i32 %v2) {
-; CHECK-NOX-LABEL: widen_zero_init:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movl $0, (%rdi)
-; CHECK-NOX-NEXT:    movl $0, 4(%rdi)
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: widen_zero_init:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq $0, (%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-LABEL: widen_zero_init:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl $0, (%rdi)
+; CHECK-NEXT:    movl $0, 4(%rdi)
+; CHECK-NEXT:    retq
   %p1 = getelementptr i32, i32* %p0, i64 1
   store atomic i32 0, i32* %p0 unordered, align 8
   store atomic i32 0, i32* %p1 unordered, align 4
@@ -622,16 +617,11 @@
 
 ; Not legal to widen due to alignment restriction
 define void @widen_zero_init_unaligned(i32* %p0, i32 %v1, i32 %v2) {
-; CHECK-NOX-LABEL: widen_zero_init_unaligned:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movl $0, (%rdi)
-; CHECK-NOX-NEXT:    movl $0, 4(%rdi)
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: widen_zero_init_unaligned:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq $0, (%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-LABEL: widen_zero_init_unaligned:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl $0, (%rdi)
+; CHECK-NEXT:    movl $0, 4(%rdi)
+; CHECK-NEXT:    retq
   %p1 = getelementptr i32, i32* %p0, i64 1
   store atomic i32 0, i32* %p0 unordered, align 4
   store atomic i32 0, i32* %p1 unordered, align 4
@@ -1449,7 +1439,7 @@
 ;
 ; CHECK-EX-LABEL: load_fold_shl3:
 ; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movb (%rsi), %al
+; CHECK-EX-NEXT:    movq (%rsi), %rax
 ; CHECK-EX-NEXT:    shlxq %rax, (%rdi), %rax
 ; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
@@ -1510,7 +1500,7 @@
 ;
 ; CHECK-EX-LABEL: load_fold_lshr3:
 ; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movb (%rsi), %al
+; CHECK-EX-NEXT:    movq (%rsi), %rax
 ; CHECK-EX-NEXT:    shrxq %rax, (%rdi), %rax
 ; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
@@ -1571,7 +1561,7 @@
 ;
 ; CHECK-EX-LABEL: load_fold_ashr3:
 ; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movb (%rsi), %al
+; CHECK-EX-NEXT:    movq (%rsi), %rax
 ; CHECK-EX-NEXT:    sarxq %rax, (%rdi), %rax
 ; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p1 unordered, align 8
@@ -2694,16 +2684,11 @@
 
 ; Legal to reduce the load width (TODO)
 define i32 @fold_trunc(i64* %p) {
-; CHECK-NOX-LABEL: fold_trunc:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movq (%rdi), %rax
-; CHECK-NOX-NEXT:    # kill: def $eax killed $eax killed $rax
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: fold_trunc:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movl (%rdi), %eax
-; CHECK-EX-NEXT:    retq
+; CHECK-LABEL: fold_trunc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = trunc i64 %v to i32
   ret i32 %ret
@@ -2727,8 +2712,9 @@
 ;
 ; CHECK-EX-LABEL: fold_trunc_add:
 ; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movl %esi, %eax
-; CHECK-EX-NEXT:    addl (%rdi), %eax
+; CHECK-EX-NEXT:    movq (%rdi), %rax
+; CHECK-EX-NEXT:    addl %esi, %eax
+; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %trunc = trunc i64 %v to i32
@@ -2754,8 +2740,9 @@
 ;
 ; CHECK-EX-LABEL: fold_trunc_and:
 ; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movl %esi, %eax
-; CHECK-EX-NEXT:    andl (%rdi), %eax
+; CHECK-EX-NEXT:    movq (%rdi), %rax
+; CHECK-EX-NEXT:    andl %esi, %eax
+; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %trunc = trunc i64 %v to i32
@@ -2781,8 +2768,9 @@
 ;
 ; CHECK-EX-LABEL: fold_trunc_or:
 ; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movl %esi, %eax
-; CHECK-EX-NEXT:    orl (%rdi), %eax
+; CHECK-EX-NEXT:    movq (%rdi), %rax
+; CHECK-EX-NEXT:    orl %esi, %eax
+; CHECK-EX-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-EX-NEXT:    retq
   %v = load atomic i64, i64* %p unordered, align 8
   %trunc = trunc i64 %v to i32
@@ -2864,17 +2852,11 @@
 
 ; Legal to forward (TODO)
 define i64 @store_forward(i64* %p, i64 %v) {
-; CHECK-NOX-LABEL: store_forward:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movq %rsi, (%rdi)
-; CHECK-NOX-NEXT:    movq (%rdi), %rax
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: store_forward:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rsi, %rax
-; CHECK-EX-NEXT:    movq %rsi, (%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-LABEL: store_forward:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rsi, (%rdi)
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    retq
   store atomic i64 %v, i64* %p unordered, align 8
   %ret = load atomic i64, i64* %p unordered, align 8
   ret i64 %ret
@@ -2894,16 +2876,11 @@
 
 ; Legal to kill (TODO)
 define void @dead_store(i64* %p, i64 %v) {
-; CHECK-NOX-LABEL: dead_store:
-; CHECK-NOX:       # %bb.0:
-; CHECK-NOX-NEXT:    movq $0, (%rdi)
-; CHECK-NOX-NEXT:    movq %rsi, (%rdi)
-; CHECK-NOX-NEXT:    retq
-;
-; CHECK-EX-LABEL: dead_store:
-; CHECK-EX:       # %bb.0:
-; CHECK-EX-NEXT:    movq %rsi, (%rdi)
-; CHECK-EX-NEXT:    retq
+; CHECK-LABEL: dead_store:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq $0, (%rdi)
+; CHECK-NEXT:    movq %rsi, (%rdi)
+; CHECK-NEXT:    retq
   store atomic i64 0, i64* %p unordered, align 8
   store atomic i64 %v, i64* %p unordered, align 8
   ret void