Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -490,6 +491,10 @@ /// returns false. bool findBetterNeighborChains(StoreSDNode *St); + // Helper for findBetterNeighborChains. Walk up the store chain and add additional + // chained stores that do not overlap and can be parallelized. + bool parallelizeChainedStores(StoreSDNode *St); + /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. struct MemOpLink { @@ -18905,6 +18910,11 @@ return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } +// TODO: Replace with std::monostate when we move to C++17. +struct UnitT { } Unit; +bool operator==(const UnitT &, const UnitT &) { return true; } +bool operator!=(const UnitT &, const UnitT &) { return false; } + // This function tries to collect a bunch of potentially interesting // nodes to improve the chains of, all at once. This might seem // redundant, as this function gets called when visiting every store @@ -18917,13 +18927,22 @@ // the nodes that will eventually be candidates, and then not be able // to go from a partially-merged state to the desired final // fully-merged state. -bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None) - return false; + +bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { + SmallVector ChainedStores; + StoreSDNode *STChain = St; + // Intervals records which offsets from BaseIndex have been covered. 
In + // the common case, every store writes to the immediately previous address + // space and is thus merged with the previous interval at insertion time. + + using IMap = + llvm::IntervalMap>; + IMap::Allocator A; + IMap Intervals(A); // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); + const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); // We must have a base and an offset. if (!BasePtr.getBase().getNode()) @@ -18933,76 +18952,114 @@ if (BasePtr.getBase().isUndef()) return false; - SmallVector ChainedStores; - ChainedStores.push_back(St); + // Add ST's interval. + Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit); - // Walk up the chain and look for nodes with offsets from the same - // base pointer. Stop when reaching an instruction with a different kind - // or instruction which has a different base pointer. - StoreSDNode *Index = St; - while (Index) { + while (StoreSDNode *Chain = dyn_cast(STChain->getChain())) { // If the chain has more than one use, then we can't reorder the mem ops. - if (Index != St && !SDValue(Index, 0)->hasOneUse()) + if (!SDValue(Chain, 0)->hasOneUse()) break; - - if (Index->isVolatile() || Index->isIndexed()) + if (Chain->isVolatile() || Chain->isIndexed()) break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG); - + const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG); // Check that the base pointer is the same as the original one. - if (!BasePtr.equalBaseIndex(Ptr, DAG)) + int64_t Offset; + if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset)) + break; + int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8; + // Make sure we don't overlap with other intervals by checking the ones to + // the left or right before inserting. + auto I = Intervals.find(Offset); + // If there's a next interval, we should end before it. 
+ if (I != Intervals.end() && I.start() < (Offset + Length)) break; + // If there's a previous interval, we should start after it. + if (I != Intervals.begin() && (--I).stop() <= Offset) + break; + Intervals.insert(Offset, Offset + Length, Unit); - // Walk up the chain to find the next store node, ignoring any - // intermediate loads. Any other kind of node will halt the loop. - SDNode *NextInChain = Index->getChain().getNode(); - while (true) { - if (StoreSDNode *STn = dyn_cast(NextInChain)) { - // We found a store node. Use it for the next iteration. - if (STn->isVolatile() || STn->isIndexed()) { - Index = nullptr; - break; - } - ChainedStores.push_back(STn); - Index = STn; - break; - } else if (LoadSDNode *Ldn = dyn_cast(NextInChain)) { - NextInChain = Ldn->getChain().getNode(); - continue; - } else { - Index = nullptr; - break; - } - }// end while + ChainedStores.push_back(Chain); + STChain = Chain; } - // At this point, ChainedStores lists all of the Store nodes - // reachable by iterating up through chain nodes matching the above - // conditions. For each such store identified, try to find an - // earlier chain to attach the store to which won't violate the - // required ordering. - bool MadeChangeToSt = false; - SmallVector, 8> BetterChains; - - for (StoreSDNode *ChainedStore : ChainedStores) { - SDValue Chain = ChainedStore->getChain(); - SDValue BetterChain = FindBetterChain(ChainedStore, Chain); + // If we didn't find a chained store, exit. + if (ChainedStores.size() == 0) + return false; - if (Chain != BetterChain) { - if (ChainedStore == St) - MadeChangeToSt = true; - BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); - } - } + // Improve all chained stores (St and ChainedStores members) starting from + // where the store chain ended and return single TokenFactor. 
+ SDValue NewChain = STChain->getChain(); + SmallVector TFOps; + for (unsigned I = ChainedStores.size(); I;) { + StoreSDNode *S = ChainedStores[--I]; + SDValue BetterChain = FindBetterChain(S, NewChain); + S = cast(DAG.UpdateNodeOperands( + S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3))); + TFOps.push_back(SDValue(S, 0)); + ChainedStores[I] = S; + } + + // Improve St's chain. Use a new node to avoid creating a loop from CombineTo. + SDValue BetterChain = FindBetterChain(St, NewChain); + SDValue NewST; + if (St->isTruncatingStore()) + NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(), + St->getBasePtr(), St->getMemoryVT(), + St->getMemOperand()); + else + NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(), + St->getBasePtr(), St->getMemOperand()); + + TFOps.push_back(NewST); + + // If we improved every element of TFOps, then we've lost the dependence on + // NewChain to successors of St and we need to add it back to TFOps. Do so at + // the beginning to keep relative order consistent with FindBetterChains. + auto hasImprovedChain = [&](SDValue ST) -> bool { + return ST->getOperand(0) != NewChain; + }; + bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain); + if (AddNewChain) + TFOps.insert(TFOps.begin(), NewChain); + + SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps); + CombineTo(St, TF); + + AddToWorklist(STChain); + // Add TF operands worklist in reverse order. + for (auto I = TF->getNumOperands(); I;) + AddToWorklist(TF->getOperand(--I).getNode()); + AddToWorklist(TF.getNode()); + return true; +} + +bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { + if (OptLevel == CodeGenOpt::None) + return false; + + const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); - // Do all replacements after finding the replacements to make to avoid making - // the chains more complicated by introducing new TokenFactors. 
- for (auto Replacement : BetterChains) - replaceStoreChain(Replacement.first, Replacement.second); + // We must have a base and an offset. + if (!BasePtr.getBase().getNode()) + return false; + + // Do not handle stores to undef base pointers. + if (BasePtr.getBase().isUndef()) + return false; + + // Directly improve a chain of disjoint stores starting at St. + if (parallelizeChainedStores(St)) + return true; - return MadeChangeToSt; + // Improve St's Chain.. + SDValue BetterChain = FindBetterChain(St, St->getChain()); + if (St->getChain() != BetterChain) { + replaceStoreChain(St, BetterChain); + return true; + } + return false; } /// This is the entry point for the file. Index: llvm/trunk/test/CodeGen/AArch64/arm64-abi-varargs.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-abi-varargs.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-abi-varargs.ll @@ -7,14 +7,13 @@ ; CHECK-LABEL: fn9: ; 9th fixed argument ; CHECK: ldr {{w[0-9]+}}, [sp, #64] -; CHECK: add [[ARGS:x[0-9]+]], sp, #72 -; CHECK: add {{x[0-9]+}}, [[ARGS]], #8 +; CHECK-DAG: add [[ARGS:x[0-9]+]], sp, #72 ; First vararg -; CHECK: ldr {{w[0-9]+}}, [sp, #72] +; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #72] ; Second vararg -; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8 +; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #80] ; Third vararg -; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8 +; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #88] %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 Index: llvm/trunk/test/CodeGen/AArch64/ldst-opt.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/ldst-opt.ll +++ llvm/trunk/test/CodeGen/AArch64/ldst-opt.ll @@ -1465,10 +1465,10 @@ define void @merge_zr32_3vec(<3 x i32>* %p) { ; CHECK-LABEL: merge_zr32_3vec: ; CHECK: // %entry -; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}] ; NOSTRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8] -; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] -; 
STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}, #8] +; NOSTRICTALIGN-NEXT: str xzr, [x{{[0-9]+}}] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #4] +; STRICTALIGN-NEXT: str wzr, [x{{[0-9]+}}] ; CHECK-NEXT: ret entry: store <3 x i32> zeroinitializer, <3 x i32>* %p @@ -1480,8 +1480,8 @@ ; CHECK-LABEL: merge_zr32_4vec: ; CHECK: // %entry ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] -; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] ; CHECK-NEXT: ret entry: store <4 x i32> zeroinitializer, <4 x i32>* %p @@ -1505,8 +1505,8 @@ ; CHECK-LABEL: merge_zr32_4vecf: ; CHECK: // %entry ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] -; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] ; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}, #8] +; STRICTALIGN-NEXT: stp wzr, wzr, [x{{[0-9]+}}] ; CHECK-NEXT: ret entry: store <4 x float> zeroinitializer, <4 x float>* %p @@ -1589,8 +1589,8 @@ define void @merge_zr64_3vec(<3 x i64>* %p) { ; CHECK-LABEL: merge_zr64_3vec: ; CHECK: // %entry -; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}] -; CHECK-NEXT: str xzr, [x{{[0-9]+}}, #16] +; CHECK-NEXT: stp xzr, xzr, [x{{[0-9]+}}, #8] +; CHECK-NEXT: str xzr, [x{{[0-9]+}}] ; CHECK-NEXT: ret entry: store <3 x i64> zeroinitializer, <3 x i64>* %p Index: llvm/trunk/test/CodeGen/AArch64/swifterror.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/swifterror.ll +++ llvm/trunk/test/CodeGen/AArch64/swifterror.ll @@ -314,13 +314,12 @@ ; CHECK-APPLE-DAG: strb [[ID]], [x0, #8] ; First vararg -; CHECK-APPLE-DAG: orr {{x[0-9]+}}, [[ARGS]], #0x8 ; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #16] ; Second vararg -; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8 +; CHECK-APPLE-DAG: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24] ; CHECK-APPLE-DAG: add {{x[0-9]+}}, {{x[0-9]+}}, #16 ; Third vararg -; CHECK-APPLE: ldr {{w[0-9]+}}, [{{x[0-9]+}}], #8 +; CHECK-APPLE-DAG: ldr 
{{w[0-9]+}}, [{{.*}}[[TMP]], #32] ; CHECK-APPLE: mov x21, x0 ; CHECK-APPLE-NOT: x21 Index: llvm/trunk/test/CodeGen/ARM/arm-storebytesmerge.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-storebytesmerge.ll +++ llvm/trunk/test/CodeGen/ARM/arm-storebytesmerge.ll @@ -8,101 +8,95 @@ define arm_aapcs_vfpcc void @test(i8* %v50) #0 { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: -; CHECK-NEXT: movw r1, #35722 -; CHECK-NEXT: movt r1, #36236 -; CHECK-NEXT: str.w r1, [r0, #394] -; CHECK-NEXT: movw r1, #36750 -; CHECK-NEXT: movt r1, #37264 -; CHECK-NEXT: str.w r1, [r0, #398] -; CHECK-NEXT: movw r1, #37778 -; CHECK-NEXT: movt r1, #38292 -; CHECK-NEXT: str.w r1, [r0, #402] -; CHECK-NEXT: movw r1, #38806 -; CHECK-NEXT: movt r1, #39320 -; CHECK-NEXT: str.w r1, [r0, #406] -; CHECK-NEXT: movw r1, #39834 -; CHECK-NEXT: strh.w r1, [r0, #410] -; CHECK-NEXT: movw r1, #40348 -; CHECK-NEXT: movt r1, #40862 -; CHECK-NEXT: str.w r1, [r0, #412] -; CHECK-NEXT: movw r1, #41376 -; CHECK-NEXT: movt r1, #41890 -; CHECK-NEXT: str.w r1, [r0, #416] -; CHECK-NEXT: movw r1, #42404 -; CHECK-NEXT: movt r1, #42918 -; CHECK-NEXT: str.w r1, [r0, #420] -; CHECK-NEXT: movw r1, #43432 -; CHECK-NEXT: movt r1, #43946 -; CHECK-NEXT: str.w r1, [r0, #424] -; CHECK-NEXT: movw r1, #44460 -; CHECK-NEXT: movt r1, #44974 -; CHECK-NEXT: str.w r1, [r0, #428] -; CHECK-NEXT: movw r1, #45488 -; CHECK-NEXT: strh.w r1, [r0, #432] +; CHECK-NEXT: movw r1, #65534 +; CHECK-NEXT: strh.w r1, [r0, #510] +; CHECK-NEXT: movw r1, #64506 +; CHECK-NEXT: movt r1, #65020 +; CHECK-NEXT: str.w r1, [r0, #506] +; CHECK-NEXT: movw r1, #63478 +; CHECK-NEXT: movt r1, #63992 +; CHECK-NEXT: str.w r1, [r0, #502] +; CHECK-NEXT: movw r1, #62450 +; CHECK-NEXT: movt r1, #62964 +; CHECK-NEXT: str.w r1, [r0, #498] +; CHECK-NEXT: movw r1, #61422 +; CHECK-NEXT: movt r1, #61936 +; CHECK-NEXT: str.w r1, [r0, #494] +; CHECK-NEXT: movw r1, #60394 +; CHECK-NEXT: movt r1, #60908 +; CHECK-NEXT: str.w r1, [r0, 
#490] +; CHECK-NEXT: movw r1, #59366 +; CHECK-NEXT: movt r1, #59880 +; CHECK-NEXT: str.w r1, [r0, #486] +; CHECK-NEXT: movw r1, #58338 +; CHECK-NEXT: movt r1, #58852 +; CHECK-NEXT: str.w r1, [r0, #482] +; CHECK-NEXT: movw r1, #57310 +; CHECK-NEXT: movt r1, #57824 +; CHECK-NEXT: str.w r1, [r0, #478] +; CHECK-NEXT: movw r1, #56282 +; CHECK-NEXT: movt r1, #56796 +; CHECK-NEXT: str.w r1, [r0, #474] +; CHECK-NEXT: movw r1, #55254 +; CHECK-NEXT: movt r1, #55768 +; CHECK-NEXT: str.w r1, [r0, #470] +; CHECK-NEXT: movw r1, #54226 +; CHECK-NEXT: movt r1, #54740 +; CHECK-NEXT: str.w r1, [r0, #466] +; CHECK-NEXT: movw r1, #53198 +; CHECK-NEXT: movt r1, #53712 +; CHECK-NEXT: str.w r1, [r0, #462] +; CHECK-NEXT: movw r1, #52170 +; CHECK-NEXT: movt r1, #52684 +; CHECK-NEXT: str.w r1, [r0, #458] +; CHECK-NEXT: movw r1, #51142 +; CHECK-NEXT: movt r1, #51656 +; CHECK-NEXT: str.w r1, [r0, #454] +; CHECK-NEXT: movw r1, #50114 +; CHECK-NEXT: movt r1, #50628 +; CHECK-NEXT: str.w r1, [r0, #450] +; CHECK-NEXT: movw r1, #49086 +; CHECK-NEXT: movt r1, #49600 +; CHECK-NEXT: str.w r1, [r0, #446] +; CHECK-NEXT: movw r1, #48058 +; CHECK-NEXT: movt r1, #48572 +; CHECK-NEXT: str.w r1, [r0, #442] +; CHECK-NEXT: movw r1, #47030 +; CHECK-NEXT: movt r1, #47544 +; CHECK-NEXT: str.w r1, [r0, #438] ; CHECK-NEXT: movw r1, #46002 ; CHECK-NEXT: movt r1, #46516 ; CHECK-NEXT: str.w r1, [r0, #434] -; CHECK-NEXT: movw r1, #47030 -; CHECK-NEXT: strh.w r1, [r0, #438] -; CHECK-NEXT: movw r1, #47544 -; CHECK-NEXT: movt r1, #48058 -; CHECK-NEXT: str.w r1, [r0, #440] -; CHECK-NEXT: movw r1, #48572 -; CHECK-NEXT: movt r1, #49086 -; CHECK-NEXT: str.w r1, [r0, #444] -; CHECK-NEXT: movw r1, #49600 -; CHECK-NEXT: strh.w r1, [r0, #448] -; CHECK-NEXT: movs r1, #194 -; CHECK-NEXT: strb.w r1, [r0, #450] -; CHECK-NEXT: movw r1, #50371 -; CHECK-NEXT: movt r1, #50885 -; CHECK-NEXT: str.w r1, [r0, #451] -; CHECK-NEXT: movw r1, #51399 -; CHECK-NEXT: movt r1, #51913 -; CHECK-NEXT: str.w r1, [r0, #455] -; CHECK-NEXT: movw r1, #52427 
-; CHECK-NEXT: movt r1, #52941 -; CHECK-NEXT: str.w r1, [r0, #459] -; CHECK-NEXT: movw r1, #53455 -; CHECK-NEXT: movt r1, #53969 -; CHECK-NEXT: str.w r1, [r0, #463] -; CHECK-NEXT: movw r1, #54483 -; CHECK-NEXT: strh.w r1, [r0, #467] -; CHECK-NEXT: movw r1, #54997 -; CHECK-NEXT: movt r1, #55511 -; CHECK-NEXT: str.w r1, [r0, #469] -; CHECK-NEXT: movw r1, #56025 -; CHECK-NEXT: movt r1, #56539 -; CHECK-NEXT: str.w r1, [r0, #473] -; CHECK-NEXT: movw r1, #57053 -; CHECK-NEXT: movt r1, #57567 -; CHECK-NEXT: str.w r1, [r0, #477] -; CHECK-NEXT: movw r1, #58081 -; CHECK-NEXT: movt r1, #58595 -; CHECK-NEXT: str.w r1, [r0, #481] -; CHECK-NEXT: movw r1, #59109 -; CHECK-NEXT: movt r1, #59623 -; CHECK-NEXT: str.w r1, [r0, #485] -; CHECK-NEXT: movw r1, #60137 -; CHECK-NEXT: strh.w r1, [r0, #489] -; CHECK-NEXT: movw r1, #60651 -; CHECK-NEXT: movt r1, #61165 -; CHECK-NEXT: str.w r1, [r0, #491] -; CHECK-NEXT: movw r1, #61679 -; CHECK-NEXT: strh.w r1, [r0, #495] -; CHECK-NEXT: movw r1, #62193 -; CHECK-NEXT: movt r1, #62707 -; CHECK-NEXT: str.w r1, [r0, #497] -; CHECK-NEXT: movw r1, #63221 -; CHECK-NEXT: movt r1, #63735 -; CHECK-NEXT: str.w r1, [r0, #501] -; CHECK-NEXT: movw r1, #64249 -; CHECK-NEXT: strh.w r1, [r0, #505] -; CHECK-NEXT: movs r1, #251 -; CHECK-NEXT: strb.w r1, [r0, #507] -; CHECK-NEXT: movw r1, #65020 -; CHECK-NEXT: movt r1, #65534 -; CHECK-NEXT: str.w r1, [r0, #508] +; CHECK-NEXT: movw r1, #44974 +; CHECK-NEXT: movt r1, #45488 +; CHECK-NEXT: str.w r1, [r0, #430] +; CHECK-NEXT: movw r1, #43946 +; CHECK-NEXT: movt r1, #44460 +; CHECK-NEXT: str.w r1, [r0, #426] +; CHECK-NEXT: movw r1, #42918 +; CHECK-NEXT: movt r1, #43432 +; CHECK-NEXT: str.w r1, [r0, #422] +; CHECK-NEXT: movw r1, #41890 +; CHECK-NEXT: movt r1, #42404 +; CHECK-NEXT: str.w r1, [r0, #418] +; CHECK-NEXT: movw r1, #40862 +; CHECK-NEXT: movt r1, #41376 +; CHECK-NEXT: str.w r1, [r0, #414] +; CHECK-NEXT: movw r1, #39834 +; CHECK-NEXT: movt r1, #40348 +; CHECK-NEXT: str.w r1, [r0, #410] +; CHECK-NEXT: movw r1, 
#38806 +; CHECK-NEXT: movt r1, #39320 +; CHECK-NEXT: str.w r1, [r0, #406] +; CHECK-NEXT: movw r1, #37778 +; CHECK-NEXT: movt r1, #38292 +; CHECK-NEXT: str.w r1, [r0, #402] +; CHECK-NEXT: movw r1, #36750 +; CHECK-NEXT: movt r1, #37264 +; CHECK-NEXT: str.w r1, [r0, #398] +; CHECK-NEXT: movw r1, #35722 +; CHECK-NEXT: movt r1, #36236 +; CHECK-NEXT: str.w r1, [r0, #394] ; CHECK-NEXT: bx lr %v190 = getelementptr inbounds i8, i8* %v50, i32 394 store i8 -118, i8* %v190, align 1 Index: llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll +++ llvm/trunk/test/CodeGen/ARM/misched-fusion-aes.ll @@ -72,20 +72,27 @@ ; CHECK-LABEL: aesea: ; CHECK: aese.8 [[QA:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QA]] + ; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]] -; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} + ; CHECK: aese.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QC]] + +; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aese.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QD]] + +; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aese.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QE]] -; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} + ; CHECK: aese.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QF]] + ; CHECK: aese.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QG]] + ; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aese.8 [[QH:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QH]] @@ -160,14 +167,14 @@ ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QA]] ; CHECK: aesd.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, 
[[QB]] -; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aesd.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QC]] +; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aesd.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QD]] +; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aesd.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QE]] -; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}} ; CHECK: aesd.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}} ; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QF]] ; CHECK: aesd.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}} Index: llvm/trunk/test/CodeGen/Mips/fastcc.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/fastcc.ll +++ llvm/trunk/test/CodeGen/Mips/fastcc.ll @@ -223,24 +223,24 @@ define internal fastcc void @callee1(float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8, float %a9, float %a10, float %a11, float %a12, float %a13, float %a14, float %a15, float %a16, float %a17, float %a18, float %a19, float %a20) nounwind noinline { entry: ; CHECK-LABEL: callee1: -; CHECK-DAG: swc1 $f0 -; CHECK-DAG: swc1 $f1 -; CHECK-DAG: swc1 $f2 -; CHECK-DAG: swc1 $f3 -; CHECK-DAG: swc1 $f4 -; CHECK-DAG: swc1 $f5 -; CHECK-DAG: swc1 $f6 -; CHECK-DAG: swc1 $f7 -; CHECK-DAG: swc1 $f8 -; CHECK-DAG: swc1 $f9 -; CHECK-DAG: swc1 $f10 -; CHECK-DAG: swc1 $f11 -; CHECK-DAG: swc1 $f12 -; CHECK-DAG: swc1 $f13 -; CHECK-DAG: swc1 $f14 -; CHECK-DAG: swc1 $f15 -; CHECK-DAG: swc1 $f16 ; CHECK-DAG: swc1 $f17 +; CHECK-DAG: swc1 $f16 +; CHECK-DAG: swc1 $f15 +; CHECK-DAG: swc1 $f14 +; CHECK-DAG: swc1 $f13 +; CHECK-DAG: swc1 $f12 +; CHECK-DAG: swc1 $f11 +; CHECK-DAG: swc1 $f10 +; CHECK-DAG: swc1 $f9 +; CHECK-DAG: swc1 $f8 +; CHECK-DAG: swc1 $f7 +; CHECK-DAG: swc1 $f6 +; CHECK-DAG: swc1 $f5 +; CHECK-DAG: swc1 $f4 +; CHECK-DAG: swc1 $f3 +; CHECK-DAG: swc1 
$f2 +; CHECK-DAG: swc1 $f1 +; CHECK-DAG: swc1 $f0 ; CHECK-DAG: swc1 $f18 ; CHECK-DAG: swc1 $f19 @@ -330,7 +330,7 @@ ; NOODDSPREG-DAG: swc1 $f16, 32($[[R0]]) ; NOODDSPREG-DAG: swc1 $f18, 36($[[R0]]) -; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], 0($sp) +; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], {{[0-9]+}}($sp) ; NOODDSPREG-DAG: swc1 $[[F0]], 40($[[R0]]) store float %a0, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 0), align 4 Index: llvm/trunk/test/CodeGen/SystemZ/pr36164.ll =================================================================== --- llvm/trunk/test/CodeGen/SystemZ/pr36164.ll +++ llvm/trunk/test/CodeGen/SystemZ/pr36164.ll @@ -15,54 +15,39 @@ define void @main() local_unnamed_addr #0 { ; CHECK-LABEL: main: ; CHECK: # %bb.0: -; CHECK-NEXT: stmg %r12, %r15, 96(%r15) -; CHECK-NEXT: .cfi_offset %r12, -64 -; CHECK-NEXT: .cfi_offset %r13, -56 -; CHECK-NEXT: .cfi_offset %r14, -48 -; CHECK-NEXT: .cfi_offset %r15, -40 ; CHECK-NEXT: lhi %r0, 1 ; CHECK-NEXT: larl %r1, g_938 -; CHECK-NEXT: lhi %r2, 2 -; CHECK-NEXT: lhi %r3, 3 -; CHECK-NEXT: lhi %r4, 0 -; CHECK-NEXT: lhi %r5, 4 -; CHECK-NEXT: larl %r14, g_11 +; CHECK-NEXT: lhi %r2, 0 +; CHECK-NEXT: lhi %r3, 4 +; CHECK-NEXT: larl %r4, g_11 ; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: strl %r0, g_73 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: strl %r0, g_69 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-DAG: lghi %r13, 24 -; CHECK-DAG: strl %r2, g_69 -; CHECK-DAG: ag %r13, 0(%r1) -; CHECK-NEXT: lrl %r12, g_832 -; CHECK-NEXT: strl %r3, g_69 -; 
CHECK-NEXT: lrl %r12, g_832 -; CHECK-NEXT: strl %r4, g_69 -; CHECK-NEXT: lrl %r12, g_832 -; CHECK-NEXT: strl %r0, g_69 -; CHECK-NEXT: lrl %r12, g_832 ; CHECK-NEXT: strl %r2, g_69 -; CHECK-NEXT: lrl %r12, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: lrl %r5, g_832 +; CHECK-NEXT: agsi 0(%r1), 24 +; CHECK-NEXT: lrl %r5, g_832 ; CHECK-NEXT: strl %r3, g_69 -; CHECK-NEXT: stgrl %r13, g_938 -; CHECK-NEXT: lrl %r13, g_832 -; CHECK-NEXT: strl %r5, g_69 -; CHECK-NEXT: mvi 0(%r14), 1 +; CHECK-NEXT: mvi 0(%r4), 1 ; CHECK-NEXT: j .LBB0_1 br label %1 Index: llvm/trunk/test/CodeGen/X86/stores-merging.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/stores-merging.ll +++ llvm/trunk/test/CodeGen/X86/stores-merging.ll @@ -13,9 +13,8 @@ define void @redundant_stores_merging() { ; CHECK-LABEL: redundant_stores_merging: ; CHECK: # %bb.0: -; CHECK-NEXT: movabsq $528280977409, %rax # imm = 0x7B00000001 +; CHECK-NEXT: movabsq $1958505086977, %rax # imm = 0x1C800000001 ; CHECK-NEXT: movq %rax, e+{{.*}}(%rip) -; CHECK-NEXT: movl $456, e+{{.*}}(%rip) # imm = 0x1C8 ; CHECK-NEXT: retq store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4 store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4