diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1246,30 +1246,11 @@ HasRecMapType HasRecMap; /// The type for ExprValueMap. - using ValueOffsetPair = std::pair; - using ValueOffsetPairSetVector = SmallSetVector; - using ExprValueMapType = DenseMap; + using ValueSetVector = SmallSetVector; + using ExprValueMapType = DenseMap; /// ExprValueMap -- This map records the original values from which /// the SCEV expr is generated from. - /// - /// We want to represent the mapping as SCEV -> ValueOffsetPair instead - /// of SCEV -> Value: - /// Suppose we know S1 expands to V1, and - /// S1 = S2 + C_a - /// S3 = S2 + C_b - /// where C_a and C_b are different SCEVConstants. Then we'd like to - /// expand S3 as V1 - C_a + C_b instead of expanding S2 literally. - /// It is helpful when S2 is a complex SCEV expr. - /// - /// In order to do that, we represent ExprValueMap as a mapping from - /// SCEV to ValueOffsetPair. We will save both S1->{V1, 0} and - /// S2->{V1, C_a} into the map when we create SCEV for V1. When S3 - /// is expanded, it will first expand S2 to V1 - C_a because of - /// S2->{V1, C_a} in the map, then expand S3 to V1 - C_a + C_b. - /// - /// Note: S->{V, Offset} in the ExprValueMap means S can be expanded - /// to V - Offset. ExprValueMapType ExprValueMap; /// The type for ValueExprMap. @@ -1300,7 +1281,7 @@ DenseMap MinTrailingZerosCache; /// Return the Value set from which the SCEV expr is generated. 
- ValueOffsetPairSetVector *getSCEVValues(const SCEV *S); + ValueSetVector *getSCEVValues(const SCEV *S); /// Private helper method for the GetMinTrailingZeros method uint32_t GetMinTrailingZerosImpl(const SCEV *S); diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -385,8 +385,8 @@ /// Note that this function does not perform an exhaustive search. I.e if it /// didn't find any value it does not mean that there is no such value. /// - Optional - getRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L); + Value *getRelatedExistingExpansion(const SCEV *S, const Instruction *At, + Loop *L); /// Returns a suitable insert point after \p I, that dominates \p /// MustDominate. Skips instructions inserted by the expander. @@ -444,8 +444,7 @@ Value *expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, Value *V); /// Find a previous Value in ExprValueMap for expand. - ScalarEvolution::ValueOffsetPair - FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt); + Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt); Value *expand(const SCEV *S); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4278,27 +4278,9 @@ return FoundAddRec; } -/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}. -/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an -/// offset I, then return {S', I}, else return {\p S, nullptr}. 
-static std::pair splitAddExpr(const SCEV *S) { - const auto *Add = dyn_cast(S); - if (!Add) - return {S, nullptr}; - - if (Add->getNumOperands() != 2) - return {S, nullptr}; - - auto *ConstOp = dyn_cast(Add->getOperand(0)); - if (!ConstOp) - return {S, nullptr}; - - return {Add->getOperand(1), ConstOp->getValue()}; -} - -/// Return the ValueOffsetPair set for \p S. \p S can be represented -/// by the value and offset from any ValueOffsetPair in the set. -ScalarEvolution::ValueOffsetPairSetVector * +/// Return the Value set for \p S. \p S can be represented by any Value in +/// the set. +ScalarEvolution::ValueSetVector * ScalarEvolution::getSCEVValues(const SCEV *S) { ExprValueMapType::iterator SI = ExprValueMap.find_as(S); if (SI == ExprValueMap.end()) @@ -4306,8 +4288,8 @@ #ifndef NDEBUG if (VerifySCEVMap) { // Check there is no dangling Value in the set returned. - for (const auto &VE : SI->second) - assert(ValueExprMap.count(VE.first)); + for (Value *V : SI->second) + assert(ValueExprMap.count(V)); } #endif return &SI->second; @@ -4320,18 +4302,9 @@ ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { const SCEV *S = I->second; - // Remove {V, 0} from the set of ExprValueMap[S] + // Remove V from the set of ExprValueMap[S] if (auto *SV = getSCEVValues(S)) - SV->remove({V, nullptr}); - - // Remove {V, Offset} from the set of ExprValueMap[Stripped] - const SCEV *Stripped; - ConstantInt *Offset; - std::tie(Stripped, Offset) = splitAddExpr(S); - if (Offset != nullptr) { - if (auto *SV = getSCEVValues(Stripped)) - SV->remove({V, Offset}); - } + SV->remove(V); ValueExprMap.erase(V); } } @@ -4343,7 +4316,7 @@ auto It = ValueExprMap.find_as(V); if (It == ValueExprMap.end()) { ValueExprMap.insert({SCEVCallbackVH(V, this), S}); - ExprValueMap[S].insert({V, nullptr}); + ExprValueMap[S].insert(V); } } @@ -4360,23 +4333,8 @@ - // ValueExprMap before insert S->{V, 0} into ExprValueMap. + // ValueExprMap before insert S->V into ExprValueMap.
std::pair Pair = ValueExprMap.insert({SCEVCallbackVH(V, this), S}); - if (Pair.second) { - ExprValueMap[S].insert({V, nullptr}); - - // If S == Stripped + Offset, add Stripped -> {V, Offset} into - // ExprValueMap. - const SCEV *Stripped = S; - ConstantInt *Offset = nullptr; - std::tie(Stripped, Offset) = splitAddExpr(S); - // If stripped is SCEVUnknown, don't bother to save - // Stripped -> {V, offset}. It doesn't simplify and sometimes even - // increase the complexity of the expansion code. - // If V is GetElementPtrInst, don't save Stripped -> {V, offset} - // because it may generate add/sub instead of GEP in SCEV expansion. - if (Offset != nullptr && !isa(Stripped) && - !isa(V)) - ExprValueMap[Stripped].insert({V, Offset}); - } + if (Pair.second) + ExprValueMap[S].insert(V); } return S; } @@ -13399,12 +13357,10 @@ auto ExprIt = ExprValueMap.find(S); if (ExprIt != ExprValueMap.end()) { - for (auto &ValueAndOffset : ExprIt->second) { - if (ValueAndOffset.second == nullptr) { - auto ValueIt = ValueExprMap.find_as(ValueAndOffset.first); - if (ValueIt != ValueExprMap.end()) - ValueExprMap.erase(ValueIt); - } + for (Value *V : ExprIt->second) { + auto ValueIt = ValueExprMap.find_as(V); + if (ValueIt != ValueExprMap.end()) + ValueExprMap.erase(ValueIt); } ExprValueMap.erase(ExprIt); } @@ -13546,7 +13502,7 @@ // Check that the value is also part of the reverse map. 
auto It = ExprValueMap.find(KV.second); - if (It == ExprValueMap.end() || !It->second.contains({KV.first, nullptr})) { + if (It == ExprValueMap.end() || !It->second.contains(KV.first)) { dbgs() << "Value " << *KV.first << " is in ValueExprMap but not in ExprValueMap\n"; std::abort(); @@ -13554,19 +13510,15 @@ } for (const auto &KV : ExprValueMap) { - for (const auto &ValueAndOffset : KV.second) { - if (ValueAndOffset.second != nullptr) - continue; - - auto It = ValueExprMap.find_as(ValueAndOffset.first); + for (Value *V : KV.second) { + auto It = ValueExprMap.find_as(V); if (It == ValueExprMap.end()) { - dbgs() << "Value " << *ValueAndOffset.first + dbgs() << "Value " << *V << " is in ExprValueMap but not in ValueExprMap\n"; std::abort(); } if (It->second != KV.first) { - dbgs() << "Value " << *ValueAndOffset.first - << " mapped to " << *It->second + dbgs() << "Value " << *V << " mapped to " << *It->second << " rather than " << *KV.first << "\n"; std::abort(); } diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1870,9 +1870,8 @@ return V; } -ScalarEvolution::ValueOffsetPair -SCEVExpander::FindValueInExprValueMap(const SCEV *S, - const Instruction *InsertPt) { +Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S, + const Instruction *InsertPt) { auto *Set = SE.getSCEVValues(S); // If the expansion is not in CanonicalMode, and the SCEV contains any // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. @@ -1882,9 +1881,7 @@ // Choose a Value from the set which dominates the InsertPt. // InsertPt should be inside the Value's parent loop so as not to break // the LCSSA form. 
- for (auto const &VOPair : *Set) { - Value *V = VOPair.first; - ConstantInt *Offset = VOPair.second; + for (Value *V : *Set) { Instruction *EntInst = dyn_cast_or_null(V); if (!EntInst) continue; @@ -1894,11 +1891,11 @@ SE.DT.dominates(EntInst, InsertPt) && (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) - return {V, Offset}; + return V; } } } - return {nullptr, nullptr}; + return nullptr; } // The expansion of SCEV will either reuse a previous Value in ExprValueMap, @@ -1967,9 +1964,7 @@ Builder.SetInsertPoint(InsertPt); // Expand the expression into instructions. - ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt); - Value *V = VO.first; - + Value *V = FindValueInExprValueMap(S, InsertPt); if (!V) V = visit(S); else { @@ -1980,21 +1975,6 @@ if (auto *I = dyn_cast(V)) if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)) I->dropPoisonGeneratingFlags(); - - if (VO.second) { - if (PointerType *Vty = dyn_cast(V->getType())) { - int64_t Offset = VO.second->getSExtValue(); - ConstantInt *Idx = - ConstantInt::getSigned(VO.second->getType(), -Offset); - unsigned AS = Vty->getAddressSpace(); - V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS)); - V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx, - "uglygep"); - V = Builder.CreateBitCast(V, Vty); - } else { - V = Builder.CreateSub(V, VO.second); - } - } } // Remember the expanded value for this SCEV at this location. 
// @@ -2176,9 +2156,9 @@ return NumElim; } -Optional -SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, - Loop *L) { +Value *SCEVExpander::getRelatedExistingExpansion(const SCEV *S, + const Instruction *At, + Loop *L) { using namespace llvm::PatternMatch; SmallVector ExitingBlocks; @@ -2195,25 +2175,17 @@ continue; if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) - return ScalarEvolution::ValueOffsetPair(LHS, nullptr); + return LHS; if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At)) - return ScalarEvolution::ValueOffsetPair(RHS, nullptr); + return RHS; } // Use expand's logic which is used for reusing a previous Value in // ExprValueMap. Note that we don't currently model the cost of // needing to drop poison generating flags on the instruction if we // want to reuse it. We effectively assume that has zero cost. - ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At); - if (VO.first) - return VO; - - // There is potential to make this significantly smarter, but this simple - // heuristic already gets some interesting cases. - - // Can not find suitable value. 
- return None; + return FindValueInExprValueMap(S, At); } template static InstructionCost costAndCollectOperands( diff --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll --- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll +++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll @@ -115,55 +115,57 @@ ; CHECK-NEXT: .cfi_offset %r15, -24 ; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax -; CHECK-NEXT: movl (%rax), %ecx -; CHECK-NEXT: andl $511, %ecx # imm = 0x1FF -; CHECK-NEXT: leaq 1(%rcx), %r13 -; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rax -; CHECK-NEXT: movl %r13d, (%rax) -; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rax ; CHECK-NEXT: movl (%rax), %edx -; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: andl $511, %eax # imm = 0x1FF +; CHECK-NEXT: leaq 1(%rax), %rsi +; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx +; CHECK-NEXT: movl %esi, (%rcx) +; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx +; CHECK-NEXT: movl (%rcx), %ecx +; CHECK-NEXT: testl %ecx, %ecx ; CHECK-NEXT: je .LBB1_18 ; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph -; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rax -; CHECK-NEXT: movq (%rax), %r12 -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: notl %eax -; CHECK-NEXT: leaq 8(,%rax,8), %r14 -; CHECK-NEXT: imulq %r13, %r14 +; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rdi +; CHECK-NEXT: movq (%rdi), %r12 +; CHECK-NEXT: movl %ecx, %edi +; CHECK-NEXT: notl %edi +; CHECK-NEXT: leaq 8(,%rdi,8), %r14 +; CHECK-NEXT: imulq %rsi, %r14 ; CHECK-NEXT: addq %r12, %r14 ; CHECK-NEXT: movq x2@GOTPCREL(%rip), %r15 -; CHECK-NEXT: movl (%r15), %eax -; CHECK-NEXT: leal 8(,%rcx,8), %ecx -; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: leaq 8(%r12), %rcx -; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: leaq 32(%r12), %rbx -; CHECK-NEXT: shlq $3, %r13 -; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: movq x0@GOTPCREL(%rip), %rcx 
-; CHECK-NEXT: movq %r12, %rdi +; CHECK-NEXT: movl (%r15), %ebx +; CHECK-NEXT: leal 8(,%rax,8), %eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: leaq 8(%r12), %rax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: leaq 32(%r12), %rax +; CHECK-NEXT: andl $511, %edx # imm = 0x1FF +; CHECK-NEXT: leaq 8(,%rdx,8), %r13 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: movq x0@GOTPCREL(%rip), %rdx +; CHECK-NEXT: movq %r12, %rsi ; CHECK-NEXT: jmp .LBB1_2 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB1_15: # %for.cond1.for.inc3_crit_edge ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: movl %eax, (%r15) +; CHECK-NEXT: movl %ebx, (%r15) ; CHECK-NEXT: .LBB1_16: # %for.inc3 ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: addq %r13, %rdi -; CHECK-NEXT: incq %rsi -; CHECK-NEXT: addq %r13, %rbx -; CHECK-NEXT: incl %edx +; CHECK-NEXT: addq %r13, %rsi +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: addq %r13, %rax +; CHECK-NEXT: incl %ecx ; CHECK-NEXT: je .LBB1_17 ; CHECK-NEXT: .LBB1_2: # %for.cond1thread-pre-split ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB1_12 Depth 2 ; CHECK-NEXT: # Child Loop BB1_14 Depth 2 -; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: testl %ebx, %ebx ; CHECK-NEXT: jns .LBB1_16 ; CHECK-NEXT: # %bb.3: # %for.body2.preheader ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: movslq %eax, %r9 +; CHECK-NEXT: movslq %ebx, %r9 ; CHECK-NEXT: testq %r9, %r9 ; CHECK-NEXT: movq $-1, %rbp ; CHECK-NEXT: cmovnsq %r9, %rbp @@ -178,76 +180,76 @@ ; CHECK-NEXT: je .LBB1_14 ; CHECK-NEXT: # %bb.5: # %vector.memcheck ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload -; CHECK-NEXT: imulq %rsi, %r10 -; CHECK-NEXT: leaq (%r12,%r10), %rax -; CHECK-NEXT: leaq (%rax,%r9,8), %rax +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload +; CHECK-NEXT: imulq %rdi, %r11 +; CHECK-NEXT: leaq 
(%r12,%r11), %rbx +; CHECK-NEXT: leaq (%rbx,%r9,8), %rbx ; CHECK-NEXT: testq %r9, %r9 -; CHECK-NEXT: movq $-1, %r11 -; CHECK-NEXT: cmovnsq %r9, %r11 -; CHECK-NEXT: cmpq %rcx, %rax +; CHECK-NEXT: movq $-1, %r10 +; CHECK-NEXT: cmovnsq %r9, %r10 +; CHECK-NEXT: cmpq %rdx, %rbx ; CHECK-NEXT: jae .LBB1_7 ; CHECK-NEXT: # %bb.6: # %vector.memcheck ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload -; CHECK-NEXT: leaq (%r10,%r11,8), %rax -; CHECK-NEXT: cmpq %rcx, %rax +; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; CHECK-NEXT: leaq (%r11,%r10,8), %rbx +; CHECK-NEXT: cmpq %rdx, %rbx ; CHECK-NEXT: ja .LBB1_14 ; CHECK-NEXT: .LBB1_7: # %vector.body.preheader ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: leaq -4(%r8), %rax -; CHECK-NEXT: movq %rax, %r10 -; CHECK-NEXT: shrq $2, %r10 -; CHECK-NEXT: btl $2, %eax +; CHECK-NEXT: leaq -4(%r8), %rbx +; CHECK-NEXT: movq %rbx, %r11 +; CHECK-NEXT: shrq $2, %r11 +; CHECK-NEXT: btl $2, %ebx ; CHECK-NEXT: jb .LBB1_8 ; CHECK-NEXT: # %bb.9: # %vector.body.prol.preheader ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 ; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] -; CHECK-NEXT: movdqu %xmm0, (%rdi,%r9,8) -; CHECK-NEXT: movdqu %xmm0, 16(%rdi,%r9,8) -; CHECK-NEXT: movl $4, %r11d -; CHECK-NEXT: testq %r10, %r10 +; CHECK-NEXT: movdqu %xmm0, (%rsi,%r9,8) +; CHECK-NEXT: movdqu %xmm0, 16(%rsi,%r9,8) +; CHECK-NEXT: movl $4, %r10d +; CHECK-NEXT: testq %r11, %r11 ; CHECK-NEXT: jne .LBB1_11 ; CHECK-NEXT: jmp .LBB1_13 ; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: xorl %r11d, %r11d -; CHECK-NEXT: testq %r10, %r10 +; CHECK-NEXT: xorl %r10d, %r10d +; CHECK-NEXT: testq %r11, %r11 ; CHECK-NEXT: je .LBB1_13 ; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 ; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: pshufd 
{{.*#+}} xmm0 = xmm0[0,1,0,1] -; CHECK-NEXT: movq %r11, %rax -; CHECK-NEXT: subq %r8, %rax -; CHECK-NEXT: addq %r9, %r11 -; CHECK-NEXT: leaq (%rbx,%r11,8), %r11 +; CHECK-NEXT: movq %r10, %rbx +; CHECK-NEXT: subq %r8, %rbx +; CHECK-NEXT: addq %r9, %r10 +; CHECK-NEXT: leaq (%rax,%r10,8), %r10 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB1_12: # %vector.body ; CHECK-NEXT: # Parent Loop BB1_2 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: movdqu %xmm0, -32(%r11) -; CHECK-NEXT: movdqu %xmm0, -16(%r11) -; CHECK-NEXT: movdqu %xmm0, (%r11) -; CHECK-NEXT: movdqu %xmm0, 16(%r11) -; CHECK-NEXT: addq $64, %r11 -; CHECK-NEXT: addq $8, %rax +; CHECK-NEXT: movdqu %xmm0, -32(%r10) +; CHECK-NEXT: movdqu %xmm0, -16(%r10) +; CHECK-NEXT: movdqu %xmm0, (%r10) +; CHECK-NEXT: movdqu %xmm0, 16(%r10) +; CHECK-NEXT: addq $64, %r10 +; CHECK-NEXT: addq $8, %rbx ; CHECK-NEXT: jne .LBB1_12 ; CHECK-NEXT: .LBB1_13: # %middle.block ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 ; CHECK-NEXT: addq %r8, %r9 ; CHECK-NEXT: cmpq %r8, %rbp -; CHECK-NEXT: movq %r9, %rax +; CHECK-NEXT: movq %r9, %rbx ; CHECK-NEXT: je .LBB1_15 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB1_14: # %for.body2 ; CHECK-NEXT: # Parent Loop BB1_2 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: movq (%rcx), %rax -; CHECK-NEXT: movq %rax, (%rdi,%r9,8) -; CHECK-NEXT: leaq 1(%r9), %rax +; CHECK-NEXT: movq (%rdx), %rbp +; CHECK-NEXT: movq %rbp, (%rsi,%r9,8) +; CHECK-NEXT: leaq 1(%r9), %rbx ; CHECK-NEXT: cmpq $-1, %r9 -; CHECK-NEXT: movq %rax, %r9 +; CHECK-NEXT: movq %rbx, %r9 ; CHECK-NEXT: jl .LBB1_14 ; CHECK-NEXT: jmp .LBB1_15 ; CHECK-NEXT: .LBB1_17: # %for.cond.for.end5_crit_edge diff --git a/llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll b/llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll --- a/llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll +++ b/llvm/test/Transforms/LoopIdiom/memset-runtime-32bit.ll @@ -239,7 +239,7 @@ ; CHECK-NEXT: [[MUL3:%.*]] = mul 
nsw i64 [[M:%.*]], 4 ; CHECK-NEXT: [[CONV4:%.*]] = trunc i64 [[MUL3]] to i32 ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[M]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[CONV]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 2 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 4 [[AR1]], i8 0, i32 [[TMP3]], i1 false) diff --git a/llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll b/llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll --- a/llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll +++ b/llvm/test/Transforms/LoopIdiom/memset-runtime-64bit.ll @@ -235,7 +235,7 @@ ; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[M:%.*]] to i64 ; CHECK-NEXT: [[CONV2:%.*]] = sext i32 [[M]] to i64 ; CHECK-NEXT: [[MUL3:%.*]] = mul i64 [[CONV2]], 4 -; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[CONV]], -1 +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[CONV1]], [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 2 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[AR1]], i8 0, i64 [[TMP2]], i1 false)