Keep ScalarEvolution's ValueExprMap and ExprValueMap consistent.

Summary of the ScalarEvolution hunks below:

  * Declare and define ScalarEvolution::insertValueToMap(V, S). It asserts
    that V is not yet in ValueExprMap, then records V -> S in ValueExprMap
    and {V, nullptr} in ExprValueMap[S].
  * Replace the direct ValueExprMap writes in the PHI-handling hunks with
    insertValueToMap. In the hunk at old line 5336 an already-present entry
    is asserted to be equal to PHISCEV instead of being overwritten.
  * Simplify the forget condition from `(I != PN && Old == SymName)` to
    `Old == SymName`.
  * In the hunk at old line 12993, before erasing ExprValueMap[S], also erase
    the ValueExprMap entry of every value stored there with a null offset.
  * In the hunk at old line 13100, extend the ValueExprMap loop: every entry
    must appear in ExprValueMap as {value, nullptr}, and every null-offset
    ExprValueMap entry must map back to the same SCEV in ValueExprMap.
    A mismatch is printed to dbgs() and followed by std::abort().

NOTE(review): insertValueToMap only asserts that V is absent. Presumably
ValueExprMap.insert leaves an existing entry untouched, in which case a
build without assertions that reaches it with a duplicate V would still add
{V, nullptr} to ExprValueMap[S] and leave the two maps out of sync. Confirm
that all callers guarantee absence.

NOTE(review): the line structure of this patch was reconstructed from the
hunk headers. Indentation and intra-line spacing of context lines should be
confirmed against the target tree (or applied with a whitespace-tolerant
option).

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1923,6 +1923,9 @@
   /// Erase Value from ValueExprMap and ExprValueMap.
   void eraseValueFromMap(Value *V);
 
+  /// Insert V to S mapping into ValueExprMap and ExprValueMap.
+  void insertValueToMap(Value *V, const SCEV *S);
+
   /// Return false iff given SCEV contains a SCEVUnknown with NULL value-
   /// pointer.
   bool checkValidity(const SCEV *S) const;
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4094,6 +4094,13 @@
   }
 }
 
+void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) {
+  assert(ValueExprMap.find_as(V) == ValueExprMap.end() &&
+         "Must not be in value map yet");
+  ValueExprMap.insert({SCEVCallbackVH(V, this), S});
+  ExprValueMap[S].insert({V, nullptr});
+}
+
 /// Return an existing SCEV if it exists, otherwise analyze the expression and
 /// create a new one.
 const SCEV *ScalarEvolution::getSCEV(Value *V) {
@@ -4455,9 +4462,7 @@
       // In the second case, createNodeForPHI will perform the necessary
       // updates on its own when it gets to that point. In the third, we do
       // want to forget the SCEVUnknown.
-      if (!isa<PHINode>(I) ||
-          !isa<SCEVUnknown>(Old) ||
-          (I != PN && Old == SymName)) {
+      if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old) || Old == SymName) {
         eraseValueFromMap(It->first);
         ToForget.push_back(Old);
       }
@@ -5336,7 +5341,13 @@
   const SCEV *StartVal = getSCEV(StartValueV);
   const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
 
-  ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+  // The recursive queries above may have already computed the SCEV.
+  // However, it must be the same as the one computed here.
+  auto It = ValueExprMap.find_as(PN);
+  if (It != ValueExprMap.end())
+    assert(It->second == PHISCEV);
+  else
+    insertValueToMap(PN, PHISCEV);
 
   // We can add Flags to the post-inc expression only if we
   // know that it is *undefined behavior* for BEValueV to
@@ -5386,7 +5397,7 @@
 
   // Handle PHI node value symbolically.
   const SCEV *SymbolicName = getUnknown(PN);
-  ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});
+  insertValueToMap(PN, SymbolicName);
 
   // Using this symbolic name for the PHI, analyze the value coming around
   // the back-edge.
@@ -5458,7 +5469,7 @@
         // to be symbolic. We now need to go back and purge all of the
         // entries for the scalars that use the symbolic expression.
         forgetSymbolicName(PN, SymbolicName);
-        ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+        insertValueToMap(PN, PHISCEV);
 
         // We can add Flags to the post-inc expression only if we
         // know that it is *undefined behavior* for BEValueV to
@@ -5490,7 +5501,7 @@
         // to be symbolic. We now need to go back and purge all of the
         // entries for the scalars that use the symbolic expression.
         forgetSymbolicName(PN, SymbolicName);
-        ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
+        insertValueToMap(PN, Shifted);
         return Shifted;
       }
     }
@@ -12993,9 +13004,20 @@
   BlockDispositions.erase(S);
   UnsignedRanges.erase(S);
   SignedRanges.erase(S);
-  ExprValueMap.erase(S);
   HasRecMap.erase(S);
   MinTrailingZerosCache.erase(S);
+
+  auto ExprIt = ExprValueMap.find(S);
+  if (ExprIt != ExprValueMap.end()) {
+    for (auto &ValueAndOffset : ExprIt->second) {
+      if (ValueAndOffset.second == nullptr) {
+        auto ValueIt = ValueExprMap.find_as(ValueAndOffset.first);
+        if (ValueIt != ValueExprMap.end())
+          ValueExprMap.erase(ValueIt);
+      }
+    }
+    ExprValueMap.erase(ExprIt);
+  }
 }
 
 void
@@ -13100,13 +13122,40 @@
     ValidLoops.insert(L);
     Worklist.append(L->begin(), L->end());
   }
-  // Check for SCEV expressions referencing invalid/deleted loops.
   for (auto &KV : ValueExprMap) {
-    auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second);
-    if (!AR)
-      continue;
-    assert(ValidLoops.contains(AR->getLoop()) &&
-           "AddRec references invalid loop");
+    // Check for SCEV expressions referencing invalid/deleted loops.
+    if (auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second)) {
+      assert(ValidLoops.contains(AR->getLoop()) &&
+             "AddRec references invalid loop");
+    }
+
+    // Check that the value is also part of the reverse map.
+    auto It = ExprValueMap.find(KV.second);
+    if (It == ExprValueMap.end() || !It->second.contains({KV.first, nullptr})) {
+      dbgs() << "Value " << *KV.first
+             << " is in ValueExprMap but not in ExprValueMap\n";
+      std::abort();
+    }
+  }
+
+  for (const auto &KV : ExprValueMap) {
+    for (const auto &ValueAndOffset : KV.second) {
+      if (ValueAndOffset.second != nullptr)
+        continue;
+
+      auto It = ValueExprMap.find_as(ValueAndOffset.first);
+      if (It == ValueExprMap.end()) {
+        dbgs() << "Value " << *ValueAndOffset.first
+               << " is in ExprValueMap but not in ValueExprMap\n";
+        std::abort();
+      }
+      if (It->second != KV.first) {
+        dbgs() << "Value " << *ValueAndOffset.first
+               << " mapped to " << *It->second
+               << " rather than " << *KV.first << "\n";
+        std::abort();
+      }
+    }
   }
 
   // Verify intergity of SCEV users.
diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll --- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll @@ -10,8 +10,8 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_type_of_x]* %.x, i32* %.l, <2 x double>* %.vy01, <2 x double>* %.vy02, <2 x double>* %.vy03, <2 x double>* %.vy04, <2 x double>* %.vy05, <2 x double>* %.vy06, <2 x double>* %.vy07, <2 x double>* %.vy08, <2 x double>* %.vy09, <2 x double>* %.vy0a, <2 x double>* %.vy0b, <2 x double>* %.vy0c, <2 x double>* %.vy21, <2 x double>* %.vy22, <2 x double>* %.vy23, <2 x double>* %.vy24, <2 x double>* %.vy25, <2 x double>* %.vy26, <2 x double>* %.vy27, <2 x double>* %.vy28, <2 x double>* %.vy29, <2 x double>* %.vy2a, <2 x double>* %.vy2b, <2 x double>* %.vy2c) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stdu 1, -608(1) -; CHECK-NEXT: .cfi_def_cfa_offset 608 +; CHECK-NEXT: stdu 1, -592(1) +; CHECK-NEXT: .cfi_def_cfa_offset 592 ; CHECK-NEXT: .cfi_offset r14, -192 ; CHECK-NEXT: .cfi_offset r15, -184 ; CHECK-NEXT: .cfi_offset r16, -176 @@ -48,322 +48,313 @@ ; CHECK-NEXT: .cfi_offset v29, -240 ; CHECK-NEXT: .cfi_offset v30, -224 ; CHECK-NEXT: .cfi_offset v31, -208 -; CHECK-NEXT: lwz 0, 0(4) -; CHECK-NEXT: std 14, 416(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: cmpwi 0, 1 -; CHECK-NEXT: std 16, 432(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 440(1) # 8-byte Folded Spill -; CHECK-NEXT: std 18, 448(1) # 8-byte Folded Spill -; CHECK-NEXT: std 19, 456(1) # 8-byte Folded Spill -; CHECK-NEXT: std 20, 464(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 472(1) # 8-byte Folded Spill -; CHECK-NEXT: std 22, 480(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, 488(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, 496(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, 504(1) # 8-byte Folded Spill -; CHECK-NEXT: 
std 26, 512(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 520(1) # 8-byte Folded Spill -; CHECK-NEXT: std 28, 528(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, 536(1) # 8-byte Folded Spill -; CHECK-NEXT: std 30, 544(1) # 8-byte Folded Spill -; CHECK-NEXT: std 31, 552(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 26, 560(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 27, 568(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 28, 576(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 29, 584(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 30, 592(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 31, 600(1) # 8-byte Folded Spill -; CHECK-NEXT: stxv 52, 224(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 53, 240(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 54, 256(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 55, 272(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 56, 288(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 57, 304(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 58, 320(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 59, 336(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 60, 352(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 61, 368(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 62, 384(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 63, 400(1) # 16-byte Folded Spill +; CHECK-NEXT: lwz 4, 0(4) +; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill +; CHECK-NEXT: cmpwi 4, 1 +; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill +; CHECK-NEXT: std 18, 432(1) # 8-byte Folded Spill +; CHECK-NEXT: std 19, 440(1) # 8-byte Folded Spill +; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill +; CHECK-NEXT: std 22, 464(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, 472(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, 480(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, 488(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill +; 
CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill +; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, 528(1) # 8-byte Folded Spill +; CHECK-NEXT: std 31, 536(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 28, 560(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 29, 568(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 30, 576(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 31, 584(1) # 8-byte Folded Spill +; CHECK-NEXT: stxv 52, 208(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 53, 224(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 54, 240(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 55, 256(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 56, 272(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 57, 288(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 58, 304(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 59, 320(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 60, 336(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 61, 352(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 62, 368(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 63, 384(1) # 16-byte Folded Spill ; CHECK-NEXT: blt 0, .LBB0_7 ; CHECK-NEXT: # %bb.1: # %_loop_1_do_.lr.ph ; CHECK-NEXT: lwz 3, 0(3) ; CHECK-NEXT: cmpwi 3, 1 ; CHECK-NEXT: blt 0, .LBB0_7 ; CHECK-NEXT: # %bb.2: # %_loop_1_do_.preheader -; CHECK-NEXT: mr 23, 5 -; CHECK-NEXT: ld 5, 704(1) +; CHECK-NEXT: mr 21, 5 +; CHECK-NEXT: ld 5, 848(1) +; CHECK-NEXT: lwa 0, 0(7) ; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: ld 4, 728(1) +; CHECK-NEXT: mr 14, 7 +; CHECK-NEXT: mr 22, 6 ; CHECK-NEXT: mr 11, 10 -; CHECK-NEXT: mr 10, 6 -; CHECK-NEXT: std 8, 40(1) # 8-byte Folded Spill -; CHECK-NEXT: std 9, 48(1) # 8-byte Folded Spill -; CHECK-NEXT: lwa 7, 0(7) -; CHECK-NEXT: ld 29, 840(1) +; CHECK-NEXT: ld 18, 736(1) +; CHECK-NEXT: ld 17, 728(1) +; CHECK-NEXT: std 8, 32(1) # 8-byte Folded Spill +; CHECK-NEXT: std 9, 40(1) # 
8-byte Folded Spill ; CHECK-NEXT: cmpldi 3, 9 -; CHECK-NEXT: ld 27, 832(1) -; CHECK-NEXT: ld 28, 856(1) -; CHECK-NEXT: std 5, 112(1) # 8-byte Folded Spill -; CHECK-NEXT: std 4, 120(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 1, 0(5) +; CHECK-NEXT: ld 19, 744(1) +; CHECK-NEXT: ld 20, 752(1) +; CHECK-NEXT: std 5, 200(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 5, 840(1) +; CHECK-NEXT: std 17, 80(1) # 8-byte Folded Spill +; CHECK-NEXT: std 18, 88(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 36, 0(18) +; CHECK-NEXT: std 19, 96(1) # 8-byte Folded Spill +; CHECK-NEXT: std 20, 104(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 13, 0(19) +; CHECK-NEXT: lxv 12, 0(20) +; CHECK-NEXT: ld 30, 832(1) +; CHECK-NEXT: ld 2, 824(1) +; CHECK-NEXT: ld 12, 816(1) +; CHECK-NEXT: ld 29, 808(1) +; CHECK-NEXT: std 2, 176(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, 184(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 160(1) # 8-byte Folded Spill +; CHECK-NEXT: std 12, 168(1) # 8-byte Folded Spill +; CHECK-NEXT: std 5, 192(1) # 8-byte Folded Spill ; CHECK-NEXT: li 5, 9 -; CHECK-NEXT: ld 30, 848(1) -; CHECK-NEXT: lxv 0, 0(4) -; CHECK-NEXT: sldi 4, 7, 3 -; CHECK-NEXT: add 4, 4, 23 -; CHECK-NEXT: sldi 16, 7, 2 -; CHECK-NEXT: sldi 15, 7, 1 -; CHECK-NEXT: ld 17, 760(1) -; CHECK-NEXT: std 27, 192(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, 200(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 6, 0(29) -; CHECK-NEXT: ld 26, 824(1) -; CHECK-NEXT: ld 25, 816(1) -; CHECK-NEXT: ld 24, 808(1) -; CHECK-NEXT: std 30, 208(1) # 8-byte Folded Spill -; CHECK-NEXT: std 28, 216(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, 176(1) # 8-byte Folded Spill -; CHECK-NEXT: std 26, 184(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: iselgt 3, 3, 5 -; CHECK-NEXT: ld 5, 752(1) -; CHECK-NEXT: addi 14, 4, 32 -; CHECK-NEXT: sldi 4, 7, 4 -; CHECK-NEXT: add 29, 7, 15 -; CHECK-NEXT: ld 22, 800(1) -; CHECK-NEXT: ld 21, 792(1) -; CHECK-NEXT: ld 20, 784(1) -; CHECK-NEXT: std 22, 160(1) # 
8-byte Folded Spill -; CHECK-NEXT: std 20, 144(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 152(1) # 8-byte Folded Spill -; CHECK-NEXT: addi 6, 3, -2 -; CHECK-NEXT: add 3, 7, 16 -; CHECK-NEXT: add 4, 4, 23 -; CHECK-NEXT: ld 19, 776(1) -; CHECK-NEXT: ld 18, 768(1) -; CHECK-NEXT: lxv 4, 0(8) -; CHECK-NEXT: lxv 2, 0(11) -; CHECK-NEXT: std 18, 128(1) # 8-byte Folded Spill -; CHECK-NEXT: std 19, 136(1) # 8-byte Folded Spill -; CHECK-NEXT: addi 12, 4, 32 -; CHECK-NEXT: rldicl 2, 6, 61, 3 -; CHECK-NEXT: sldi 6, 3, 3 -; CHECK-NEXT: ld 4, 736(1) -; CHECK-NEXT: ld 31, 720(1) -; CHECK-NEXT: std 11, 56(1) # 8-byte Folded Spill -; CHECK-NEXT: std 31, 64(1) # 8-byte Folded Spill -; CHECK-NEXT: add 11, 23, 6 -; CHECK-NEXT: ld 6, 744(1) -; CHECK-NEXT: ld 8, 712(1) -; CHECK-NEXT: std 5, 96(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 104(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 39, 0(5) -; CHECK-NEXT: sldi 5, 7, 5 -; CHECK-NEXT: lxv 5, 0(30) -; CHECK-NEXT: lxv 7, 0(28) -; CHECK-NEXT: lxv 3, 0(9) -; CHECK-NEXT: addi 2, 2, 1 -; CHECK-NEXT: add 30, 23, 5 -; CHECK-NEXT: sldi 5, 29, 3 -; CHECK-NEXT: add 28, 23, 5 -; CHECK-NEXT: ld 5, 864(1) +; CHECK-NEXT: ld 28, 800(1) +; CHECK-NEXT: ld 27, 792(1) +; CHECK-NEXT: ld 26, 784(1) +; CHECK-NEXT: ld 25, 776(1) +; CHECK-NEXT: ld 24, 768(1) +; CHECK-NEXT: ld 23, 760(1) +; CHECK-NEXT: ld 16, 720(1) +; CHECK-NEXT: ld 15, 712(1) +; CHECK-NEXT: ld 6, 704(1) +; CHECK-NEXT: ld 7, 696(1) +; CHECK-NEXT: ld 10, 688(1) ; CHECK-NEXT: lxv 43, 0(8) -; CHECK-NEXT: lxv 42, 0(31) -; CHECK-NEXT: lxv 38, 0(17) -; CHECK-NEXT: std 4, 72(1) # 8-byte Folded Spill -; CHECK-NEXT: std 6, 80(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 41, 0(4) -; CHECK-NEXT: lxv 40, 0(6) -; CHECK-NEXT: lxv 33, 0(18) -; CHECK-NEXT: lxv 32, 0(19) -; CHECK-NEXT: std 5, 88(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 37, 0(20) -; CHECK-NEXT: lxv 36, 0(21) -; CHECK-NEXT: lxv 13, 0(22) -; CHECK-NEXT: lxv 12, 0(24) -; CHECK-NEXT: lxv 11, 0(25) -; CHECK-NEXT: lxv 9, 0(26) -; CHECK-NEXT: 
lxv 8, 0(27) -; CHECK-NEXT: lxv 10, 0(5) -; CHECK-NEXT: mulli 27, 7, 48 -; CHECK-NEXT: mulli 26, 7, 6 -; CHECK-NEXT: li 25, 1 -; CHECK-NEXT: li 24, 0 -; CHECK-NEXT: mr 5, 23 +; CHECK-NEXT: std 11, 48(1) # 8-byte Folded Spill +; CHECK-NEXT: std 6, 56(1) # 8-byte Folded Spill +; CHECK-NEXT: std 27, 144(1) # 8-byte Folded Spill +; CHECK-NEXT: std 28, 152(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 8, 7 +; CHECK-NEXT: std 25, 128(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 136(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 64(1) # 8-byte Folded Spill +; CHECK-NEXT: std 16, 72(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, 112(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: iselgt 3, 3, 5 +; CHECK-NEXT: sldi 5, 0, 3 +; CHECK-NEXT: add 5, 5, 21 +; CHECK-NEXT: lxv 42, 0(9) +; CHECK-NEXT: lxv 41, 0(11) +; CHECK-NEXT: lxv 40, 0(10) +; CHECK-NEXT: lxv 39, 0(7) +; CHECK-NEXT: mulli 11, 0, 48 +; CHECK-NEXT: addi 14, 5, 32 +; CHECK-NEXT: sldi 5, 0, 4 +; CHECK-NEXT: addi 3, 3, -2 +; CHECK-NEXT: lxv 38, 0(6) +; CHECK-NEXT: lxv 33, 0(15) +; CHECK-NEXT: lxv 32, 0(16) +; CHECK-NEXT: lxv 37, 0(17) +; CHECK-NEXT: add 5, 5, 21 +; CHECK-NEXT: lxv 11, 0(23) +; CHECK-NEXT: lxv 10, 0(24) +; CHECK-NEXT: lxv 8, 0(25) +; CHECK-NEXT: lxv 6, 0(26) +; CHECK-NEXT: rldicl 3, 3, 61, 3 +; CHECK-NEXT: li 26, 0 +; CHECK-NEXT: mr 25, 21 +; CHECK-NEXT: addi 31, 5, 32 +; CHECK-NEXT: mulli 5, 0, 40 +; CHECK-NEXT: lxv 5, 0(27) +; CHECK-NEXT: lxv 3, 0(28) +; CHECK-NEXT: lxv 1, 0(29) +; CHECK-NEXT: lxv 0, 0(12) +; CHECK-NEXT: mulli 28, 0, 6 +; CHECK-NEXT: addi 3, 3, 1 +; CHECK-NEXT: li 27, 1 +; CHECK-NEXT: add 18, 21, 5 +; CHECK-NEXT: sldi 5, 0, 5 +; CHECK-NEXT: lxv 2, 0(2) +; CHECK-NEXT: lxv 4, 0(30) +; CHECK-NEXT: sldi 2, 0, 1 +; CHECK-NEXT: add 19, 21, 5 +; CHECK-NEXT: mulli 5, 0, 24 +; CHECK-NEXT: add 20, 21, 5 +; CHECK-NEXT: ld 5, 192(1) # 8-byte Folded Reload +; CHECK-NEXT: lxv 9, 0(5) +; CHECK-NEXT: ld 5, 200(1) # 8-byte Folded Reload +; CHECK-NEXT: lxv 
7, 0(5) ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: # %_loop_2_do_.lr.ph ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_4 Depth 2 -; CHECK-NEXT: maddld 6, 26, 24, 3 -; CHECK-NEXT: mtctr 2 -; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 22, 23, 6 -; CHECK-NEXT: maddld 6, 26, 24, 16 -; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 21, 23, 6 -; CHECK-NEXT: maddld 6, 26, 24, 29 -; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 20, 23, 6 -; CHECK-NEXT: maddld 6, 26, 24, 15 -; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 19, 23, 6 -; CHECK-NEXT: maddld 6, 26, 24, 7 -; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 18, 23, 6 -; CHECK-NEXT: mulld 6, 26, 24 -; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 17, 23, 6 -; CHECK-NEXT: mr 6, 10 +; CHECK-NEXT: maddld 5, 28, 26, 2 +; CHECK-NEXT: mr 6, 22 +; CHECK-NEXT: mr 29, 20 +; CHECK-NEXT: mr 30, 19 +; CHECK-NEXT: mr 12, 18 +; CHECK-NEXT: mtctr 3 +; CHECK-NEXT: sldi 5, 5, 3 +; CHECK-NEXT: add 24, 21, 5 +; CHECK-NEXT: maddld 5, 28, 26, 0 +; CHECK-NEXT: sldi 5, 5, 3 +; CHECK-NEXT: add 23, 21, 5 +; CHECK-NEXT: mr 5, 25 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_4: # %_loop_2_do_ ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lxvp 34, 0(6) -; CHECK-NEXT: lxvp 44, 0(17) -; CHECK-NEXT: xvmaddadp 4, 45, 35 -; CHECK-NEXT: lxvp 46, 0(18) -; CHECK-NEXT: xvmaddadp 3, 47, 35 -; CHECK-NEXT: lxvp 48, 0(19) -; CHECK-NEXT: lxvp 50, 0(20) -; CHECK-NEXT: lxvp 62, 0(21) -; CHECK-NEXT: lxvp 60, 0(22) +; CHECK-NEXT: lxvp 44, 0(5) +; CHECK-NEXT: xvmaddadp 43, 45, 35 +; CHECK-NEXT: lxvp 46, 0(23) +; CHECK-NEXT: xvmaddadp 42, 47, 35 +; CHECK-NEXT: lxvp 48, 0(24) +; CHECK-NEXT: lxvp 50, 0(29) +; CHECK-NEXT: lxvp 62, 0(30) +; CHECK-NEXT: lxvp 60, 0(12) ; CHECK-NEXT: lxvp 58, 32(6) -; CHECK-NEXT: lxvp 56, 32(17) -; CHECK-NEXT: lxvp 54, 32(18) -; CHECK-NEXT: lxvp 52, 32(19) -; CHECK-NEXT: lxvp 30, 32(20) -; CHECK-NEXT: lxvp 28, 32(21) -; CHECK-NEXT: lxvp 26, 32(22) -; 
CHECK-NEXT: xvmaddadp 2, 49, 35 -; CHECK-NEXT: xvmaddadp 1, 51, 35 -; CHECK-NEXT: xvmaddadp 43, 63, 35 -; CHECK-NEXT: xvmaddadp 42, 61, 35 -; CHECK-NEXT: xvmaddadp 0, 44, 34 -; CHECK-NEXT: xvmaddadp 41, 46, 34 -; CHECK-NEXT: xvmaddadp 40, 48, 34 -; CHECK-NEXT: xvmaddadp 39, 50, 34 -; CHECK-NEXT: xvmaddadp 38, 62, 34 -; CHECK-NEXT: xvmaddadp 33, 60, 34 -; CHECK-NEXT: xvmaddadp 32, 57, 59 -; CHECK-NEXT: xvmaddadp 37, 55, 59 -; CHECK-NEXT: xvmaddadp 36, 53, 59 -; CHECK-NEXT: xvmaddadp 13, 31, 59 -; CHECK-NEXT: xvmaddadp 12, 29, 59 -; CHECK-NEXT: xvmaddadp 11, 27, 59 -; CHECK-NEXT: xvmaddadp 9, 56, 58 -; CHECK-NEXT: xvmaddadp 8, 54, 58 -; CHECK-NEXT: xvmaddadp 6, 52, 58 -; CHECK-NEXT: xvmaddadp 5, 30, 58 -; CHECK-NEXT: xvmaddadp 7, 28, 58 -; CHECK-NEXT: xvmaddadp 10, 26, 58 +; CHECK-NEXT: lxvp 56, 32(5) +; CHECK-NEXT: lxvp 54, 32(23) +; CHECK-NEXT: lxvp 52, 32(24) +; CHECK-NEXT: lxvp 30, 32(29) +; CHECK-NEXT: lxvp 28, 32(30) +; CHECK-NEXT: lxvp 26, 32(12) +; CHECK-NEXT: xvmaddadp 41, 49, 35 +; CHECK-NEXT: xvmaddadp 40, 51, 35 +; CHECK-NEXT: xvmaddadp 39, 63, 35 +; CHECK-NEXT: xvmaddadp 38, 61, 35 +; CHECK-NEXT: xvmaddadp 33, 44, 34 +; CHECK-NEXT: xvmaddadp 32, 46, 34 +; CHECK-NEXT: xvmaddadp 37, 48, 34 +; CHECK-NEXT: xvmaddadp 36, 50, 34 +; CHECK-NEXT: xvmaddadp 13, 62, 34 +; CHECK-NEXT: xvmaddadp 12, 60, 34 +; CHECK-NEXT: xvmaddadp 11, 57, 59 +; CHECK-NEXT: xvmaddadp 10, 55, 59 +; CHECK-NEXT: xvmaddadp 8, 53, 59 +; CHECK-NEXT: xvmaddadp 6, 31, 59 +; CHECK-NEXT: xvmaddadp 5, 29, 59 +; CHECK-NEXT: xvmaddadp 3, 27, 59 +; CHECK-NEXT: xvmaddadp 1, 56, 58 +; CHECK-NEXT: xvmaddadp 0, 54, 58 +; CHECK-NEXT: xvmaddadp 2, 52, 58 +; CHECK-NEXT: xvmaddadp 4, 30, 58 +; CHECK-NEXT: xvmaddadp 9, 28, 58 +; CHECK-NEXT: xvmaddadp 7, 26, 58 ; CHECK-NEXT: addi 6, 6, 64 -; CHECK-NEXT: addi 17, 17, 64 -; CHECK-NEXT: addi 18, 18, 64 -; CHECK-NEXT: addi 19, 19, 64 -; CHECK-NEXT: addi 20, 20, 64 -; CHECK-NEXT: addi 21, 21, 64 -; CHECK-NEXT: addi 22, 22, 64 +; CHECK-NEXT: addi 5, 5, 64 +; 
CHECK-NEXT: addi 23, 23, 64 +; CHECK-NEXT: addi 24, 24, 64 +; CHECK-NEXT: addi 29, 29, 64 +; CHECK-NEXT: addi 30, 30, 64 +; CHECK-NEXT: addi 12, 12, 64 ; CHECK-NEXT: bdnz .LBB0_4 ; CHECK-NEXT: # %bb.5: # %_loop_2_endl_ ; CHECK-NEXT: # -; CHECK-NEXT: addi 25, 25, 6 -; CHECK-NEXT: add 5, 5, 27 -; CHECK-NEXT: add 14, 14, 27 -; CHECK-NEXT: add 11, 11, 27 -; CHECK-NEXT: add 12, 12, 27 -; CHECK-NEXT: add 30, 30, 27 -; CHECK-NEXT: add 28, 28, 27 -; CHECK-NEXT: addi 24, 24, 1 -; CHECK-NEXT: cmpld 25, 0 +; CHECK-NEXT: addi 27, 27, 6 +; CHECK-NEXT: add 25, 25, 11 +; CHECK-NEXT: add 14, 14, 11 +; CHECK-NEXT: add 18, 18, 11 +; CHECK-NEXT: add 31, 31, 11 +; CHECK-NEXT: add 19, 19, 11 +; CHECK-NEXT: add 20, 20, 11 +; CHECK-NEXT: addi 26, 26, 1 +; CHECK-NEXT: cmpld 27, 4 ; CHECK-NEXT: ble 0, .LBB0_3 ; CHECK-NEXT: # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit +; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 43, 0(3) ; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 4, 0(3) +; CHECK-NEXT: stxv 42, 0(3) ; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 3, 0(3) +; CHECK-NEXT: stxv 41, 0(3) ; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 2, 0(3) -; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 1, 0(3) +; CHECK-NEXT: stxv 40, 0(10) +; CHECK-NEXT: stxv 39, 0(8) +; CHECK-NEXT: stxv 38, 0(3) ; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 43, 0(8) -; CHECK-NEXT: stxv 42, 0(3) -; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 0, 0(3) +; CHECK-NEXT: stxv 33, 0(3) ; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 41, 0(3) +; CHECK-NEXT: stxv 32, 0(3) ; CHECK-NEXT: ld 3, 80(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 40, 0(3) +; CHECK-NEXT: stxv 37, 0(3) +; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 36, 0(3) ; CHECK-NEXT: ld 3, 96(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 39, 0(3) +; 
CHECK-NEXT: stxv 13, 0(3) ; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 38, 0(3) +; CHECK-NEXT: stxv 12, 0(3) +; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 11, 0(3) +; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 10, 0(3) ; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 33, 0(3) +; CHECK-NEXT: stxv 8, 0(3) ; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 32, 0(3) +; CHECK-NEXT: stxv 6, 0(3) ; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 37, 0(3) +; CHECK-NEXT: stxv 5, 0(3) ; CHECK-NEXT: ld 3, 152(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 36, 0(3) +; CHECK-NEXT: stxv 3, 0(3) ; CHECK-NEXT: ld 3, 160(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 13, 0(3) +; CHECK-NEXT: stxv 1, 0(3) ; CHECK-NEXT: ld 3, 168(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 12, 0(3) +; CHECK-NEXT: stxv 0, 0(3) ; CHECK-NEXT: ld 3, 176(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 11, 0(3) +; CHECK-NEXT: stxv 2, 0(3) ; CHECK-NEXT: ld 3, 184(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 9, 0(3) +; CHECK-NEXT: stxv 4, 0(3) ; CHECK-NEXT: ld 3, 192(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 8, 0(3) +; CHECK-NEXT: stxv 9, 0(3) ; CHECK-NEXT: ld 3, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 6, 0(3) -; CHECK-NEXT: ld 3, 208(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 5, 0(3) -; CHECK-NEXT: ld 3, 216(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 7, 0(3) -; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 10, 0(3) ; CHECK-NEXT: .LBB0_7: # %_return_bb -; CHECK-NEXT: lxv 63, 400(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 62, 384(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 61, 368(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 60, 352(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 59, 336(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 58, 320(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 57, 304(1) # 16-byte Folded Reload -; CHECK-NEXT: 
lxv 56, 288(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 55, 272(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 54, 256(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 53, 240(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 52, 224(1) # 16-byte Folded Reload -; CHECK-NEXT: lfd 31, 600(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 30, 592(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 29, 584(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 28, 576(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 27, 568(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 26, 560(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 31, 552(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 30, 544(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 29, 536(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 28, 528(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 27, 520(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 26, 512(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 25, 504(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 24, 496(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 23, 488(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 22, 480(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 21, 472(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, 464(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, 456(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 18, 448(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 17, 440(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 16, 432(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 15, 424(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 14, 416(1) # 8-byte Folded Reload -; CHECK-NEXT: addi 1, 1, 608 +; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 62, 368(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 61, 352(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 60, 336(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 59, 320(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 58, 304(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 57, 288(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 56, 272(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 55, 
256(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 54, 240(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload +; CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 30, 576(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 29, 568(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 28, 560(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 27, 552(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 26, 544(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 31, 536(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 30, 528(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 27, 504(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 25, 488(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 24, 480(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 23, 472(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 22, 464(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 21, 456(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 20, 448(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 19, 440(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 18, 432(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 17, 424(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 16, 416(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 592 ; CHECK-NEXT: blr entry: %_val_l_ = load i32, i32* %.l, align 4