Index: llvm/lib/CodeGen/MachineLICM.cpp =================================================================== --- llvm/lib/CodeGen/MachineLICM.cpp +++ llvm/lib/CodeGen/MachineLICM.cpp @@ -1275,9 +1275,43 @@ return false; } - // Rematerializable instructions should always be hoisted since the register - // allocator can just pull them down again when needed. - if (TII->isTriviallyReMaterializable(MI, AA)) + // If the rematerializable instruction's only user is in the loop, and the + // user is also a loop invariant, hoisting the rematerializable instruction + // will always make the user be hoisted outside of the loop. RA can only + // sink rematerializable instruction, not rematerializable instruction's + // user. Hoisting both rematerializable instruction and its user will + // increase register pressure. + // For targets which support hoisting cheap rematerializable instruction based + // on register pressure, we should do it considering register pressure. + auto ShouldHoistRemat = [&](MachineInstr &MI) { + if (!TII->isTriviallyReMaterializable(MI, AA)) + return false; + + // If this is not a cheap rematerializable instruction, hoist it. + if (!CheapInstr) + return true; + + // If target prefers not to hoist cheap instructions based on register + // pressure, hoist the rematerializable instruction now. + if (!TII->shouldHoistCheapInstructions()) + return true; + + // Make sure rematerializable instruction's only user is not loop invariant. + // Remat clients assume operand 0 is the defined register. 
+ if (!MI.getNumOperands() || !MI.getOperand(0).isReg()) + return true; + Register DefReg = MI.getOperand(0).getReg(); + if (!MRI->hasOneNonDBGUse(DefReg)) + return true; + MachineInstr *UseMI = &*MRI->use_instr_nodbg_begin(DefReg); + bool IsSafeToMove = false; + if (IsLoopInvariantInst(*UseMI, IsSafeToMove)) + return false; + return true; + }; + + bool IsRematHoisted = ShouldHoistRemat(MI); + if (IsRematHoisted) return true; // FIXME: If there are long latency loop-invariant instructions inside the @@ -1330,8 +1364,7 @@ // High register pressure situation, only hoist if the instruction is going // to be remat'ed. - if (!TII->isTriviallyReMaterializable(MI, AA) && - !MI.isDereferenceableInvariantLoad(AA)) { + if (!IsRematHoisted && !MI.isDereferenceableInvariantLoad(AA)) { LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); return false; } Index: llvm/lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -314,6 +314,9 @@ return false; } + /// Hoist cheap instructions based on register pressure in Machine LICM. 
+ bool shouldHoistCheapInstructions() const override { return true; } + bool useMachineCombiner() const override { return true; } Index: llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll =================================================================== --- llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll +++ llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll @@ -6,431 +6,282 @@ define zeroext i32 @test1(i64 %0, i64* %1) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: stdu 1, -720(1) -; CHECK-NEXT: .cfi_def_cfa_offset 720 -; CHECK-NEXT: .cfi_offset r14, -144 -; CHECK-NEXT: .cfi_offset r15, -136 -; CHECK-NEXT: .cfi_offset r16, -128 -; CHECK-NEXT: .cfi_offset r17, -120 -; CHECK-NEXT: .cfi_offset r18, -112 -; CHECK-NEXT: .cfi_offset r19, -104 -; CHECK-NEXT: .cfi_offset r20, -96 -; CHECK-NEXT: .cfi_offset r21, -88 -; CHECK-NEXT: .cfi_offset r22, -80 -; CHECK-NEXT: .cfi_offset r23, -72 -; CHECK-NEXT: .cfi_offset r24, -64 -; CHECK-NEXT: .cfi_offset r25, -56 -; CHECK-NEXT: .cfi_offset r26, -48 -; CHECK-NEXT: .cfi_offset r27, -40 -; CHECK-NEXT: .cfi_offset r28, -32 -; CHECK-NEXT: .cfi_offset r29, -24 -; CHECK-NEXT: .cfi_offset r30, -16 -; CHECK-NEXT: .cfi_offset r31, -8 -; CHECK-NEXT: .cfi_offset r2, -152 ; CHECK-NEXT: lis 5, 4 ; CHECK-NEXT: ori 6, 5, 6292 ; CHECK-NEXT: ori 5, 5, 6291 -; CHECK-NEXT: sldi 6, 6, 32 -; CHECK-NEXT: oris 7, 6, 13030 -; CHECK-NEXT: oris 8, 6, 13066 -; CHECK-NEXT: ori 7, 7, 3704 -; CHECK-NEXT: oris 9, 6, 13054 -; CHECK-NEXT: ori 8, 8, 44408 -; CHECK-NEXT: ori 9, 9, 30840 -; CHECK-NEXT: add 7, 4, 7 -; CHECK-NEXT: oris 10, 6, 13042 -; CHECK-NEXT: ori 10, 10, 17272 -; CHECK-NEXT: std 7, 384(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 8 -; CHECK-NEXT: oris 11, 6, 13078 -; CHECK-NEXT: ori 11, 11, 57976 -; CHECK-NEXT: std 7, 376(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 9 -; CHECK-NEXT: oris 12, 6, 13115 -; CHECK-NEXT: ori 12, 12, 33144 -; CHECK-NEXT: std 7, 368(1) # 
8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 10 -; CHECK-NEXT: oris 0, 6, 13103 -; CHECK-NEXT: ori 0, 0, 19576 -; CHECK-NEXT: std 7, 360(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 11 -; CHECK-NEXT: std 30, 704(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 30, 6, 13091 +; CHECK-NEXT: sldi 0, 6, 32 +; CHECK-NEXT: oris 6, 0, 13030 +; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill +; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill +; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: oris 7, 0, 13066 +; CHECK-NEXT: oris 8, 0, 13054 +; CHECK-NEXT: oris 9, 0, 13042 +; CHECK-NEXT: oris 10, 0, 13078 +; CHECK-NEXT: oris 11, 0, 13115 +; CHECK-NEXT: oris 12, 0, 13103 +; CHECK-NEXT: oris 30, 0, 13091 +; CHECK-NEXT: oris 29, 0, 13127 +; CHECK-NEXT: oris 28, 0, 13164 +; CHECK-NEXT: oris 27, 0, 13152 +; CHECK-NEXT: oris 26, 0, 13139 +; CHECK-NEXT: oris 25, 0, 13176 +; CHECK-NEXT: ori 7, 7, 44408 +; CHECK-NEXT: ori 6, 6, 3704 +; CHECK-NEXT: ori 8, 8, 30840 +; CHECK-NEXT: ori 9, 9, 17272 +; CHECK-NEXT: ori 10, 10, 57976 +; CHECK-NEXT: ori 11, 11, 33144 +; CHECK-NEXT: ori 12, 12, 19576 ; CHECK-NEXT: ori 30, 30, 6008 -; CHECK-NEXT: std 7, 352(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 12 -; CHECK-NEXT: std 29, 696(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 29, 6, 13127 ; CHECK-NEXT: ori 29, 29, 46712 -; CHECK-NEXT: sldi 5, 5, 32 -; CHECK-NEXT: oris 5, 5, 29347 -; CHECK-NEXT: ori 5, 5, 20088 -; CHECK-NEXT: lis 8, 402 -; CHECK-NEXT: lis 9, 451 -; CHECK-NEXT: lis 10, 500 -; CHECK-NEXT: lis 11, 549 -; CHECK-NEXT: std 31, 712(1) # 8-byte Folded Spill -; CHECK-NEXT: std 2, 568(1) # 8-byte Folded Spill -; CHECK-NEXT: std 7, 344(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 0 -; CHECK-NEXT: std 28, 688(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 28, 6, 13164 ; CHECK-NEXT: ori 28, 28, 21880 -; 
CHECK-NEXT: std 7, 336(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 30 -; CHECK-NEXT: std 27, 680(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 27, 6, 13152 ; CHECK-NEXT: ori 27, 27, 8312 -; CHECK-NEXT: std 7, 328(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 29 -; CHECK-NEXT: std 26, 672(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 26, 6, 13139 ; CHECK-NEXT: ori 26, 26, 60280 -; CHECK-NEXT: std 7, 320(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 28 -; CHECK-NEXT: std 25, 664(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 25, 6, 13176 ; CHECK-NEXT: ori 25, 25, 35448 -; CHECK-NEXT: std 7, 312(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 27 -; CHECK-NEXT: std 7, 304(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 26 -; CHECK-NEXT: std 7, 296(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 4, 25 -; CHECK-NEXT: std 7, 288(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 7, 6, 13213 -; CHECK-NEXT: ori 7, 7, 10616 -; CHECK-NEXT: add 7, 4, 7 -; CHECK-NEXT: std 7, 280(1) # 8-byte Folded Spill -; CHECK-NEXT: oris 7, 6, 13200 -; CHECK-NEXT: oris 6, 6, 13188 -; CHECK-NEXT: ori 7, 7, 62584 -; CHECK-NEXT: ori 6, 6, 49016 -; CHECK-NEXT: add 7, 4, 7 +; CHECK-NEXT: sldi 5, 5, 32 +; CHECK-NEXT: oris 5, 5, 29347 +; CHECK-NEXT: ori 5, 5, 20088 +; CHECK-NEXT: add 5, 4, 5 ; CHECK-NEXT: add 6, 4, 6 -; CHECK-NEXT: add 4, 4, 5 -; CHECK-NEXT: lis 5, 268 -; CHECK-NEXT: std 4, 256(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill +; CHECK-NEXT: oris 24, 0, 13200 +; CHECK-NEXT: ori 24, 24, 62584 +; CHECK-NEXT: add 24, 4, 24 +; CHECK-NEXT: std 31, -8(1) # 8-byte Folded Spill +; CHECK-NEXT: std 2, -152(1) # 8-byte Folded Spill +; CHECK-NEXT: std 6, -160(1) # 8-byte Folded Spill +; CHECK-NEXT: add 6, 4, 7 +; CHECK-NEXT: add 7, 4, 8 +; CHECK-NEXT: add 8, 4, 9 +; CHECK-NEXT: add 9, 4, 10 +; CHECK-NEXT: add 10, 4, 11 +; CHECK-NEXT: add 11, 4, 12 +; CHECK-NEXT: add 12, 4, 30 +; CHECK-NEXT: add 30, 4, 29 +; CHECK-NEXT: add 29, 4, 28 +; CHECK-NEXT: add 28, 4, 27 
+; CHECK-NEXT: add 27, 4, 26 +; CHECK-NEXT: add 26, 4, 25 +; CHECK-NEXT: oris 25, 0, 13213 +; CHECK-NEXT: oris 0, 0, 13188 +; CHECK-NEXT: ori 25, 25, 10616 +; CHECK-NEXT: ori 0, 0, 49016 +; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill +; CHECK-NEXT: add 23, 4, 0 +; CHECK-NEXT: add 25, 4, 25 ; CHECK-NEXT: lis 4, 585 -; CHECK-NEXT: ori 4, 4, 61440 -; CHECK-NEXT: std 4, 560(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, -88(1) # 8-byte Folded Spill +; CHECK-NEXT: ori 21, 4, 61440 ; CHECK-NEXT: lis 4, 48 -; CHECK-NEXT: ori 4, 4, 54272 -; CHECK-NEXT: std 4, 552(1) # 8-byte Folded Spill +; CHECK-NEXT: std 20, -96(1) # 8-byte Folded Spill +; CHECK-NEXT: ori 20, 4, 54272 ; CHECK-NEXT: lis 4, 97 -; CHECK-NEXT: ori 4, 4, 43008 -; CHECK-NEXT: std 4, 544(1) # 8-byte Folded Spill +; CHECK-NEXT: std 19, -104(1) # 8-byte Folded Spill +; CHECK-NEXT: ori 19, 4, 43008 ; CHECK-NEXT: lis 4, 146 -; CHECK-NEXT: ori 4, 4, 31744 -; CHECK-NEXT: std 4, 536(1) # 8-byte Folded Spill +; CHECK-NEXT: std 18, -112(1) # 8-byte Folded Spill +; CHECK-NEXT: ori 18, 4, 31744 ; CHECK-NEXT: lis 4, 195 -; CHECK-NEXT: ori 4, 4, 20480 -; CHECK-NEXT: std 4, 528(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, -120(1) # 8-byte Folded Spill +; CHECK-NEXT: ori 17, 4, 20480 ; CHECK-NEXT: lis 4, 244 -; CHECK-NEXT: ori 4, 4, 9216 -; CHECK-NEXT: std 4, 520(1) # 8-byte Folded Spill +; CHECK-NEXT: std 16, -128(1) # 8-byte Folded Spill +; CHECK-NEXT: ori 16, 4, 9216 ; CHECK-NEXT: lis 4, 292 -; CHECK-NEXT: ori 4, 4, 63488 -; CHECK-NEXT: std 4, 512(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, -136(1) # 8-byte Folded Spill +; CHECK-NEXT: ori 15, 4, 63488 ; CHECK-NEXT: lis 4, 341 -; CHECK-NEXT: ori 4, 4, 52224 -; CHECK-NEXT: std 4, 504(1) # 8-byte Folded Spill +; CHECK-NEXT: std 14, -144(1) # 8-byte Folded Spill +; CHECK-NEXT: ori 14, 4, 52224 ; CHECK-NEXT: lis 4, 390 -; CHECK-NEXT: ori 4, 4, 40960 -; CHECK-NEXT: std 4, 496(1) # 8-byte Folded Spill +; CHECK-NEXT: ori 31, 4, 40960 ; CHECK-NEXT: lis 4, 439 -; 
CHECK-NEXT: ori 4, 4, 29696 -; CHECK-NEXT: std 4, 488(1) # 8-byte Folded Spill +; CHECK-NEXT: ori 2, 4, 29696 ; CHECK-NEXT: lis 4, 488 -; CHECK-NEXT: ori 4, 4, 18432 -; CHECK-NEXT: std 4, 480(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 537 -; CHECK-NEXT: ori 4, 4, 7168 -; CHECK-NEXT: std 4, 472(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 36 -; CHECK-NEXT: ori 4, 4, 40704 -; CHECK-NEXT: std 4, 464(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 85 -; CHECK-NEXT: ori 4, 4, 29440 -; CHECK-NEXT: std 4, 456(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 134 -; CHECK-NEXT: ori 4, 4, 18176 -; CHECK-NEXT: std 4, 448(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 183 -; CHECK-NEXT: ori 4, 4, 6912 -; CHECK-NEXT: std 4, 440(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 231 -; CHECK-NEXT: ori 4, 4, 61184 -; CHECK-NEXT: std 4, 432(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 280 -; CHECK-NEXT: ori 4, 4, 49920 -; CHECK-NEXT: std 4, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 329 -; CHECK-NEXT: ori 4, 4, 38656 -; CHECK-NEXT: std 4, 416(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 378 -; CHECK-NEXT: ori 4, 4, 27392 -; CHECK-NEXT: std 4, 408(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 427 -; CHECK-NEXT: ori 4, 4, 16128 -; CHECK-NEXT: std 4, 400(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 476 -; CHECK-NEXT: ori 4, 4, 4864 -; CHECK-NEXT: std 4, 248(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 524 -; CHECK-NEXT: ori 4, 4, 59136 -; CHECK-NEXT: std 4, 240(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 573 -; CHECK-NEXT: ori 4, 4, 47872 -; CHECK-NEXT: std 4, 232(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 24 -; CHECK-NEXT: ori 4, 4, 27136 -; CHECK-NEXT: std 4, 224(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 73 -; CHECK-NEXT: ori 4, 4, 15872 -; CHECK-NEXT: std 4, 216(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 122 -; CHECK-NEXT: ori 4, 4, 4608 -; CHECK-NEXT: std 4, 208(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 170 -; CHECK-NEXT: ori 4, 4, 58880 -; 
CHECK-NEXT: std 4, 200(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 4, 219 -; CHECK-NEXT: ori 4, 4, 47616 -; CHECK-NEXT: std 4, 192(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 36352 -; CHECK-NEXT: lis 5, 317 -; CHECK-NEXT: std 4, 184(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 25088 -; CHECK-NEXT: lis 5, 366 -; CHECK-NEXT: std 4, 176(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 13824 -; CHECK-NEXT: lis 5, 415 -; CHECK-NEXT: std 4, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 2560 -; CHECK-NEXT: lis 5, 463 -; CHECK-NEXT: std 4, 160(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 56832 -; CHECK-NEXT: lis 5, 512 -; CHECK-NEXT: std 4, 152(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 45568 -; CHECK-NEXT: lis 5, 561 -; CHECK-NEXT: std 4, 144(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 34304 -; CHECK-NEXT: lis 5, 12 -; CHECK-NEXT: std 4, 136(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 13568 -; CHECK-NEXT: lis 5, 61 -; CHECK-NEXT: std 4, 128(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 2304 -; CHECK-NEXT: lis 5, 109 -; CHECK-NEXT: std 4, 120(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 56576 -; CHECK-NEXT: lis 5, 158 -; CHECK-NEXT: std 4, 112(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 45312 -; CHECK-NEXT: lis 5, 207 -; CHECK-NEXT: std 4, 104(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 34048 -; CHECK-NEXT: lis 5, 256 -; CHECK-NEXT: std 6, 264(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 6, 305 -; CHECK-NEXT: ld 30, 192(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 29, 184(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 28, 176(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 27, 168(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 26, 160(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 25, 152(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 0, 120(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 96(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 5, 22784 -; CHECK-NEXT: std 7, 272(1) # 8-byte Folded Spill -; CHECK-NEXT: lis 7, 354 -; 
CHECK-NEXT: std 4, 88(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 6, 11520 -; CHECK-NEXT: ld 6, 240(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 80(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 7, 256 -; CHECK-NEXT: ld 7, 232(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 72(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 8, 54528 -; CHECK-NEXT: ld 8, 224(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 64(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 9, 43264 -; CHECK-NEXT: ld 9, 216(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 56(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 10, 32000 -; CHECK-NEXT: ld 10, 208(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 48(1) # 8-byte Folded Spill -; CHECK-NEXT: ori 4, 11, 20736 -; CHECK-NEXT: ld 11, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: std 4, 40(1) # 8-byte Folded Spill -; CHECK-NEXT: std 14, 576(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 584(1) # 8-byte Folded Spill -; CHECK-NEXT: std 16, 592(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 600(1) # 8-byte Folded Spill -; CHECK-NEXT: std 18, 608(1) # 8-byte Folded Spill -; CHECK-NEXT: std 19, 616(1) # 8-byte Folded Spill -; CHECK-NEXT: std 20, 624(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 632(1) # 8-byte Folded Spill -; CHECK-NEXT: std 22, 640(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, 648(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, 656(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 5, 248(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 24, 144(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 23, 136(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 22, 112(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 21, 104(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, 96(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, 88(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 18, 80(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 17, 72(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 16, 64(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 15, 56(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 14, 
48(1) # 8-byte Folded Reload -; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: ld 31, 40(1) # 8-byte Folded Reload +; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill +; CHECK-NEXT: li 22, 0 +; CHECK-NEXT: std 6, -168(1) # 8-byte Folded Spill +; CHECK-NEXT: ori 0, 4, 18432 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_2 Depth 2 -; CHECK-NEXT: stw 4, 396(1) # 4-byte Folded Spill ; CHECK-NEXT: li 4, 83 ; CHECK-NEXT: mtctr 4 -; CHECK-NEXT: ld 12, 256(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 4, 128(1) # 8-byte Folded Reload +; CHECK-NEXT: mr 4, 5 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_2: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: ld 2, 560(1) # 8-byte Folded Reload -; CHECK-NEXT: stdux 3, 12, 2 -; CHECK-NEXT: ld 2, 552(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 5 -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 544(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 536(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 528(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 520(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 512(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 504(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 496(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 488(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 480(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 472(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 464(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 456(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 448(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 440(1) # 8-byte 
Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 432(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 424(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 416(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 408(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: ld 2, 400(1) # 8-byte Folded Reload -; CHECK-NEXT: stdx 3, 12, 2 -; CHECK-NEXT: stdx 3, 12, 6 -; CHECK-NEXT: stdx 3, 12, 7 -; CHECK-NEXT: stdx 3, 12, 8 -; CHECK-NEXT: stdx 3, 12, 9 -; CHECK-NEXT: stdx 3, 12, 10 -; CHECK-NEXT: stdx 3, 12, 11 -; CHECK-NEXT: stdx 3, 12, 30 -; CHECK-NEXT: stdx 3, 12, 29 -; CHECK-NEXT: stdx 3, 12, 28 -; CHECK-NEXT: stdx 3, 12, 27 -; CHECK-NEXT: stdx 3, 12, 26 -; CHECK-NEXT: stdx 3, 12, 25 -; CHECK-NEXT: stdx 3, 12, 24 -; CHECK-NEXT: stdx 3, 12, 23 -; CHECK-NEXT: stdx 3, 12, 4 -; CHECK-NEXT: stdx 3, 12, 0 -; CHECK-NEXT: stdx 3, 12, 22 -; CHECK-NEXT: stdx 3, 12, 21 -; CHECK-NEXT: stdx 3, 12, 20 -; CHECK-NEXT: stdx 3, 12, 19 -; CHECK-NEXT: stdx 3, 12, 18 -; CHECK-NEXT: stdx 3, 12, 17 -; CHECK-NEXT: stdx 3, 12, 16 -; CHECK-NEXT: stdx 3, 12, 15 -; CHECK-NEXT: stdx 3, 12, 14 -; CHECK-NEXT: stdx 3, 12, 31 +; CHECK-NEXT: lis 6, 537 +; CHECK-NEXT: ori 6, 6, 7168 +; CHECK-NEXT: stdux 3, 4, 21 +; CHECK-NEXT: stdx 3, 4, 20 +; CHECK-NEXT: stdx 3, 4, 19 +; CHECK-NEXT: stdx 3, 4, 18 +; CHECK-NEXT: stdx 3, 4, 17 +; CHECK-NEXT: stdx 3, 4, 16 +; CHECK-NEXT: stdx 3, 4, 15 +; CHECK-NEXT: stdx 3, 4, 14 +; CHECK-NEXT: stdx 3, 4, 31 +; CHECK-NEXT: stdx 3, 4, 2 +; CHECK-NEXT: stdx 3, 4, 0 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 36 +; CHECK-NEXT: ori 6, 6, 40704 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 85 +; CHECK-NEXT: ori 6, 6, 29440 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 134 +; CHECK-NEXT: ori 6, 6, 18176 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 183 +; CHECK-NEXT: ori 6, 6, 6912 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 231 +; CHECK-NEXT: 
ori 6, 6, 61184 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 280 +; CHECK-NEXT: ori 6, 6, 49920 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 329 +; CHECK-NEXT: ori 6, 6, 38656 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 378 +; CHECK-NEXT: ori 6, 6, 27392 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 427 +; CHECK-NEXT: ori 6, 6, 16128 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 476 +; CHECK-NEXT: ori 6, 6, 4864 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 524 +; CHECK-NEXT: ori 6, 6, 59136 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 573 +; CHECK-NEXT: ori 6, 6, 47872 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 24 +; CHECK-NEXT: ori 6, 6, 27136 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 73 +; CHECK-NEXT: ori 6, 6, 15872 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 122 +; CHECK-NEXT: ori 6, 6, 4608 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 170 +; CHECK-NEXT: ori 6, 6, 58880 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 219 +; CHECK-NEXT: ori 6, 6, 47616 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 268 +; CHECK-NEXT: ori 6, 6, 36352 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 317 +; CHECK-NEXT: ori 6, 6, 25088 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 366 +; CHECK-NEXT: ori 6, 6, 13824 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 415 +; CHECK-NEXT: ori 6, 6, 2560 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 463 +; CHECK-NEXT: ori 6, 6, 56832 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 512 +; CHECK-NEXT: ori 6, 6, 45568 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 561 +; CHECK-NEXT: ori 6, 6, 34304 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 12 +; CHECK-NEXT: ori 6, 6, 13568 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 61 +; CHECK-NEXT: ori 6, 6, 2304 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 109 +; CHECK-NEXT: ori 6, 6, 56576 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 158 +; CHECK-NEXT: ori 6, 6, 45312 +; CHECK-NEXT: stdx 3, 4, 6 +; 
CHECK-NEXT: lis 6, 207 +; CHECK-NEXT: ori 6, 6, 34048 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 256 +; CHECK-NEXT: ori 6, 6, 22784 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 305 +; CHECK-NEXT: ori 6, 6, 11520 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 354 +; CHECK-NEXT: ori 6, 6, 256 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 402 +; CHECK-NEXT: ori 6, 6, 54528 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 451 +; CHECK-NEXT: ori 6, 6, 43264 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 500 +; CHECK-NEXT: ori 6, 6, 32000 +; CHECK-NEXT: stdx 3, 4, 6 +; CHECK-NEXT: lis 6, 549 +; CHECK-NEXT: ori 6, 6, 20736 +; CHECK-NEXT: stdx 3, 4, 6 ; CHECK-NEXT: bdnz .LBB0_2 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: ld 12, 384(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 376(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 368(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 360(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 352(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 344(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 336(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 328(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 320(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 312(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 304(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 296(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 288(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 280(1) # 8-byte Folded Reload -; CHECK-NEXT: lwz 4, 396(1) # 4-byte Folded Reload -; CHECK-NEXT: addi 4, 4, 1 -; CHECK-NEXT: std 3, 0(12) -; CHECK-NEXT: ld 12, 272(1) # 8-byte Folded Reload -; CHECK-NEXT: std 3, 
0(12) -; CHECK-NEXT: xoris 12, 4, 6 -; CHECK-NEXT: cmplwi 12, 6784 -; CHECK-NEXT: ld 12, 264(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 22, 22, 1 +; CHECK-NEXT: ld 4, -160(1) # 8-byte Folded Reload +; CHECK-NEXT: std 3, 0(4) +; CHECK-NEXT: ld 4, -168(1) # 8-byte Folded Reload +; CHECK-NEXT: std 3, 0(4) +; CHECK-NEXT: xoris 4, 22, 6 +; CHECK-NEXT: cmplwi 4, 6784 +; CHECK-NEXT: std 3, 0(7) +; CHECK-NEXT: std 3, 0(8) +; CHECK-NEXT: std 3, 0(9) +; CHECK-NEXT: std 3, 0(10) +; CHECK-NEXT: std 3, 0(11) ; CHECK-NEXT: std 3, 0(12) +; CHECK-NEXT: std 3, 0(30) +; CHECK-NEXT: std 3, 0(29) +; CHECK-NEXT: std 3, 0(28) +; CHECK-NEXT: std 3, 0(27) +; CHECK-NEXT: std 3, 0(26) +; CHECK-NEXT: std 3, 0(25) +; CHECK-NEXT: std 3, 0(24) +; CHECK-NEXT: std 3, 0(23) ; CHECK-NEXT: bne 0, .LBB0_1 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: ld 2, 568(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 31, 712(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 30, 704(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 29, 696(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 2, -152(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 31, -8(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: ld 28, 688(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 27, 680(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 26, 672(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 25, 664(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 24, 656(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 23, 648(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 22, 640(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 21, 632(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, 624(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, 616(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 18, 608(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 17, 600(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 16, 592(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 15, 584(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 14, 576(1) 
# 8-byte Folded Reload -; CHECK-NEXT: addi 1, 1, 720 +; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 26, -48(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 25, -56(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 24, -64(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 23, -72(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 22, -80(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 21, -88(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 20, -96(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 19, -104(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 18, -112(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 17, -120(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 16, -128(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 15, -136(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 14, -144(1) # 8-byte Folded Reload ; CHECK-NEXT: blr %3 = getelementptr inbounds i64, i64* %1, i64 144115188075855 %4 = getelementptr i64, i64* %1, i64 144115586875855