Index: llvm/lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -2331,7 +2331,10 @@ if (!AtStart) --BeforeI; - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Reorder CSR reloads in epilogue to follow the same order as CSR saves in + // the prologue as consecutive loads/stores in increasing order perform better + // on some PPC CPUs. + for (unsigned i = CSI.size(), e = 0; i-- != e;) { unsigned Reg = CSI[i].getReg(); // Only Darwin actually uses the VRSAVE register, but it can still appear Index: llvm/test/CodeGen/MIR/PowerPC/prolog_vec_spills.mir =================================================================== --- llvm/test/CodeGen/MIR/PowerPC/prolog_vec_spills.mir +++ llvm/test/CodeGen/MIR/PowerPC/prolog_vec_spills.mir @@ -19,9 +19,9 @@ # CHECK: $f1 = MTVSRD killed $x14 # CHECK-NEXT: $f2 = MTVSRD killed $x15 # CHECK-NEXT: $f3 = MTVSRD killed $x16 -# CHECK: $x16 = MFVSRD killed $f3 +# CHECK: $x14 = MFVSRD killed $f1 # CHECK-NEXT: $x15 = MFVSRD killed $f2 -# CHECK-NEXT: $x14 = MFVSRD killed $f1 +# CHECK-NEXT: $x16 = MFVSRD killed $f3 ... --- @@ -56,7 +56,7 @@ # CHECK: $f0 = MTVSRD killed $x14 # CHECK-NEXT: $f1 = MTVSRD killed $x15 # CHECK-NEXT: $f2 = MTVSRD killed $x16 -# CHECK: $x16 = MFVSRD killed $f2 +# CHECK: $x14 = MFVSRD killed $f0 # CHECK-NEXT: $x15 = MFVSRD killed $f1 -# CHECK-NEXT: $x14 = MFVSRD killed $f0 +# CHECK-NEXT: $x16 = MFVSRD killed $f2 ... Index: llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll =================================================================== --- llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll +++ llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll @@ -293,15 +293,15 @@ ; CHECK-NEXT: .LBB0_15: # %bb3 ; CHECK-NEXT: mr 3, 30 ; CHECK-NEXT: .LBB0_16: # %bb5 -; CHECK-NEXT: lfd 31, 456(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 30, 448(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 29, 440(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 28, 432(1) # 8-byte Folded Reload +; CHECK-NEXT: lwz 29, 412(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 12, 408(1) -; CHECK-NEXT: lfd 27, 424(1) # 8-byte Folded Reload ; CHECK-NEXT: lwz 30, 416(1) # 4-byte Folded Reload +; CHECK-NEXT: lfd 27, 424(1) # 8-byte Folded Reload ; CHECK-NEXT: mtcrf 32, 12 # cr2 -; CHECK-NEXT: lwz 29, 412(1) # 4-byte Folded Reload +; CHECK-NEXT: lfd 28, 432(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 29, 440(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 30, 448(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 31, 456(1) # 8-byte Folded Reload ; CHECK-NEXT: lwz 0, 468(1) ; CHECK-NEXT: addi 1, 1, 464 ; CHECK-NEXT: mtlr 0 Index: llvm/test/CodeGen/PowerPC/CSR-fit.ll =================================================================== --- llvm/test/CodeGen/PowerPC/CSR-fit.ll +++ llvm/test/CodeGen/PowerPC/CSR-fit.ll @@ -28,8 +28,8 @@ ; CHECK-PWR8-NEXT: addi r1, r1, 176 ; CHECK-PWR8-NEXT: ld r0, 16(r1) ; CHECK-PWR8-NEXT: mtlr r0 -; CHECK-PWR8-NEXT: ld r15, -136(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: ld r14, -144(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: ld r15, -136(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: blr ; ; CHECK-PWR9-LABEL: caller1: @@ -52,8 +52,8 @@ ; CHECK-PWR9-NEXT: addi r1, r1, 176 ; CHECK-PWR9-NEXT: ld r0, 16(r1) ; CHECK-PWR9-NEXT: mtlr r0 -; CHECK-PWR9-NEXT: ld r15, -136(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: ld r14, -144(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r15, -136(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: blr entry: %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r14},~{r15}"(i32 %a, i32 %b) @@ -82,8 +82,8 @@ ; CHECK-PWR8-NEXT: addi r1, r1, 176 ; CHECK-PWR8-NEXT: ld r0, 16(r1) ; CHECK-PWR8-NEXT: mtlr r0 -; CHECK-PWR8-NEXT: lfd f15, -136(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: lfd f14, -144(r1) # 8-byte Folded Reload +; CHECK-PWR8-NEXT: lfd f15, -136(r1) # 8-byte Folded Reload ; CHECK-PWR8-NEXT: blr ; ; CHECK-PWR9-LABEL: caller2: @@ -106,8 +106,8 @@ ; CHECK-PWR9-NEXT: addi r1, r1, 176 ; CHECK-PWR9-NEXT: ld r0, 16(r1) ; CHECK-PWR9-NEXT: mtlr r0 -; CHECK-PWR9-NEXT: lfd f15, -136(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: lfd f14, -144(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f15, -136(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: blr entry: %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{f14},~{f15}"(i32 %a, i32 %b) @@ -135,10 +135,10 @@ ; CHECK-PWR8-NEXT: extsw r3, r3 ; CHECK-PWR8-NEXT: bl callee ; CHECK-PWR8-NEXT: nop -; CHECK-PWR8-NEXT: li r4, 64 -; CHECK-PWR8-NEXT: lxvd2x v21, r1, r4 # 16-byte Folded Reload ; CHECK-PWR8-NEXT: li r4, 48 ; CHECK-PWR8-NEXT: lxvd2x v20, r1, r4 # 16-byte Folded Reload +; CHECK-PWR8-NEXT: li r4, 64 +; CHECK-PWR8-NEXT: lxvd2x v21, r1, r4 # 16-byte Folded Reload ; CHECK-PWR8-NEXT: addi r1, r1, 240 ; CHECK-PWR8-NEXT: ld r0, 16(r1) ; CHECK-PWR8-NEXT: mtlr r0 @@ -161,8 +161,8 @@ ; CHECK-PWR9-NEXT: extsw r3, r3 ; CHECK-PWR9-NEXT: bl callee ; CHECK-PWR9-NEXT: nop -; CHECK-PWR9-NEXT: lxv v21, 48(r1) # 16-byte Folded Reload ; CHECK-PWR9-NEXT: lxv v20, 32(r1) # 16-byte Folded Reload +; CHECK-PWR9-NEXT: lxv v21, 48(r1) # 16-byte Folded Reload ; CHECK-PWR9-NEXT: addi r1, r1, 224 ; CHECK-PWR9-NEXT: ld r0, 16(r1) ; CHECK-PWR9-NEXT: mtlr r0 @@ -265,8 +265,8 @@ ; CHECK-PWR9-NEXT: bl callee ; CHECK-PWR9-NEXT: nop ; CHECK-PWR9-NEXT: lxv v20, 32(r1) # 16-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f14, 368(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: ld r14, 224(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f14, 368(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: addi r1, r1, 512 ; CHECK-PWR9-NEXT: ld r0, 16(r1) ; CHECK-PWR9-NEXT: mtlr r0 Index: llvm/test/CodeGen/PowerPC/coldcc.ll =================================================================== --- llvm/test/CodeGen/PowerPC/coldcc.ll +++ llvm/test/CodeGen/PowerPC/coldcc.ll @@ -30,11 +30,11 @@ ; COLDCC: std 8, -24(1) ; COLDCC: std 9, -32(1) ; COLDCC: std 10, -40(1) -; COLDCC: ld 10, -40(1) -; COLDCC: ld 9, -32(1) -; COLDCC: ld 8, -24(1) -; COLDCC: ld 7, -16(1) ; COLDCC: ld 6, -8(1) +; COLDCC: ld 7, -16(1) +; COLDCC: ld 8, -24(1) +; COLDCC: ld 9, -32(1) +; COLDCC: ld 10, -40(1) %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r6},~{r7},~{r8},~{r9},~{r10}"(i32 %a, i32 %b) %mul = mul nsw i32 %a, 3 %1 = mul i32 %b, -5 Index: llvm/test/CodeGen/PowerPC/larger-than-red-zone.ll =================================================================== --- llvm/test/CodeGen/PowerPC/larger-than-red-zone.ll +++ llvm/test/CodeGen/PowerPC/larger-than-red-zone.ll @@ -54,25 +54,25 @@ ; CHECK-NEXT: extsw r3, r3 ; CHECK-NEXT: bl callee ; CHECK-NEXT: nop -; CHECK-NEXT: lfd f14, 176(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r31, 168(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, 160(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, 152(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r28, 144(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r27, 136(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r26, 128(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r25, 120(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r24, 112(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r23, 104(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r22, 96(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r21, 88(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r20, 80(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r19, 72(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r18, 64(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r17, 56(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r16, 48(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r15, 40(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r14, 32(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r15, 40(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r16, 48(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r17, 56(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r18, 64(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r19, 72(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r20, 80(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r21, 88(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r22, 96(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r23, 104(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, 112(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, 120(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r26, 128(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, 136(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, 144(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, 152(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, 160(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r31, 168(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f14, 176(r1) # 8-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 320 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 Index: llvm/test/CodeGen/PowerPC/not-fixed-frame-object.ll =================================================================== --- llvm/test/CodeGen/PowerPC/not-fixed-frame-object.ll +++ llvm/test/CodeGen/PowerPC/not-fixed-frame-object.ll @@ -62,24 +62,24 @@ ; CHECK-NEXT: addi r1, r1, 192 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 -; CHECK-NEXT: ld r31, -8(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r24, -64(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r23, -72(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r22, -80(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r21, -88(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r20, -96(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r19, -104(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r18, -112(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r17, -120(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r16, -128(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r15, -136(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r14, -144(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r15, -136(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r16, -128(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r17, -120(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r18, -112(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r19, -104(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r20, -96(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r21, -88(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r22, -80(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r23, -72(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r24, -64(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r31, -8(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr entry: %0 = tail call i32 asm "add $0, $1, $2", "=r,r,r,~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13}"(i32 %a, i32 %b) Index: llvm/test/CodeGen/PowerPC/reverse-callee-saved-register-reloads.ll =================================================================== --- llvm/test/CodeGen/PowerPC/reverse-callee-saved-register-reloads.ll +++ llvm/test/CodeGen/PowerPC/reverse-callee-saved-register-reloads.ll @@ -110,55 +110,55 @@ ; CHECK-PWR9-NEXT: #NO_APP ; CHECK-PWR9-NEXT: ld r3, 40(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: ld r4, 32(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload -; CHECK-PWR9-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload -; CHECK-PWR9-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload +; CHECK-PWR9-NEXT: lxv v20, 48(r1) # 16-byte Folded Reload +; CHECK-PWR9-NEXT: lxv v21, 64(r1) # 16-byte Folded Reload +; CHECK-PWR9-NEXT: lxv v22, 80(r1) # 16-byte Folded Reload ; CHECK-PWR9-NEXT: add r3, r4, r3 -; CHECK-PWR9-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload -; CHECK-PWR9-NEXT: lxv v27, 160(r1) # 16-byte Folded Reload -; CHECK-PWR9-NEXT: lxv v26, 144(r1) # 16-byte Folded Reload -; CHECK-PWR9-NEXT: lxv v25, 128(r1) # 16-byte Folded Reload -; CHECK-PWR9-NEXT: lxv v24, 112(r1) # 16-byte Folded Reload ; CHECK-PWR9-NEXT: lxv v23, 96(r1) # 16-byte Folded Reload -; CHECK-PWR9-NEXT: lxv v22, 80(r1) # 16-byte Folded Reload -; CHECK-PWR9-NEXT: lxv v21, 64(r1) # 16-byte Folded Reload -; CHECK-PWR9-NEXT: lxv v20, 48(r1) # 16-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f31, 520(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f30, 512(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f29, 504(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f28, 496(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f27, 488(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f26, 480(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f25, 472(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f24, 464(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f23, 456(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f22, 448(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f21, 440(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f20, 432(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f19, 424(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f18, 416(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f17, 408(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f16, 400(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f15, 392(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: lfd f14, 384(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r31, 376(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r30, 368(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r29, 360(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r28, 352(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r27, 344(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r26, 336(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r25, 328(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r24, 320(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r23, 312(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r22, 304(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r21, 296(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r20, 288(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r19, 280(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r18, 272(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r17, 264(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r16, 256(r1) # 8-byte Folded Reload -; CHECK-PWR9-NEXT: ld r15, 248(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lxv v24, 112(r1) # 16-byte Folded Reload +; CHECK-PWR9-NEXT: lxv v25, 128(r1) # 16-byte Folded Reload +; CHECK-PWR9-NEXT: lxv v26, 144(r1) # 16-byte Folded Reload +; CHECK-PWR9-NEXT: lxv v27, 160(r1) # 16-byte Folded Reload +; CHECK-PWR9-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload +; CHECK-PWR9-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload +; CHECK-PWR9-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload +; CHECK-PWR9-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload ; CHECK-PWR9-NEXT: ld r14, 240(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r15, 248(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r16, 256(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r17, 264(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r18, 272(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r19, 280(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r20, 288(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r21, 296(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r22, 304(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r23, 312(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r24, 320(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r25, 328(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r26, 336(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r27, 344(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r28, 352(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r29, 360(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r30, 368(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: ld r31, 376(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f14, 384(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f15, 392(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f16, 400(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f17, 408(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f18, 416(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f19, 424(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f20, 432(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f21, 440(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f22, 448(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f23, 456(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f24, 464(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f25, 472(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f26, 480(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f27, 488(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f28, 496(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f29, 504(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f30, 512(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: lfd f31, 520(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: addi r1, r1, 528 ; CHECK-PWR9-NEXT: blr entry: Index: llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll +++ llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll @@ -130,8 +130,18 @@ ; PPC32-NEXT: or. 3, 30, 29 ; PPC32-NEXT: cror 25, 25, 11 ; PPC32-NEXT: crnor 20, 2, 22 -; PPC32-NEXT: lwz 12, 28(1) +; PPC32-NEXT: lwz 20, 32(1) +; PPC32-NEXT: lwz 21, 36(1) +; PPC32-NEXT: lwz 22, 40(1) +; PPC32-NEXT: lwz 23, 44(1) +; PPC32-NEXT: lwz 24, 48(1) +; PPC32-NEXT: lwz 25, 52(1) +; PPC32-NEXT: lwz 26, 56(1) +; PPC32-NEXT: lwz 27, 60(1) +; PPC32-NEXT: lwz 28, 64(1) +; PPC32-NEXT: lwz 29, 68(1) ; PPC32-NEXT: cror 20, 20, 25 +; PPC32-NEXT: lwz 12, 28(1) ; PPC32-NEXT: cror 20, 20, 24 ; PPC32-NEXT: crnor 20, 20, 12 ; PPC32-NEXT: li 3, 1 @@ -145,16 +155,6 @@ ; PPC32-NEXT: mtcrf 32, 12 ; PPC32-NEXT: mtcrf 16, 12 ; PPC32-NEXT: lwz 30, 72(1) -; PPC32-NEXT: lwz 29, 68(1) -; PPC32-NEXT: lwz 28, 64(1) -; PPC32-NEXT: lwz 27, 60(1) -; PPC32-NEXT: lwz 26, 56(1) -; PPC32-NEXT: lwz 25, 52(1) -; PPC32-NEXT: lwz 24, 48(1) -; PPC32-NEXT: lwz 23, 44(1) -; PPC32-NEXT: lwz 22, 40(1) -; PPC32-NEXT: lwz 21, 36(1) -; PPC32-NEXT: lwz 20, 32(1) ; PPC32-NEXT: lwz 0, 84(1) ; PPC32-NEXT: addi 1, 1, 80 ; PPC32-NEXT: mtlr 0