diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -395,12 +395,28 @@ const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs(); + BitVector CSMask(SavedRegs.size()); + + for (unsigned i = 0; CSRegs[i]; ++i) + CSMask.set(CSRegs[i]); std::vector CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (SavedRegs.test(Reg)) - CSI.push_back(CalleeSavedInfo(Reg)); + if (SavedRegs.test(Reg)) { + bool SavedSuper = false; + for (const MCPhysReg &SuperReg : RegInfo->superregs(Reg)) { + // Some backends set all aliases for some registers as saved, such as + // Mips's $fp, so they appear in SavedRegs but not CSRegs. + if (SavedRegs.test(SuperReg) && CSMask.test(SuperReg)) { + SavedSuper = true; + break; + } + } + + if (!SavedSuper) + CSI.push_back(CalleeSavedInfo(Reg)); + } } const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); diff --git a/llvm/test/CodeGen/PowerPC/fp-strict.ll b/llvm/test/CodeGen/PowerPC/fp-strict.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict.ll @@ -700,48 +700,28 @@ ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 ; SPE-NEXT: stw r0, 4(r1) -; SPE-NEXT: stwu r1, -144(r1) -; SPE-NEXT: .cfi_def_cfa_offset 144 +; SPE-NEXT: stwu r1, -96(r1) +; SPE-NEXT: .cfi_def_cfa_offset 96 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r21, -44 -; SPE-NEXT: .cfi_offset r22, -40 -; SPE-NEXT: .cfi_offset r23, -36 -; SPE-NEXT: .cfi_offset r24, -32 -; SPE-NEXT: .cfi_offset r25, -28 -; SPE-NEXT: .cfi_offset r26, -24 -; SPE-NEXT: .cfi_offset r27, -20 -; SPE-NEXT: .cfi_offset r28, -16 -; SPE-NEXT: .cfi_offset r29, -12 -; SPE-NEXT: .cfi_offset r30, -8 -; SPE-NEXT: .cfi_offset r21, -136 -; SPE-NEXT: .cfi_offset r22, -128 -; SPE-NEXT: .cfi_offset r23, -120 -; SPE-NEXT: .cfi_offset r24, -112 -; SPE-NEXT: .cfi_offset r25, -104 -; SPE-NEXT: .cfi_offset r26, -96 -; SPE-NEXT: .cfi_offset r27, -88 -; SPE-NEXT: .cfi_offset r28, -80 -; SPE-NEXT: .cfi_offset r29, -72 -; SPE-NEXT: .cfi_offset r30, -64 -; SPE-NEXT: stw r27, 124(r1) # 4-byte Folded Spill +; SPE-NEXT: .cfi_offset r21, -88 +; SPE-NEXT: .cfi_offset r22, -80 +; SPE-NEXT: .cfi_offset r23, -72 +; SPE-NEXT: .cfi_offset r24, -64 +; SPE-NEXT: .cfi_offset r25, -56 +; SPE-NEXT: .cfi_offset r26, -48 +; SPE-NEXT: .cfi_offset r27, -40 +; SPE-NEXT: .cfi_offset r28, -32 +; SPE-NEXT: .cfi_offset r29, -24 +; SPE-NEXT: .cfi_offset r30, -16 ; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r27, r5 -; SPE-NEXT: lwz r5, 164(r1) -; SPE-NEXT: stw r25, 116(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r26, 120(r1) # 4-byte Folded Spill +; SPE-NEXT: lwz r5, 116(r1) ; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r25, r3 ; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r26, r4 ; SPE-NEXT: mr r3, r6 ; SPE-NEXT: mr r4, r10 -; SPE-NEXT: stw r21, 100(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r22, 104(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r23, 108(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r24, 112(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r28, 128(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r29, 132(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r30, 136(r1) # 4-byte Folded Spill ; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill @@ -752,9 +732,9 @@ ; SPE-NEXT: mr r29, r8 ; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r30, r9 -; SPE-NEXT: lwz r24, 152(r1) -; SPE-NEXT: lwz r23, 156(r1) -; SPE-NEXT: lwz r22, 160(r1) +; SPE-NEXT: lwz r24, 104(r1) +; SPE-NEXT: lwz r23, 108(r1) +; SPE-NEXT: lwz r22, 112(r1) ; SPE-NEXT: bl fmaf ; SPE-NEXT: mr r21, r3 ; SPE-NEXT: mr r3, r27 @@ -784,18 +764,8 @@ ; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r30, 136(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r29, 132(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r28, 128(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r27, 124(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r26, 120(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r25, 116(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r24, 112(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r23, 108(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r22, 104(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r21, 100(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r0, 148(r1) -; SPE-NEXT: addi r1, r1, 144 +; SPE-NEXT: lwz r0, 100(r1) +; SPE-NEXT: addi r1, r1, 96 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32( @@ -822,24 +792,14 @@ ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 ; SPE-NEXT: stw r0, 4(r1) -; SPE-NEXT: stwu r1, -96(r1) -; SPE-NEXT: .cfi_def_cfa_offset 96 +; SPE-NEXT: stwu r1, -64(r1) +; SPE-NEXT: .cfi_def_cfa_offset 64 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r26, -24 -; SPE-NEXT: .cfi_offset r27, -20 -; SPE-NEXT: .cfi_offset r28, -16 -; SPE-NEXT: .cfi_offset r29, -12 -; SPE-NEXT: .cfi_offset r30, -8 -; SPE-NEXT: .cfi_offset r26, -80 -; SPE-NEXT: .cfi_offset r27, -72 -; SPE-NEXT: .cfi_offset r28, -64 -; SPE-NEXT: .cfi_offset r29, -56 -; SPE-NEXT: .cfi_offset r30, -48 -; SPE-NEXT: stw r26, 72(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r27, 76(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r28, 80(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r29, 84(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r30, 88(r1) # 4-byte Folded Spill +; SPE-NEXT: .cfi_offset r26, -48 +; SPE-NEXT: .cfi_offset r27, -40 +; SPE-NEXT: .cfi_offset r28, -32 +; SPE-NEXT: .cfi_offset r29, -24 +; SPE-NEXT: .cfi_offset r30, -16 ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill @@ -849,16 +809,16 @@ ; SPE-NEXT: evmergelo r9, r9, r10 ; SPE-NEXT: evmergelo r4, r5, r6 ; SPE-NEXT: mr r30, r3 -; SPE-NEXT: evldd r8, 112(r1) +; SPE-NEXT: evldd r8, 80(r1) ; SPE-NEXT: evmergehi r3, r4, r4 ; SPE-NEXT: evmergehi r5, r9, r9 ; SPE-NEXT: mr r6, r9 -; SPE-NEXT: evldd r29, 120(r1) +; SPE-NEXT: evldd r29, 88(r1) ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 ; SPE-NEXT: evmergehi r7, r8, r8 -; SPE-NEXT: evldd r28, 104(r1) +; SPE-NEXT: evldd r28, 72(r1) ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 ; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 ; SPE-NEXT: bl fma @@ -882,13 +842,8 @@ ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r30, 88(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r29, 84(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r28, 80(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r27, 76(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r26, 72(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r0, 100(r1) -; SPE-NEXT: addi r1, r1, 96 +; SPE-NEXT: lwz r0, 68(r1) +; SPE-NEXT: addi r1, r1, 64 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64( @@ -1026,33 +981,19 @@ ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 ; SPE-NEXT: stw r0, 4(r1) -; SPE-NEXT: stwu r1, -144(r1) -; SPE-NEXT: .cfi_def_cfa_offset 144 +; SPE-NEXT: stwu r1, -96(r1) +; SPE-NEXT: .cfi_def_cfa_offset 96 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r21, -44 -; SPE-NEXT: .cfi_offset r22, -40 -; SPE-NEXT: .cfi_offset r23, -36 -; SPE-NEXT: .cfi_offset r24, -32 -; SPE-NEXT: .cfi_offset r25, -28 -; SPE-NEXT: .cfi_offset r26, -24 -; SPE-NEXT: .cfi_offset r27, -20 -; SPE-NEXT: .cfi_offset r28, -16 -; SPE-NEXT: .cfi_offset r29, -12 -; SPE-NEXT: .cfi_offset r30, -8 -; SPE-NEXT: .cfi_offset r21, -136 -; SPE-NEXT: .cfi_offset r22, -128 -; SPE-NEXT: .cfi_offset r23, -120 -; SPE-NEXT: .cfi_offset r24, -112 -; SPE-NEXT: .cfi_offset r25, -104 -; SPE-NEXT: .cfi_offset r26, -96 -; SPE-NEXT: .cfi_offset r27, -88 -; SPE-NEXT: .cfi_offset r28, -80 -; SPE-NEXT: .cfi_offset r29, -72 -; SPE-NEXT: .cfi_offset r30, -64 -; SPE-NEXT: stw r25, 116(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r26, 120(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r27, 124(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r28, 128(r1) # 4-byte Folded Spill +; SPE-NEXT: .cfi_offset r21, -88 +; SPE-NEXT: .cfi_offset r22, -80 +; SPE-NEXT: .cfi_offset r23, -72 +; SPE-NEXT: .cfi_offset r24, -64 +; SPE-NEXT: .cfi_offset r25, -56 +; SPE-NEXT: .cfi_offset r26, -48 +; SPE-NEXT: .cfi_offset r27, -40 +; SPE-NEXT: .cfi_offset r28, -32 +; SPE-NEXT: .cfi_offset r29, -24 +; SPE-NEXT: .cfi_offset r30, -16 ; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r25, r3 ; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill @@ -1061,13 +1002,10 @@ ; SPE-NEXT: mr r27, r5 ; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r28, r7 -; SPE-NEXT: lwz r3, 160(r1) -; SPE-NEXT: lwz r4, 152(r1) -; SPE-NEXT: lwz r5, 156(r1) -; SPE-NEXT: lwz r7, 164(r1) -; SPE-NEXT: stw r22, 104(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r23, 108(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r24, 112(r1) # 4-byte Folded Spill +; SPE-NEXT: lwz r3, 112(r1) +; SPE-NEXT: lwz r4, 104(r1) +; SPE-NEXT: lwz r5, 108(r1) +; SPE-NEXT: lwz r7, 116(r1) ; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: efsneg r22, r3 ; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill @@ -1077,9 +1015,6 @@ ; SPE-NEXT: efsneg r5, r7 ; SPE-NEXT: mr r3, r6 ; SPE-NEXT: mr r4, r10 -; SPE-NEXT: stw r21, 100(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r29, 132(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r30, 136(r1) # 4-byte Folded Spill ; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r29, r8 @@ -1114,18 +1049,8 @@ ; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r30, 136(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r29, 132(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r28, 128(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r27, 124(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r26, 120(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r25, 116(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r24, 112(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r23, 108(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r22, 104(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r21, 100(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r0, 148(r1) -; SPE-NEXT: addi r1, r1, 144 +; SPE-NEXT: lwz r0, 100(r1) +; SPE-NEXT: addi r1, r1, 96 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %neg = fneg <4 x float> %vf2 @@ -1153,28 +1078,18 @@ ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 ; SPE-NEXT: stw r0, 4(r1) -; SPE-NEXT: stwu r1, -96(r1) -; SPE-NEXT: .cfi_def_cfa_offset 96 +; SPE-NEXT: stwu r1, -64(r1) +; SPE-NEXT: .cfi_def_cfa_offset 64 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r26, -24 -; SPE-NEXT: .cfi_offset r27, -20 -; SPE-NEXT: .cfi_offset r28, -16 -; SPE-NEXT: .cfi_offset r29, -12 -; SPE-NEXT: .cfi_offset r30, -8 -; SPE-NEXT: .cfi_offset r26, -80 -; SPE-NEXT: .cfi_offset r27, -72 -; SPE-NEXT: .cfi_offset r28, -64 -; SPE-NEXT: .cfi_offset r29, -56 -; SPE-NEXT: .cfi_offset r30, -48 -; SPE-NEXT: stw r30, 88(r1) # 4-byte Folded Spill +; SPE-NEXT: .cfi_offset r26, -48 +; SPE-NEXT: .cfi_offset r27, -40 +; SPE-NEXT: .cfi_offset r28, -32 +; SPE-NEXT: .cfi_offset r29, -24 +; SPE-NEXT: .cfi_offset r30, -16 ; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r30, r3 -; SPE-NEXT: evldd r3, 112(r1) -; SPE-NEXT: evldd r11, 120(r1) -; SPE-NEXT: stw r26, 72(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r27, 76(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r28, 80(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r29, 84(r1) # 4-byte Folded Spill +; SPE-NEXT: evldd r3, 80(r1) +; SPE-NEXT: evldd r11, 88(r1) ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill ; SPE-NEXT: efdneg r27, r11 @@ -1188,7 +1103,7 @@ ; SPE-NEXT: evmergehi r5, r9, r9 ; SPE-NEXT: evmergehi r7, r8, r8 ; SPE-NEXT: mr r6, r9 -; SPE-NEXT: evldd r28, 104(r1) +; SPE-NEXT: evldd r28, 72(r1) ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 @@ -1215,13 +1130,8 @@ ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r30, 88(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r29, 84(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r28, 80(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r27, 76(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r26, 72(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r0, 100(r1) -; SPE-NEXT: addi r1, r1, 96 +; SPE-NEXT: lwz r0, 68(r1) +; SPE-NEXT: addi r1, r1, 64 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %neg = fneg <2 x double> %vf2 @@ -1360,48 +1270,28 @@ ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 ; SPE-NEXT: stw r0, 4(r1) -; SPE-NEXT: stwu r1, -144(r1) -; SPE-NEXT: .cfi_def_cfa_offset 144 +; SPE-NEXT: stwu r1, -96(r1) +; SPE-NEXT: .cfi_def_cfa_offset 96 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r21, -44 -; SPE-NEXT: .cfi_offset r22, -40 -; SPE-NEXT: .cfi_offset r23, -36 -; SPE-NEXT: .cfi_offset r24, -32 -; SPE-NEXT: .cfi_offset r25, -28 -; SPE-NEXT: .cfi_offset r26, -24 -; SPE-NEXT: .cfi_offset r27, -20 -; SPE-NEXT: .cfi_offset r28, -16 -; SPE-NEXT: .cfi_offset r29, -12 -; SPE-NEXT: .cfi_offset r30, -8 -; SPE-NEXT: .cfi_offset r21, -136 -; SPE-NEXT: .cfi_offset r22, -128 -; SPE-NEXT: .cfi_offset r23, -120 -; SPE-NEXT: .cfi_offset r24, -112 -; SPE-NEXT: .cfi_offset r25, -104 -; SPE-NEXT: .cfi_offset r26, -96 -; SPE-NEXT: .cfi_offset r27, -88 -; SPE-NEXT: .cfi_offset r28, -80 -; SPE-NEXT: .cfi_offset r29, -72 -; SPE-NEXT: .cfi_offset r30, -64 -; SPE-NEXT: stw r27, 124(r1) # 4-byte Folded Spill +; SPE-NEXT: .cfi_offset r21, -88 +; SPE-NEXT: .cfi_offset r22, -80 +; SPE-NEXT: .cfi_offset r23, -72 +; SPE-NEXT: .cfi_offset r24, -64 +; SPE-NEXT: .cfi_offset r25, -56 +; SPE-NEXT: .cfi_offset r26, -48 +; SPE-NEXT: .cfi_offset r27, -40 +; SPE-NEXT: .cfi_offset r28, -32 +; SPE-NEXT: .cfi_offset r29, -24 +; SPE-NEXT: .cfi_offset r30, -16 ; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r27, r5 -; SPE-NEXT: lwz r5, 164(r1) -; SPE-NEXT: stw r25, 116(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r26, 120(r1) # 4-byte Folded Spill +; SPE-NEXT: lwz r5, 116(r1) ; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r25, r3 ; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r26, r4 ; SPE-NEXT: mr r3, r6 ; SPE-NEXT: mr r4, r10 -; SPE-NEXT: stw r21, 100(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r22, 104(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r23, 108(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r24, 112(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r28, 128(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r29, 132(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r30, 136(r1) # 4-byte Folded Spill ; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill @@ -1412,9 +1302,9 @@ ; SPE-NEXT: mr r29, r8 ; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r30, r9 -; SPE-NEXT: lwz r24, 152(r1) -; SPE-NEXT: lwz r23, 156(r1) -; SPE-NEXT: lwz r22, 160(r1) +; SPE-NEXT: lwz r24, 104(r1) +; SPE-NEXT: lwz r23, 108(r1) +; SPE-NEXT: lwz r22, 112(r1) ; SPE-NEXT: bl fmaf ; SPE-NEXT: mr r21, r3 ; SPE-NEXT: mr r3, r27 @@ -1433,9 +1323,9 @@ ; SPE-NEXT: bl fmaf ; SPE-NEXT: efsneg r4, r29 ; SPE-NEXT: efsneg r5, r30 +; SPE-NEXT: efsneg r3, r3 ; SPE-NEXT: efsneg r6, r21 ; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload -; SPE-NEXT: efsneg r3, r3 ; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload @@ -1445,18 +1335,8 @@ ; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r30, 136(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r29, 132(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r28, 128(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r27, 124(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r26, 120(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r25, 116(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r24, 112(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r23, 108(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r22, 104(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r21, 100(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r0, 148(r1) -; SPE-NEXT: addi r1, r1, 144 +; SPE-NEXT: lwz r0, 100(r1) +; SPE-NEXT: addi r1, r1, 96 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32( @@ -1484,24 +1364,14 @@ ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 ; SPE-NEXT: stw r0, 4(r1) -; SPE-NEXT: stwu r1, -96(r1) -; SPE-NEXT: .cfi_def_cfa_offset 96 +; SPE-NEXT: stwu r1, -64(r1) +; SPE-NEXT: .cfi_def_cfa_offset 64 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r26, -24 -; SPE-NEXT: .cfi_offset r27, -20 -; SPE-NEXT: .cfi_offset r28, -16 -; SPE-NEXT: .cfi_offset r29, -12 -; SPE-NEXT: .cfi_offset r30, -8 -; SPE-NEXT: .cfi_offset r26, -80 -; SPE-NEXT: .cfi_offset r27, -72 -; SPE-NEXT: .cfi_offset r28, -64 -; SPE-NEXT: .cfi_offset r29, -56 -; SPE-NEXT: .cfi_offset r30, -48 -; SPE-NEXT: stw r26, 72(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r27, 76(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r28, 80(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r29, 84(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r30, 88(r1) # 4-byte Folded Spill +; SPE-NEXT: .cfi_offset r26, -48 +; SPE-NEXT: .cfi_offset r27, -40 +; SPE-NEXT: .cfi_offset r28, -32 +; SPE-NEXT: .cfi_offset r29, -24 +; SPE-NEXT: .cfi_offset r30, -16 ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill @@ -1511,16 +1381,16 @@ ; SPE-NEXT: evmergelo r9, r9, r10 ; SPE-NEXT: evmergelo r4, r5, r6 ; SPE-NEXT: mr r30, r3 -; SPE-NEXT: evldd r8, 112(r1) +; SPE-NEXT: evldd r8, 80(r1) ; SPE-NEXT: evmergehi r3, r4, r4 ; SPE-NEXT: evmergehi r5, r9, r9 ; SPE-NEXT: mr r6, r9 -; SPE-NEXT: evldd r29, 120(r1) +; SPE-NEXT: evldd r29, 88(r1) ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 ; SPE-NEXT: evmergehi r7, r8, r8 -; SPE-NEXT: evldd r28, 104(r1) +; SPE-NEXT: evldd r28, 72(r1) ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7 ; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8 ; SPE-NEXT: bl fma @@ -1546,13 +1416,8 @@ ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r30, 88(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r29, 84(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r28, 80(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r27, 76(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r26, 72(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r0, 100(r1) -; SPE-NEXT: addi r1, r1, 96 +; SPE-NEXT: lwz r0, 68(r1) +; SPE-NEXT: addi r1, r1, 64 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64( @@ -1696,33 +1561,19 @@ ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 ; SPE-NEXT: stw r0, 4(r1) -; SPE-NEXT: stwu r1, -144(r1) -; SPE-NEXT: .cfi_def_cfa_offset 144 +; SPE-NEXT: stwu r1, -96(r1) +; SPE-NEXT: .cfi_def_cfa_offset 96 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r21, -44 -; SPE-NEXT: .cfi_offset r22, -40 -; SPE-NEXT: .cfi_offset r23, -36 -; SPE-NEXT: .cfi_offset r24, -32 -; SPE-NEXT: .cfi_offset r25, -28 -; SPE-NEXT: .cfi_offset r26, -24 -; SPE-NEXT: .cfi_offset r27, -20 -; SPE-NEXT: .cfi_offset r28, -16 -; SPE-NEXT: .cfi_offset r29, -12 -; SPE-NEXT: .cfi_offset r30, -8 -; SPE-NEXT: .cfi_offset r21, -136 -; SPE-NEXT: .cfi_offset r22, -128 -; SPE-NEXT: .cfi_offset r23, -120 -; SPE-NEXT: .cfi_offset r24, -112 -; SPE-NEXT: .cfi_offset r25, -104 -; SPE-NEXT: .cfi_offset r26, -96 -; SPE-NEXT: .cfi_offset r27, -88 -; SPE-NEXT: .cfi_offset r28, -80 -; SPE-NEXT: .cfi_offset r29, -72 -; SPE-NEXT: .cfi_offset r30, -64 -; SPE-NEXT: stw r25, 116(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r26, 120(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r27, 124(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r28, 128(r1) # 4-byte Folded Spill +; SPE-NEXT: .cfi_offset r21, -88 +; SPE-NEXT: .cfi_offset r22, -80 +; SPE-NEXT: .cfi_offset r23, -72 +; SPE-NEXT: .cfi_offset r24, -64 +; SPE-NEXT: .cfi_offset r25, -56 +; SPE-NEXT: .cfi_offset r26, -48 +; SPE-NEXT: .cfi_offset r27, -40 +; SPE-NEXT: .cfi_offset r28, -32 +; SPE-NEXT: .cfi_offset r29, -24 +; SPE-NEXT: .cfi_offset r30, -16 ; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r25, r3 ; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill @@ -1731,13 +1582,10 @@ ; SPE-NEXT: mr r27, r5 ; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r28, r7 -; SPE-NEXT: lwz r3, 160(r1) -; SPE-NEXT: lwz r4, 152(r1) -; SPE-NEXT: lwz r5, 156(r1) -; SPE-NEXT: lwz r7, 164(r1) -; SPE-NEXT: stw r22, 104(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r23, 108(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r24, 112(r1) # 4-byte Folded Spill +; SPE-NEXT: lwz r3, 112(r1) +; SPE-NEXT: lwz r4, 104(r1) +; SPE-NEXT: lwz r5, 108(r1) +; SPE-NEXT: lwz r7, 116(r1) ; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: efsneg r22, r3 ; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill @@ -1747,9 +1595,6 @@ ; SPE-NEXT: efsneg r5, r7 ; SPE-NEXT: mr r3, r6 ; SPE-NEXT: mr r4, r10 -; SPE-NEXT: stw r21, 100(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r29, 132(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r30, 136(r1) # 4-byte Folded Spill ; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r29, r8 @@ -1773,9 +1618,9 @@ ; SPE-NEXT: bl fmaf ; SPE-NEXT: efsneg r4, r29 ; SPE-NEXT: efsneg r5, r30 +; SPE-NEXT: efsneg r3, r3 ; SPE-NEXT: efsneg r6, r21 ; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload -; SPE-NEXT: efsneg r3, r3 ; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload @@ -1785,18 +1630,8 @@ ; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r30, 136(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r29, 132(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r28, 128(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r27, 124(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r26, 120(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r25, 116(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r24, 112(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r23, 108(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r22, 104(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r21, 100(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r0, 148(r1) -; SPE-NEXT: addi r1, r1, 144 +; SPE-NEXT: lwz r0, 100(r1) +; SPE-NEXT: addi r1, r1, 96 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %neg = fneg <4 x float> %vf2 @@ -1825,28 +1660,18 @@ ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 ; SPE-NEXT: stw r0, 4(r1) -; SPE-NEXT: stwu r1, -96(r1) -; SPE-NEXT: .cfi_def_cfa_offset 96 +; SPE-NEXT: stwu r1, -64(r1) +; SPE-NEXT: .cfi_def_cfa_offset 64 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r26, -24 -; SPE-NEXT: .cfi_offset r27, -20 -; SPE-NEXT: .cfi_offset r28, -16 -; SPE-NEXT: .cfi_offset r29, -12 -; SPE-NEXT: .cfi_offset r30, -8 -; SPE-NEXT: .cfi_offset r26, -80 -; SPE-NEXT: .cfi_offset r27, -72 -; SPE-NEXT: .cfi_offset r28, -64 -; SPE-NEXT: .cfi_offset r29, -56 -; SPE-NEXT: .cfi_offset r30, -48 -; SPE-NEXT: stw r30, 88(r1) # 4-byte Folded Spill +; SPE-NEXT: .cfi_offset r26, -48 +; SPE-NEXT: .cfi_offset r27, -40 +; SPE-NEXT: .cfi_offset r28, -32 +; SPE-NEXT: .cfi_offset r29, -24 +; SPE-NEXT: .cfi_offset r30, -16 ; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r30, r3 -; SPE-NEXT: evldd r3, 112(r1) -; SPE-NEXT: evldd r11, 120(r1) -; SPE-NEXT: stw r26, 72(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r27, 76(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r28, 80(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r29, 84(r1) # 4-byte Folded Spill +; SPE-NEXT: evldd r3, 80(r1) +; SPE-NEXT: evldd r11, 88(r1) ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill ; SPE-NEXT: efdneg r27, r11 @@ -1860,7 +1685,7 @@ ; SPE-NEXT: evmergehi r5, r9, r9 ; SPE-NEXT: evmergehi r7, r8, r8 ; SPE-NEXT: mr r6, r9 -; SPE-NEXT: evldd r28, 104(r1) +; SPE-NEXT: evldd r28, 72(r1) ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5 @@ -1889,13 +1714,8 @@ ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r30, 88(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r29, 84(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r28, 80(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r27, 76(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r26, 72(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r0, 100(r1) -; SPE-NEXT: addi r1, r1, 96 +; SPE-NEXT: lwz r0, 68(r1) +; SPE-NEXT: addi r1, r1, 64 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %neg = fneg <2 x double> %vf2 @@ -2005,24 +1825,16 @@ ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 ; SPE-NEXT: stw r0, 4(r1) -; SPE-NEXT: stwu r1, -80(r1) -; SPE-NEXT: .cfi_def_cfa_offset 80 +; SPE-NEXT: stwu r1, -48(r1) +; SPE-NEXT: .cfi_def_cfa_offset 48 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r27, -20 -; SPE-NEXT: .cfi_offset r28, -16 -; SPE-NEXT: .cfi_offset r29, -12 -; SPE-NEXT: .cfi_offset r30, -8 -; SPE-NEXT: .cfi_offset r27, -72 -; SPE-NEXT: .cfi_offset r28, -64 -; SPE-NEXT: .cfi_offset r29, -56 -; SPE-NEXT: .cfi_offset r30, -48 -; SPE-NEXT: stw r28, 64(r1) # 4-byte Folded Spill +; SPE-NEXT: .cfi_offset r27, -40 +; SPE-NEXT: .cfi_offset r28, -32 +; SPE-NEXT: .cfi_offset r29, -24 +; SPE-NEXT: .cfi_offset r30, -16 ; SPE-NEXT: evstdd r28, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r28, r3 ; SPE-NEXT: mr r3, r6 -; SPE-NEXT: stw r27, 60(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r29, 68(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill ; SPE-NEXT: evstdd r27, 8(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r29, 24(r1) # 8-byte Folded Spill ; SPE-NEXT: mr r29, r4 @@ -2045,12 +1857,8 @@ ; SPE-NEXT: evldd r29, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r28, 16(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r27, 8(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r0, 84(r1) -; SPE-NEXT: addi r1, r1, 80 +; SPE-NEXT: lwz r0, 52(r1) +; SPE-NEXT: addi r1, r1, 48 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %res = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32( @@ -2076,18 +1884,12 @@ ; SPE: # %bb.0: ; SPE-NEXT: mflr r0 ; SPE-NEXT: stw r0, 4(r1) -; SPE-NEXT: stwu r1, -64(r1) -; SPE-NEXT: .cfi_def_cfa_offset 64 +; SPE-NEXT: stwu r1, -48(r1) +; SPE-NEXT: .cfi_def_cfa_offset 48 ; SPE-NEXT: .cfi_offset lr, 4 -; SPE-NEXT: .cfi_offset r28, -16 -; SPE-NEXT: .cfi_offset r29, -12 -; SPE-NEXT: .cfi_offset r30, -8 -; SPE-NEXT: .cfi_offset r28, -48 -; SPE-NEXT: .cfi_offset r29, -40 -; SPE-NEXT: .cfi_offset r30, -32 -; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill -; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill +; SPE-NEXT: .cfi_offset r28, -32 +; SPE-NEXT: .cfi_offset r29, -24 +; SPE-NEXT: .cfi_offset r30, -16 ; SPE-NEXT: evstdd r28, 16(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r29, 24(r1) # 8-byte Folded Spill ; SPE-NEXT: evstdd r30, 32(r1) # 8-byte Folded Spill @@ -2110,11 +1912,8 @@ ; SPE-NEXT: evldd r30, 32(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r29, 24(r1) # 8-byte Folded Reload ; SPE-NEXT: evldd r28, 16(r1) # 8-byte Folded Reload -; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload -; SPE-NEXT: lwz r0, 68(r1) -; SPE-NEXT: addi r1, r1, 64 +; SPE-NEXT: lwz r0, 52(r1) +; SPE-NEXT: addi r1, r1, 48 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr %res = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64( diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll --- a/llvm/test/CodeGen/PowerPC/spe.ll +++ b/llvm/test/CodeGen/PowerPC/spe.ll @@ -1126,13 +1126,9 @@ ; EFPU2: # %bb.0: # %entry ; EFPU2-NEXT: mflr 0 ; EFPU2-NEXT: stw 0, 4(1) -; EFPU2-NEXT: stwu 1, -96(1) +; EFPU2-NEXT: stwu 1, -80(1) ; EFPU2-NEXT: mfcr 12 -; EFPU2-NEXT: stw 27, 76(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 28, 80(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 29, 84(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 30, 88(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 12, 72(1) +; EFPU2-NEXT: stw 12, 76(1) ; EFPU2-NEXT: evstdd 27, 24(1) # 8-byte Folded Spill ; EFPU2-NEXT: mr 27, 3 ; EFPU2-NEXT: evstdd 28, 32(1) # 8-byte Folded Spill @@ -1163,15 +1159,11 @@ ; EFPU2-NEXT: evldd 30, 48(1) # 8-byte Folded Reload ; EFPU2-NEXT: evldd 29, 40(1) # 8-byte Folded Reload ; EFPU2-NEXT: evldd 28, 32(1) # 8-byte Folded Reload -; EFPU2-NEXT: lwz 12, 72(1) +; EFPU2-NEXT: lwz 12, 76(1) ; EFPU2-NEXT: evldd 27, 24(1) # 8-byte Folded Reload ; EFPU2-NEXT: mtcrf 32, 12 # cr2 -; EFPU2-NEXT: lwz 30, 88(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 29, 84(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 28, 80(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 27, 76(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 0, 100(1) -; EFPU2-NEXT: addi 1, 1, 96 +; EFPU2-NEXT: lwz 0, 84(1) +; EFPU2-NEXT: addi 1, 1, 80 ; EFPU2-NEXT: mtlr 0 ; EFPU2-NEXT: blr entry: @@ -1210,13 +1202,9 @@ ; EFPU2: # %bb.0: # %entry ; EFPU2-NEXT: mflr 0 ; EFPU2-NEXT: stw 0, 4(1) -; EFPU2-NEXT: stwu 1, -96(1) +; EFPU2-NEXT: stwu 1, -80(1) ; EFPU2-NEXT: mfcr 12 -; EFPU2-NEXT: stw 27, 76(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 28, 80(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 29, 84(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 30, 88(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 12, 72(1) +; EFPU2-NEXT: stw 12, 76(1) ; EFPU2-NEXT: evstdd 27, 24(1) # 8-byte Folded Spill ; EFPU2-NEXT: mr 27, 3 ; EFPU2-NEXT: evstdd 28, 32(1) # 8-byte Folded Spill @@ -1238,7 +1226,7 @@ ; EFPU2-NEXT: li 4, 1 ; EFPU2-NEXT: evldd 28, 32(1) # 8-byte Folded Reload ; EFPU2-NEXT: crorc 20, 2, 10 -; EFPU2-NEXT: lwz 12, 72(1) +; EFPU2-NEXT: lwz 12, 76(1) ; EFPU2-NEXT: bc 12, 20, .LBB43_2 ; EFPU2-NEXT: # %bb.1: # %entry ; EFPU2-NEXT: ori 3, 4, 0 @@ -1248,12 +1236,8 @@ ; EFPU2-NEXT: .LBB43_3: # %entry ; EFPU2-NEXT: evldd 27, 24(1) # 8-byte Folded Reload ; EFPU2-NEXT: mtcrf 32, 12 # cr2 -; EFPU2-NEXT: lwz 30, 88(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 29, 84(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 28, 80(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 27, 76(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 0, 100(1) -; EFPU2-NEXT: addi 1, 1, 96 +; EFPU2-NEXT: lwz 0, 84(1) +; EFPU2-NEXT: addi 1, 1, 80 ; EFPU2-NEXT: mtlr 0 ; EFPU2-NEXT: blr entry: @@ -1655,7 +1639,7 @@ ; SPE: # %bb.0: # %entry ; SPE-NEXT: mflr 0 ; SPE-NEXT: stw 0, 4(1) -; SPE-NEXT: stwu 1, -352(1) +; SPE-NEXT: stwu 1, -272(1) ; SPE-NEXT: li 5, 256 ; SPE-NEXT: evstddx 30, 1, 5 # 8-byte Folded Spill ; SPE-NEXT: li 5, 264 @@ -1663,24 +1647,6 @@ ; SPE-NEXT: li 5, .LCPI55_0@l ; SPE-NEXT: lis 6, .LCPI55_0@ha ; SPE-NEXT: evlddx 5, 6, 5 -; SPE-NEXT: stw 14, 280(1) # 4-byte Folded Spill -; SPE-NEXT: stw 15, 284(1) # 4-byte Folded Spill -; SPE-NEXT: stw 16, 288(1) # 4-byte Folded Spill -; SPE-NEXT: stw 17, 292(1) # 4-byte Folded Spill -; SPE-NEXT: stw 18, 296(1) # 4-byte Folded Spill -; SPE-NEXT: stw 19, 300(1) # 4-byte Folded Spill -; SPE-NEXT: stw 20, 304(1) # 4-byte Folded Spill -; SPE-NEXT: stw 21, 308(1) # 4-byte Folded Spill -; SPE-NEXT: stw 22, 312(1) # 4-byte Folded Spill -; SPE-NEXT: stw 23, 316(1) # 4-byte Folded Spill -; SPE-NEXT: stw 24, 320(1) # 4-byte Folded Spill -; SPE-NEXT: stw 25, 324(1) # 4-byte Folded Spill -; SPE-NEXT: stw 26, 328(1) # 4-byte Folded Spill -; SPE-NEXT: stw 27, 332(1) # 4-byte Folded Spill -; SPE-NEXT: stw 28, 336(1) # 4-byte Folded Spill -; SPE-NEXT: stw 29, 340(1) # 4-byte Folded Spill -; SPE-NEXT: stw 30, 344(1) # 4-byte Folded Spill -; SPE-NEXT: stw 31, 348(1) # 4-byte Folded Spill ; SPE-NEXT: evstdd 14, 128(1) # 8-byte Folded Spill ; SPE-NEXT: evstdd 15, 136(1) # 8-byte Folded Spill ; SPE-NEXT: evstdd 16, 144(1) # 8-byte Folded Spill @@ -1698,7 +1664,7 @@ ; SPE-NEXT: evstdd 28, 240(1) # 8-byte Folded Spill ; SPE-NEXT: evstdd 29, 248(1) # 8-byte Folded Spill ; SPE-NEXT: evmergelo 3, 3, 4 -; SPE-NEXT: lwz 4, 360(1) +; SPE-NEXT: lwz 4, 280(1) ; SPE-NEXT: efdadd 3, 3, 3 ; SPE-NEXT: efdadd 3, 3, 5 ; SPE-NEXT: evstdd 3, 24(1) # 8-byte Folded Spill @@ -1724,9 +1690,9 @@ ; SPE-NEXT: evmergehi 3, 4, 4 ; SPE-NEXT: evlddx 31, 1, 5 # 8-byte Folded Reload ; SPE-NEXT: li 5, 256 -; SPE-NEXT: evlddx 30, 1, 5 # 8-byte Folded Reload ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: evlddx 30, 1, 5 # 8-byte Folded Reload ; SPE-NEXT: evldd 29, 248(1) # 8-byte Folded Reload ; SPE-NEXT: evldd 28, 240(1) # 8-byte Folded Reload ; SPE-NEXT: evldd 27, 232(1) # 8-byte Folded Reload @@ -1743,26 +1709,8 @@ ; SPE-NEXT: evldd 16, 144(1) # 8-byte Folded Reload ; SPE-NEXT: evldd 15, 136(1) # 8-byte Folded Reload ; SPE-NEXT: evldd 14, 128(1) # 8-byte Folded Reload -; SPE-NEXT: lwz 31, 348(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 30, 344(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 29, 340(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 28, 336(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 27, 332(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 26, 328(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 25, 324(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 24, 320(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 23, 316(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 22, 312(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 21, 308(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 20, 304(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 19, 300(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 18, 296(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 17, 292(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 16, 288(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 15, 284(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 14, 280(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 0, 356(1) -; SPE-NEXT: addi 1, 1, 352 +; SPE-NEXT: lwz 0, 276(1) +; SPE-NEXT: addi 1, 1, 272 ; SPE-NEXT: mtlr 0 ; SPE-NEXT: blr ; @@ -1770,18 +1718,14 @@ ; EFPU2: # %bb.0: # %entry ; EFPU2-NEXT: mflr 0 ; EFPU2-NEXT: stw 0, 4(1) -; EFPU2-NEXT: stwu 1, -176(1) +; EFPU2-NEXT: stwu 1, -144(1) ; EFPU2-NEXT: mr 5, 3 ; EFPU2-NEXT: mr 6, 4 -; EFPU2-NEXT: stw 27, 156(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 28, 160(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 29, 164(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 30, 168(1) # 4-byte Folded Spill ; EFPU2-NEXT: evstdd 27, 104(1) # 8-byte Folded Spill ; EFPU2-NEXT: evstdd 28, 112(1) # 8-byte Folded Spill ; EFPU2-NEXT: evstdd 29, 120(1) # 8-byte Folded Spill ; EFPU2-NEXT: evstdd 30, 128(1) # 8-byte Folded Spill -; EFPU2-NEXT: lwz 28, 184(1) +; EFPU2-NEXT: lwz 28, 152(1) ; EFPU2-NEXT: bl __adddf3 ; EFPU2-NEXT: lis 5, 16393 ; EFPU2-NEXT: lis 6, -4069 @@ -1811,12 +1755,8 @@ ; EFPU2-NEXT: evldd 29, 120(1) # 8-byte Folded Reload ; EFPU2-NEXT: evldd 28, 112(1) # 8-byte Folded Reload ; EFPU2-NEXT: evldd 27, 104(1) # 8-byte Folded Reload -; EFPU2-NEXT: lwz 30, 168(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 29, 164(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 28, 160(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 27, 156(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 0, 180(1) -; EFPU2-NEXT: addi 1, 1, 176 +; EFPU2-NEXT: lwz 0, 148(1) +; EFPU2-NEXT: addi 1, 1, 144 ; EFPU2-NEXT: mtlr 0 ; EFPU2-NEXT: blr entry: @@ -1843,10 +1783,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr 0 ; CHECK-NEXT: stw 0, 4(1) -; CHECK-NEXT: stwu 1, -48(1) +; CHECK-NEXT: stwu 1, -32(1) ; CHECK-NEXT: cmpwi 3, 1 -; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill ; CHECK-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill ; CHECK-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill ; CHECK-NEXT: blt 0, .LBB56_3 @@ -1867,13 +1805,11 @@ ; CHECK-NEXT: .LBB56_3: ; CHECK-NEXT: # implicit-def: $r5 ; CHECK-NEXT: .LBB56_4: # %for.cond.cleanup -; CHECK-NEXT: evldd 30, 16(1) # 8-byte Folded Reload ; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: evldd 30, 16(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload -; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 0, 52(1) -; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: lwz 0, 36(1) +; CHECK-NEXT: addi 1, 1, 32 ; CHECK-NEXT: mtlr 0 ; CHECK-NEXT: blr entry: @@ -1908,16 +1844,13 @@ ; SPE: # %bb.0: # %entry ; SPE-NEXT: mflr 0 ; SPE-NEXT: stw 0, 4(1) -; SPE-NEXT: stwu 1, -64(1) +; SPE-NEXT: stwu 1, -48(1) ; SPE-NEXT: lwz 4, 0(4) ; SPE-NEXT: lwz 3, 0(3) -; SPE-NEXT: stw 29, 52(1) # 4-byte Folded Spill ; SPE-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill ; SPE-NEXT: efdcfs 29, 4 -; SPE-NEXT: stw 28, 48(1) # 4-byte Folded Spill -; SPE-NEXT: mr 4, 29 -; SPE-NEXT: stw 30, 56(1) # 4-byte Folded Spill ; SPE-NEXT: evstdd 28, 16(1) # 8-byte Folded Spill +; SPE-NEXT: mr 4, 29 ; SPE-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill ; SPE-NEXT: efdcfs 30, 3 ; SPE-NEXT: evmergehi 3, 29, 29 @@ -1935,11 +1868,8 @@ ; SPE-NEXT: evldd 29, 24(1) # 8-byte Folded Reload ; SPE-NEXT: stw 3, 0(3) ; SPE-NEXT: evldd 28, 16(1) # 8-byte Folded Reload -; SPE-NEXT: lwz 30, 56(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 29, 52(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 28, 48(1) # 4-byte Folded Reload -; SPE-NEXT: lwz 0, 68(1) -; SPE-NEXT: addi 1, 1, 64 +; SPE-NEXT: lwz 0, 52(1) +; SPE-NEXT: addi 1, 1, 48 ; SPE-NEXT: mtlr 0 ; SPE-NEXT: blr ; @@ -1947,13 +1877,8 @@ ; EFPU2: # %bb.0: # %entry ; EFPU2-NEXT: mflr 0 ; EFPU2-NEXT: stw 0, 4(1) -; EFPU2-NEXT: stwu 1, -96(1) +; EFPU2-NEXT: stwu 1, -64(1) ; EFPU2-NEXT: lwz 3, 0(3) -; EFPU2-NEXT: stw 26, 72(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 27, 76(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 28, 80(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 29, 84(1) # 4-byte Folded Spill -; EFPU2-NEXT: stw 30, 88(1) # 4-byte Folded Spill ; EFPU2-NEXT: evstdd 26, 16(1) # 8-byte Folded Spill ; EFPU2-NEXT: evstdd 27, 24(1) # 8-byte Folded Spill ; EFPU2-NEXT: evstdd 28, 32(1) # 8-byte Folded Spill @@ -1986,13 +1911,8 @@ ; EFPU2-NEXT: evldd 28, 32(1) # 8-byte Folded Reload ; EFPU2-NEXT: evldd 27, 24(1) # 8-byte Folded Reload ; EFPU2-NEXT: evldd 26, 16(1) # 8-byte Folded Reload -; EFPU2-NEXT: lwz 30, 88(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 29, 84(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 28, 80(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 27, 76(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 26, 72(1) # 4-byte Folded Reload -; EFPU2-NEXT: lwz 0, 100(1) -; EFPU2-NEXT: addi 1, 1, 96 +; EFPU2-NEXT: lwz 0, 68(1) +; EFPU2-NEXT: addi 1, 1, 64 ; EFPU2-NEXT: mtlr 0 ; EFPU2-NEXT: blr entry: