diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -2047,7 +2047,8 @@ unsigned StackSize = determineFrameLayout(MF, true); MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || - hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { + hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize)) || + (Subtarget.hasSPE() && hasSpills(MF) && !isInt<8>(StackSize))) { const TargetRegisterClass &GPRC = PPC::GPRCRegClass; const TargetRegisterClass &G8RC = PPC::G8RCRegClass; const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; diff --git a/llvm/test/CodeGen/PowerPC/spe-spills.ll b/llvm/test/CodeGen/PowerPC/spe-spills.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/spe-spills.ll @@ -0,0 +1,721 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+spe | FileCheck %s +; Tests that spill slots are allocated for stacks larger than 256 bytes on +; powerpcspe targets + +@d = local_unnamed_addr global double* null, align 4 +@c = local_unnamed_addr global i32 0, align 4 +@g = local_unnamed_addr global double 0.000000e+00, align 8 +@e = local_unnamed_addr global double* null, align 4 +@h = local_unnamed_addr global double 0.000000e+00, align 8 +@j = local_unnamed_addr global double 0.000000e+00, align 8 +@f = local_unnamed_addr global double 0.000000e+00, align 8 +@a = local_unnamed_addr global i32 0, align 4 + +; Function Attrs: nofree norecurse nounwind sspstrong uwtable +define i32 @k(double* nocapture readonly %l, double* nocapture %m, i32 %aa, i32 %ab, i32 %n, i32 %ac, i32 %ad) local_unnamed_addr #0 { +; CHECK-LABEL: k: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stwu 1, -480(1) +; CHECK-NEXT: .cfi_def_cfa_offset 480 +; CHECK-NEXT: .cfi_offset r14, -72 +; CHECK-NEXT: .cfi_offset r15, -68 +; CHECK-NEXT: .cfi_offset r16, -64 +; CHECK-NEXT: .cfi_offset r17, -60 +; CHECK-NEXT: .cfi_offset r18, -56 +; CHECK-NEXT: .cfi_offset r19, -52 +; CHECK-NEXT: .cfi_offset r20, -48 +; CHECK-NEXT: .cfi_offset r21, -44 +; CHECK-NEXT: .cfi_offset r22, -40 +; CHECK-NEXT: .cfi_offset r23, -36 +; CHECK-NEXT: .cfi_offset r24, -32 +; CHECK-NEXT: .cfi_offset r25, -28 +; CHECK-NEXT: .cfi_offset r26, -24 +; CHECK-NEXT: .cfi_offset r27, -20 +; CHECK-NEXT: .cfi_offset r28, -16 +; CHECK-NEXT: .cfi_offset r29, -12 +; CHECK-NEXT: .cfi_offset r30, -8 +; CHECK-NEXT: .cfi_offset r31, -4 +; CHECK-NEXT: .cfi_offset r14, -224 +; CHECK-NEXT: .cfi_offset r15, -216 +; CHECK-NEXT: .cfi_offset r16, -208 +; CHECK-NEXT: .cfi_offset r17, -200 +; CHECK-NEXT: .cfi_offset r18, -192 +; CHECK-NEXT: .cfi_offset r19, -184 +; CHECK-NEXT: .cfi_offset r20, -176 +; CHECK-NEXT: .cfi_offset r21, -168 +; CHECK-NEXT: .cfi_offset r22, -160 +; CHECK-NEXT: .cfi_offset r23, -152 +; CHECK-NEXT: .cfi_offset r24, -144 +; CHECK-NEXT: .cfi_offset r25, -136 +; CHECK-NEXT: .cfi_offset r26, -128 +; CHECK-NEXT: .cfi_offset r27, -120 +; CHECK-NEXT: .cfi_offset r28, -112 +; CHECK-NEXT: .cfi_offset r29, -104 +; CHECK-NEXT: .cfi_offset r30, -96 +; CHECK-NEXT: .cfi_offset r31, -88 +; CHECK-NEXT: li 10, 256 +; CHECK-NEXT: cmpwi 7, 1 +; CHECK-NEXT: stw 14, 408(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 15, 412(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 16, 416(1) # 4-byte Folded Spill +; CHECK-NEXT: 
evstddx 14, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 264 +; CHECK-NEXT: stw 17, 420(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 18, 424(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 19, 428(1) # 4-byte Folded Spill +; CHECK-NEXT: evstddx 15, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 272 +; CHECK-NEXT: stw 20, 432(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 21, 436(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 22, 440(1) # 4-byte Folded Spill +; CHECK-NEXT: evstddx 16, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 280 +; CHECK-NEXT: stw 23, 444(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 24, 448(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 25, 452(1) # 4-byte Folded Spill +; CHECK-NEXT: evstddx 17, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 288 +; CHECK-NEXT: stw 26, 456(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 27, 460(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 28, 464(1) # 4-byte Folded Spill +; CHECK-NEXT: evstddx 18, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 296 +; CHECK-NEXT: stw 29, 468(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 30, 472(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 31, 476(1) # 4-byte Folded Spill +; CHECK-NEXT: evstddx 19, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 304 +; CHECK-NEXT: stw 4, 68(1) # 4-byte Folded Spill +; CHECK-NEXT: evstddx 20, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 312 +; CHECK-NEXT: evstddx 21, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 320 +; CHECK-NEXT: evstddx 22, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 328 +; CHECK-NEXT: evstddx 23, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 336 +; CHECK-NEXT: evstddx 24, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 344 +; CHECK-NEXT: evstddx 25, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 352 +; CHECK-NEXT: evstddx 26, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 360 +; CHECK-NEXT: evstddx 27, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 368 +; CHECK-NEXT: evstddx 28, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 376 +; CHECK-NEXT: evstddx 29, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 384 +; CHECK-NEXT: evstddx 30, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: li 10, 392 +; CHECK-NEXT: evstddx 31, 1, 10 # 8-byte Folded Spill +; CHECK-NEXT: blt 0, .LBB0_4 +; CHECK-NEXT: # %bb.1: # %for.body.lr.ph +; CHECK-NEXT: lis 4, c@ha +; CHECK-NEXT: mr 0, 8 +; CHECK-NEXT: mr 8, 6 +; CHECK-NEXT: slwi 6, 9, 3 +; CHECK-NEXT: lis 11, d@ha +; CHECK-NEXT: lis 30, e@ha +; CHECK-NEXT: li 12, 1 +; CHECK-NEXT: lis 10, a@ha +; CHECK-NEXT: lwz 4, c@l(4) +; CHECK-NEXT: li 9, .LCPI0_0@l +; CHECK-NEXT: stw 6, 60(1) # 4-byte Folded Spill +; CHECK-NEXT: mulli 6, 8, 24 +; CHECK-NEXT: isel 29, 7, 12, 0 +; CHECK-NEXT: lwz 12, d@l(11) +; CHECK-NEXT: stw 6, 56(1) # 4-byte Folded Spill +; CHECK-NEXT: slwi 6, 8, 3 +; CHECK-NEXT: lwz 30, e@l(30) +; CHECK-NEXT: efdcfsi 4, 4 +; CHECK-NEXT: stw 6, 52(1) # 4-byte Folded Spill +; CHECK-NEXT: lwz 6, a@l(10) +; CHECK-NEXT: lis 10, .LCPI0_0@ha +; CHECK-NEXT: stw 6, 48(1) # 4-byte Folded Spill +; CHECK-NEXT: evstdd 4, 40(1) # 8-byte Folded Spill +; CHECK-NEXT: slwi 4, 0, 3 +; CHECK-NEXT: evlddx 6, 10, 9 +; CHECK-NEXT: evstdd 6, 24(1) # 8-byte Folded Spill +; CHECK-NEXT: stw 4, 36(1) # 4-byte Folded Spill +; CHECK-NEXT: subf 4, 29, 7 +; CHECK-NEXT: li 7, 0 +; CHECK-NEXT: addi 4, 4, 1 +; CHECK-NEXT: mtctr 4 +; CHECK-NEXT: stw 3, 64(1) # 4-byte Folded Spill +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_2: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: mulli 4, 5, 192 +; CHECK-NEXT: stw 30, 252(1) # 4-byte 
Folded Spill +; CHECK-NEXT: evlddx 30, 3, 7 +; CHECK-NEXT: slwi 31, 5, 4 +; CHECK-NEXT: stw 4, 232(1) # 4-byte Folded Spill +; CHECK-NEXT: mulli 8, 5, 120 +; CHECK-NEXT: add 31, 12, 31 +; CHECK-NEXT: add 4, 3, 4 +; CHECK-NEXT: evlddx 31, 31, 7 +; CHECK-NEXT: mulli 0, 5, 56 +; CHECK-NEXT: evlddx 18, 4, 7 +; CHECK-NEXT: add 9, 3, 8 +; CHECK-NEXT: add 22, 12, 8 +; CHECK-NEXT: mulli 4, 5, 80 +; CHECK-NEXT: evlddx 9, 9, 7 +; CHECK-NEXT: evlddx 22, 22, 7 +; CHECK-NEXT: add 21, 12, 0 +; CHECK-NEXT: efdsub 6, 30, 18 +; CHECK-NEXT: evlddx 21, 21, 7 +; CHECK-NEXT: add 4, 3, 4 +; CHECK-NEXT: evstdd 6, 240(1) # 8-byte Folded Spill +; CHECK-NEXT: evlddx 26, 4, 7 +; CHECK-NEXT: mulli 6, 5, 248 +; CHECK-NEXT: mulli 27, 5, 184 +; CHECK-NEXT: add 23, 12, 6 +; CHECK-NEXT: efdsub 4, 26, 30 +; CHECK-NEXT: evlddx 23, 23, 7 +; CHECK-NEXT: evstdd 4, 216(1) # 8-byte Folded Spill +; CHECK-NEXT: add 4, 3, 6 +; CHECK-NEXT: mulli 25, 5, 24 +; CHECK-NEXT: evlddx 4, 4, 7 +; CHECK-NEXT: mulli 24, 5, 152 +; CHECK-NEXT: efdsub 4, 4, 9 +; CHECK-NEXT: add 9, 3, 27 +; CHECK-NEXT: add 27, 12, 27 +; CHECK-NEXT: add 15, 12, 24 +; CHECK-NEXT: evlddx 9, 9, 7 +; CHECK-NEXT: evstdd 4, 224(1) # 8-byte Folded Spill +; CHECK-NEXT: add 4, 3, 0 +; CHECK-NEXT: evlddx 27, 27, 7 +; CHECK-NEXT: mulli 11, 5, 216 +; CHECK-NEXT: evlddx 15, 15, 7 +; CHECK-NEXT: evlddx 4, 4, 7 +; CHECK-NEXT: stw 11, 160(1) # 4-byte Folded Spill +; CHECK-NEXT: lwz 6, 160(1) # 4-byte Folded Reload +; CHECK-NEXT: mulli 28, 5, 88 +; CHECK-NEXT: efdsub 4, 4, 9 +; CHECK-NEXT: add 9, 3, 24 +; CHECK-NEXT: add 24, 12, 6 +; CHECK-NEXT: evlddx 10, 9, 7 +; CHECK-NEXT: evstdd 4, 208(1) # 8-byte Folded Spill +; CHECK-NEXT: add 4, 3, 25 +; CHECK-NEXT: evlddx 24, 24, 7 +; CHECK-NEXT: mulli 9, 5, 40 +; CHECK-NEXT: evlddx 4, 4, 7 +; CHECK-NEXT: stw 9, 140(1) # 4-byte Folded Spill +; CHECK-NEXT: mulli 17, 5, 176 +; CHECK-NEXT: efdsub 4, 4, 10 +; CHECK-NEXT: add 10, 3, 28 +; CHECK-NEXT: add 28, 12, 28 +; CHECK-NEXT: add 14, 3, 17 +; CHECK-NEXT: add 17, 12, 17 +; CHECK-NEXT: evlddx 10, 10, 7 +; CHECK-NEXT: evstdd 4, 200(1) # 8-byte Folded Spill +; CHECK-NEXT: add 4, 3, 11 +; CHECK-NEXT: evlddx 14, 14, 7 +; CHECK-NEXT: mulli 11, 5, 168 +; CHECK-NEXT: evlddx 17, 17, 7 +; CHECK-NEXT: evlddx 4, 4, 7 +; CHECK-NEXT: stw 11, 116(1) # 4-byte Folded Spill +; CHECK-NEXT: evlddx 28, 28, 7 +; CHECK-NEXT: mulli 16, 5, 96 +; CHECK-NEXT: add 29, 3, 11 +; CHECK-NEXT: efdsub 4, 4, 10 +; CHECK-NEXT: add 10, 3, 9 +; CHECK-NEXT: evlddx 29, 29, 7 +; CHECK-NEXT: add 16, 3, 16 +; CHECK-NEXT: evlddx 10, 10, 7 +; CHECK-NEXT: evstdd 4, 192(1) # 8-byte Folded Spill +; CHECK-NEXT: mulli 9, 5, 232 +; CHECK-NEXT: mulli 4, 5, 104 +; CHECK-NEXT: add 11, 3, 9 +; CHECK-NEXT: efdsub 10, 10, 29 +; CHECK-NEXT: evlddx 11, 11, 7 +; CHECK-NEXT: add 29, 3, 4 +; CHECK-NEXT: add 4, 12, 4 +; CHECK-NEXT: evstdd 10, 184(1) # 8-byte Folded Spill +; CHECK-NEXT: evlddx 29, 29, 7 +; CHECK-NEXT: mulli 20, 5, 48 +; CHECK-NEXT: evlddx 4, 4, 7 +; CHECK-NEXT: mulli 0, 5, 136 +; CHECK-NEXT: add 20, 12, 20 +; CHECK-NEXT: efdsub 10, 11, 29 +; CHECK-NEXT: slwi 11, 5, 3 +; CHECK-NEXT: evlddx 20, 20, 7 +; CHECK-NEXT: add 8, 12, 11 +; CHECK-NEXT: evstdd 10, 168(1) # 8-byte Folded Spill +; CHECK-NEXT: mulli 29, 5, 112 +; CHECK-NEXT: efdsub 10, 30, 14 +; CHECK-NEXT: add 19, 3, 29 +; CHECK-NEXT: add 29, 12, 29 +; CHECK-NEXT: evstdd 10, 144(1) # 8-byte Folded Spill +; CHECK-NEXT: evlddx 19, 19, 7 +; CHECK-NEXT: evlddx 10, 16, 7 +; CHECK-NEXT: evstdd 10, 88(1) # 8-byte Folded Spill +; CHECK-NEXT: efdsub 3, 30, 19 +; CHECK-NEXT: efdadd 26, 26, 19 +; 
CHECK-NEXT: evlddx 19, 12, 7 +; CHECK-NEXT: efdadd 18, 18, 10 +; CHECK-NEXT: add 30, 12, 25 +; CHECK-NEXT: lwz 10, 232(1) # 4-byte Folded Reload +; CHECK-NEXT: evstdd 3, 176(1) # 8-byte Folded Spill +; CHECK-NEXT: evlddx 30, 30, 7 +; CHECK-NEXT: efdsub 3, 18, 26 +; CHECK-NEXT: add 10, 12, 10 +; CHECK-NEXT: evstdd 3, 152(1) # 8-byte Folded Spill +; CHECK-NEXT: mulli 18, 5, 240 +; CHECK-NEXT: efdneg 3, 19 +; CHECK-NEXT: add 18, 12, 18 +; CHECK-NEXT: evstdd 3, 104(1) # 8-byte Folded Spill +; CHECK-NEXT: evlddx 16, 18, 7 +; CHECK-NEXT: evlddx 18, 29, 7 +; CHECK-NEXT: efdsub 3, 19, 31 +; CHECK-NEXT: efdsub 6, 30, 15 +; CHECK-NEXT: evlddx 29, 8, 7 +; CHECK-NEXT: lwz 8, 140(1) # 4-byte Folded Reload +; CHECK-NEXT: efdadd 30, 30, 15 +; CHECK-NEXT: evstdd 3, 96(1) # 8-byte Folded Spill +; CHECK-NEXT: evstdd 6, 160(1) # 8-byte Folded Spill +; CHECK-NEXT: add 6, 12, 9 +; CHECK-NEXT: lwz 9, 116(1) # 4-byte Folded Reload +; CHECK-NEXT: efdsub 3, 16, 18 +; CHECK-NEXT: add 8, 12, 8 +; CHECK-NEXT: evlddx 6, 6, 7 +; CHECK-NEXT: evlddx 8, 8, 7 +; CHECK-NEXT: evstdd 3, 80(1) # 8-byte Folded Spill +; CHECK-NEXT: add 9, 12, 9 +; CHECK-NEXT: efdsub 3, 20, 17 +; CHECK-NEXT: evlddx 9, 9, 7 +; CHECK-NEXT: evstdd 3, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: efdsub 3, 23, 22 +; CHECK-NEXT: efdadd 23, 23, 22 +; CHECK-NEXT: efdadd 8, 8, 9 +; CHECK-NEXT: evstdd 3, 72(1) # 8-byte Folded Spill +; CHECK-NEXT: efdsub 3, 21, 27 +; CHECK-NEXT: efdadd 27, 21, 27 +; CHECK-NEXT: efdadd 9, 23, 27 +; CHECK-NEXT: evldd 27, 40(1) # 8-byte Folded Reload +; CHECK-NEXT: evstdd 3, 128(1) # 8-byte Folded Spill +; CHECK-NEXT: mulli 3, 5, 72 +; CHECK-NEXT: mulli 25, 5, 200 +; CHECK-NEXT: add 5, 12, 0 +; CHECK-NEXT: add 3, 12, 3 +; CHECK-NEXT: evlddx 5, 5, 7 +; CHECK-NEXT: efdsub 14, 6, 4 +; CHECK-NEXT: efdadd 4, 6, 4 +; CHECK-NEXT: evlddx 6, 10, 7 +; CHECK-NEXT: evlddx 3, 3, 7 +; CHECK-NEXT: efdadd 4, 8, 4 +; CHECK-NEXT: efdadd 8, 16, 18 +; CHECK-NEXT: add 25, 12, 25 +; CHECK-NEXT: evstdd 14, 232(1) # 8-byte Folded Spill +; CHECK-NEXT: evlddx 25, 25, 7 +; CHECK-NEXT: efdadd 5, 29, 5 +; CHECK-NEXT: efdsub 26, 24, 28 +; CHECK-NEXT: efdadd 28, 24, 28 +; CHECK-NEXT: efdadd 6, 19, 6 +; CHECK-NEXT: efdadd 10, 30, 28 +; CHECK-NEXT: efdadd 6, 6, 31 +; CHECK-NEXT: lwz 28, 68(1) # 4-byte Folded Reload +; CHECK-NEXT: efdadd 29, 3, 25 +; CHECK-NEXT: efdadd 5, 5, 29 +; CHECK-NEXT: efdsub 0, 3, 25 +; CHECK-NEXT: lwz 3, 64(1) # 4-byte Folded Reload +; CHECK-NEXT: efdadd 30, 5, 4 +; CHECK-NEXT: efdadd 4, 9, 10 +; CHECK-NEXT: efdadd 9, 20, 17 +; CHECK-NEXT: evldd 10, 72(1) # 8-byte Folded Reload +; CHECK-NEXT: efdadd 8, 8, 9 +; CHECK-NEXT: evldd 9, 80(1) # 8-byte Folded Reload +; CHECK-NEXT: efdadd 6, 6, 8 +; CHECK-NEXT: evldd 8, 144(1) # 8-byte Folded Reload +; CHECK-NEXT: evldd 25, 160(1) # 8-byte Folded Reload +; CHECK-NEXT: efdsub 5, 30, 4 +; CHECK-NEXT: efdadd 4, 4, 30 +; CHECK-NEXT: evldd 30, 120(1) # 8-byte Folded Reload +; CHECK-NEXT: efdsub 29, 6, 4 +; CHECK-NEXT: evldd 4, 216(1) # 8-byte Folded Reload +; CHECK-NEXT: evldd 6, 104(1) # 8-byte Folded Reload +; CHECK-NEXT: efdsub 8, 9, 8 +; CHECK-NEXT: evldd 9, 208(1) # 8-byte Folded Reload +; CHECK-NEXT: efdsub 4, 6, 4 +; CHECK-NEXT: add 6, 3, 11 +; CHECK-NEXT: evldd 11, 88(1) # 8-byte Folded Reload +; CHECK-NEXT: evlddx 6, 6, 7 +; CHECK-NEXT: efdsub 9, 10, 9 +; CHECK-NEXT: evldd 10, 96(1) # 8-byte Folded Reload +; CHECK-NEXT: efdsub 6, 0, 6 +; CHECK-NEXT: efdadd 10, 11, 10 +; CHECK-NEXT: evldd 11, 184(1) # 8-byte Folded Reload +; CHECK-NEXT: evldd 0, 168(1) # 8-byte Folded Reload +; CHECK-NEXT: 
efdmul 10, 10, 27 +; CHECK-NEXT: efdadd 11, 11, 0 +; CHECK-NEXT: evldd 0, 240(1) # 8-byte Folded Reload +; CHECK-NEXT: efdmul 11, 11, 27 +; CHECK-NEXT: efdsub 10, 10, 0 +; CHECK-NEXT: evldd 0, 176(1) # 8-byte Folded Reload +; CHECK-NEXT: efdsub 6, 11, 6 +; CHECK-NEXT: evldd 11, 200(1) # 8-byte Folded Reload +; CHECK-NEXT: efdadd 0, 0, 30 +; CHECK-NEXT: evldd 30, 192(1) # 8-byte Folded Reload +; CHECK-NEXT: efdmul 8, 8, 27 +; CHECK-NEXT: efdsub 11, 25, 11 +; CHECK-NEXT: efdsub 8, 8, 0 +; CHECK-NEXT: efdsub 0, 30, 26 +; CHECK-NEXT: efdadd 30, 30, 26 +; CHECK-NEXT: evldd 26, 128(1) # 8-byte Folded Reload +; CHECK-NEXT: efdadd 11, 11, 30 +; CHECK-NEXT: lwz 30, 252(1) # 4-byte Folded Reload +; CHECK-NEXT: efdmul 11, 11, 27 +; CHECK-NEXT: efdadd 0, 25, 0 +; CHECK-NEXT: efdmul 0, 0, 27 +; CHECK-NEXT: efdsub 9, 11, 9 +; CHECK-NEXT: evldd 11, 224(1) # 8-byte Folded Reload +; CHECK-NEXT: efdadd 11, 11, 26 +; CHECK-NEXT: evldd 26, 232(1) # 8-byte Folded Reload +; CHECK-NEXT: efdsub 11, 0, 11 +; CHECK-NEXT: evldd 0, 24(1) # 8-byte Folded Reload +; CHECK-NEXT: efdmul 4, 4, 0 +; CHECK-NEXT: efdsub 9, 9, 11 +; CHECK-NEXT: efdmul 6, 6, 0 +; CHECK-NEXT: evldd 0, 152(1) # 8-byte Folded Reload +; CHECK-NEXT: efdsub 4, 8, 4 +; CHECK-NEXT: efdmul 8, 9, 27 +; CHECK-NEXT: efdmul 9, 4, 27 +; CHECK-NEXT: efdadd 8, 8, 6 +; CHECK-NEXT: efdsub 5, 0, 5 +; CHECK-NEXT: li 0, g@l +; CHECK-NEXT: efdadd 9, 10, 9 +; CHECK-NEXT: efdmul 8, 8, 27 +; CHECK-NEXT: lis 27, g@ha +; CHECK-NEXT: evstddx 26, 27, 0 +; CHECK-NEXT: lwz 0, 52(1) # 4-byte Folded Reload +; CHECK-NEXT: evstddx 29, 30, 0 +; CHECK-NEXT: evstdd 5, 0(28) +; CHECK-NEXT: efdsub 5, 8, 9 +; CHECK-NEXT: li 8, j@l +; CHECK-NEXT: lis 9, j@ha +; CHECK-NEXT: evstddx 6, 9, 8 +; CHECK-NEXT: li 6, f@l +; CHECK-NEXT: lis 8, f@ha +; CHECK-NEXT: evstddx 11, 8, 6 +; CHECK-NEXT: li 6, h@l +; CHECK-NEXT: lis 8, h@ha +; CHECK-NEXT: evstddx 4, 8, 6 +; CHECK-NEXT: lwz 4, 56(1) # 4-byte Folded Reload +; CHECK-NEXT: evstddx 5, 30, 4 +; CHECK-NEXT: lwz 4, 60(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 5, 48(1) # 4-byte Folded Reload +; CHECK-NEXT: add 30, 30, 4 +; CHECK-NEXT: lwz 4, 36(1) # 4-byte Folded Reload +; CHECK-NEXT: add 7, 7, 4 +; CHECK-NEXT: bdnz .LBB0_2 +; CHECK-NEXT: # %bb.3: # %for.cond.for.end_crit_edge +; CHECK-NEXT: add 3, 12, 7 +; CHECK-NEXT: lis 4, d@ha +; CHECK-NEXT: lis 5, e@ha +; CHECK-NEXT: stw 3, d@l(4) +; CHECK-NEXT: stw 30, e@l(5) +; CHECK-NEXT: .LBB0_4: # %for.end +; CHECK-NEXT: li 3, 392 +; CHECK-NEXT: evlddx 31, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 384 +; CHECK-NEXT: lwz 31, 476(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 30, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 376 +; CHECK-NEXT: lwz 30, 472(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 29, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 368 +; CHECK-NEXT: lwz 29, 468(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 28, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 360 +; CHECK-NEXT: lwz 28, 464(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 27, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 352 +; CHECK-NEXT: lwz 27, 460(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 26, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 344 +; CHECK-NEXT: lwz 26, 456(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 25, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 336 +; CHECK-NEXT: lwz 25, 452(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 24, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 328 +; CHECK-NEXT: lwz 24, 448(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 23, 1, 3 # 
8-byte Folded Reload +; CHECK-NEXT: li 3, 320 +; CHECK-NEXT: lwz 23, 444(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 22, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 312 +; CHECK-NEXT: lwz 22, 440(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 21, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 304 +; CHECK-NEXT: lwz 21, 436(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 20, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 296 +; CHECK-NEXT: lwz 20, 432(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 19, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 288 +; CHECK-NEXT: lwz 19, 428(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 18, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 280 +; CHECK-NEXT: lwz 18, 424(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 17, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 272 +; CHECK-NEXT: lwz 17, 420(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 16, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 264 +; CHECK-NEXT: lwz 16, 416(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 15, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: li 3, 256 +; CHECK-NEXT: lwz 15, 412(1) # 4-byte Folded Reload +; CHECK-NEXT: evlddx 14, 1, 3 # 8-byte Folded Reload +; CHECK-NEXT: lwz 14, 408(1) # 4-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 480 +; CHECK-NEXT: blr +entry: + %cmp388 = icmp sgt i32 %n, 0 + br i1 %cmp388, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %0 = load i32, i32* @c, align 4, !tbaa !3 + %conv = sitofp i32 %0 to double + %mul174 = mul nsw i32 %ab, 3 + %1 = load i32, i32* @a, align 4, !tbaa !3 + %d.promoted = load double*, double** @d, align 4, !tbaa !7 + %e.promoted = load double*, double** @e, align 4, !tbaa !7 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %add.ptr178393 = phi double* [ %e.promoted, %for.body.lr.ph ], [ %add.ptr178, %for.body ] + %add.ptr177392 = phi double* [ %d.promoted, %for.body.lr.ph ], [ %add.ptr177, %for.body ] + %l.addr.0391 = phi double* [ %l, %for.body.lr.ph ], [ %add.ptr, %for.body ] + %aa.addr.0390 = phi i32 [ %aa, %for.body.lr.ph ], [ %1, %for.body ] + %i.0389 = phi i32 [ %n, %for.body.lr.ph ], [ %sub176, %for.body ] + %2 = load double, double* %add.ptr177392, align 8, !tbaa !9 + %3 = load double, double* %l.addr.0391, align 8, !tbaa !9 + %mul = mul nsw i32 %aa.addr.0390, 24 + %arrayidx2 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul + %4 = load double, double* %arrayidx2, align 8, !tbaa !9 + %sub = fsub double %3, %4 + %arrayidx4 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul + %5 = load double, double* %arrayidx4, align 8, !tbaa !9 + %add = fadd double %2, %5 + %mul5 = mul nsw i32 %aa.addr.0390, 12 + %arrayidx6 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul5 + %6 = load double, double* %arrayidx6, align 8, !tbaa !9 + %mul8 = shl nsw i32 %aa.addr.0390, 1 + %arrayidx9 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul8 + %7 = load double, double* %arrayidx9, align 8, !tbaa !9 + %sub10 = fsub double %2, %7 + %add11 = fadd double %6, %sub10 + %fneg = fneg double %2 + %mul13 = mul nsw i32 %aa.addr.0390, 10 + %arrayidx14 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul13 + %8 = load double, double* %arrayidx14, align 8, !tbaa !9 + %sub16 = fsub double %8, %3 + %sub17 = fsub double %fneg, %sub16 + %mul18 = fmul double %sub17, 0.000000e+00 + %mul20 = mul nsw i32 %aa.addr.0390, 14 + %arrayidx21 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul20 + %9 = load double, 
double* %arrayidx21, align 8, !tbaa !9 + %sub22 = fsub double %3, %9 + %mul23 = mul nsw i32 %aa.addr.0390, 30 + %arrayidx24 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul23 + %10 = load double, double* %arrayidx24, align 8, !tbaa !9 + %arrayidx26 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul20 + %11 = load double, double* %arrayidx26, align 8, !tbaa !9 + %add27 = fadd double %10, %11 + %sub28 = fsub double %10, %11 + %mul30 = mul nsw i32 %aa.addr.0390, 22 + %arrayidx31 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul30 + %12 = load double, double* %arrayidx31, align 8, !tbaa !9 + %sub32 = fsub double %3, %12 + %mul33 = mul nsw i32 %aa.addr.0390, 6 + %arrayidx34 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul33 + %13 = load double, double* %arrayidx34, align 8, !tbaa !9 + %arrayidx36 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul30 + %14 = load double, double* %arrayidx36, align 8, !tbaa !9 + %add37 = fadd double %13, %14 + %sub38 = fsub double %13, %14 + %add39 = fadd double %add27, %add37 + %sub40 = fsub double %sub28, %sub32 + %add41 = fadd double %sub22, %sub38 + %mul43 = fmul double %sub40, %conv + %sub44 = fsub double %mul43, %add41 + %mul45 = mul nsw i32 %aa.addr.0390, 31 + %arrayidx46 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul45 + %15 = load double, double* %arrayidx46, align 8, !tbaa !9 + %mul47 = mul nsw i32 %aa.addr.0390, 15 + %arrayidx48 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul47 + %16 = load double, double* %arrayidx48, align 8, !tbaa !9 + %sub49 = fsub double %15, %16 + %arrayidx51 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul45 + %17 = load double, double* %arrayidx51, align 8, !tbaa !9 + %arrayidx53 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul47 + %18 = load double, double* %arrayidx53, align 8, !tbaa !9 + %sub54 = fsub double %17, %18 + %add55 = fadd double %17, %18 + %mul56 = mul nsw i32 %aa.addr.0390, 7 + %arrayidx57 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul56 + %19 = load double, double* %arrayidx57, align 8, !tbaa !9 + %mul58 = mul nsw i32 %aa.addr.0390, 23 + %arrayidx59 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul58 + %20 = load double, double* %arrayidx59, align 8, !tbaa !9 + %sub60 = fsub double %19, %20 + %arrayidx62 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul56 + %21 = load double, double* %arrayidx62, align 8, !tbaa !9 + %arrayidx64 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul58 + %22 = load double, double* %arrayidx64, align 8, !tbaa !9 + %sub65 = fsub double %21, %22 + %add66 = fadd double %21, %22 + %mul67 = mul nsw i32 %aa.addr.0390, 3 + %arrayidx68 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul67 + %23 = load double, double* %arrayidx68, align 8, !tbaa !9 + %mul69 = mul nsw i32 %aa.addr.0390, 19 + %arrayidx70 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul69 + %24 = load double, double* %arrayidx70, align 8, !tbaa !9 + %sub71 = fsub double %23, %24 + %arrayidx73 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul67 + %25 = load double, double* %arrayidx73, align 8, !tbaa !9 + %arrayidx75 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul69 + %26 = load double, double* %arrayidx75, align 8, !tbaa !9 + %sub76 = fsub double %25, %26 + %add77 = fadd double %25, %26 + %mul78 = mul nsw i32 %aa.addr.0390, 27 + %arrayidx79 = getelementptr inbounds 
double, double* %l.addr.0391, i32 %mul78 + %27 = load double, double* %arrayidx79, align 8, !tbaa !9 + %mul80 = mul nsw i32 %aa.addr.0390, 11 + %arrayidx81 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul80 + %28 = load double, double* %arrayidx81, align 8, !tbaa !9 + %sub82 = fsub double %27, %28 + %arrayidx84 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul78 + %29 = load double, double* %arrayidx84, align 8, !tbaa !9 + %arrayidx86 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul80 + %30 = load double, double* %arrayidx86, align 8, !tbaa !9 + %sub87 = fsub double %29, %30 + %add88 = fadd double %29, %30 + %sub89 = fsub double %sub54, %sub60 + %add90 = fadd double %sub49, %sub65 + %add91 = fadd double %add55, %add66 + %add92 = fadd double %add77, %add88 + %sub93 = fsub double %sub76, %sub71 + %add94 = fadd double %sub82, %sub87 + %add95 = fadd double %sub93, %add94 + %sub96 = fsub double %sub82, %sub87 + %add97 = fadd double %sub76, %sub96 + %arrayidx98 = getelementptr inbounds double, double* %l.addr.0391, i32 %aa.addr.0390 + %31 = load double, double* %arrayidx98, align 8, !tbaa !9 + %arrayidx100 = getelementptr inbounds double, double* %add.ptr177392, i32 %aa.addr.0390 + %32 = load double, double* %arrayidx100, align 8, !tbaa !9 + %mul101 = mul nsw i32 %aa.addr.0390, 17 + %arrayidx102 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul101 + %33 = load double, double* %arrayidx102, align 8, !tbaa !9 + %add103 = fadd double %32, %33 + %mul104 = mul nsw i32 %aa.addr.0390, 9 + %arrayidx105 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul104 + %34 = load double, double* %arrayidx105, align 8, !tbaa !9 + %mul106 = mul nsw i32 %aa.addr.0390, 25 + %arrayidx107 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul106 + %35 = load double, double* %arrayidx107, align 8, !tbaa !9 + %sub108 = fsub double %34, %35 + %add109 = fadd double %34, %35 + %mul110 = mul nsw i32 %aa.addr.0390, 5 + %arrayidx111 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul110 + %36 = load double, double* %arrayidx111, align 8, !tbaa !9 + %mul112 = mul nsw i32 %aa.addr.0390, 21 + %arrayidx113 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul112 + %37 = load double, double* %arrayidx113, align 8, !tbaa !9 + %sub114 = fsub double %36, %37 + %arrayidx116 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul110 + %38 = load double, double* %arrayidx116, align 8, !tbaa !9 + %arrayidx118 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul112 + %39 = load double, double* %arrayidx118, align 8, !tbaa !9 + %add119 = fadd double %38, %39 + %mul120 = mul nsw i32 %aa.addr.0390, 29 + %arrayidx121 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul120 + %40 = load double, double* %arrayidx121, align 8, !tbaa !9 + %mul122 = mul nsw i32 %aa.addr.0390, 13 + %arrayidx123 = getelementptr inbounds double, double* %l.addr.0391, i32 %mul122 + %41 = load double, double* %arrayidx123, align 8, !tbaa !9 + %sub124 = fsub double %40, %41 + %arrayidx126 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul120 + %42 = load double, double* %arrayidx126, align 8, !tbaa !9 + %arrayidx128 = getelementptr inbounds double, double* %add.ptr177392, i32 %mul122 + %43 = load double, double* %arrayidx128, align 8, !tbaa !9 + %sub129 = fsub double %42, %43 + store double %sub129, double* @g, align 8, !tbaa !9 + %add130 = fadd double %42, %43 + %add131 = fsub double %sub108, %31 + %add132 = fadd 
double %add103, %add109 + %add133 = fadd double %add119, %add130 + %add134 = fadd double %sub114, %sub124 + %add135 = fadd double %4, %6 + %add136 = fadd double %8, %9 + %sub137 = fsub double %add135, %add136 + %add138 = fadd double %add132, %add133 + %add139 = fadd double %add91, %add92 + %sub140 = fsub double %add138, %add139 + %add141 = fadd double %add139, %add138 + %add142 = fadd double %add, %7 + %add143 = fadd double %add142, %add39 + %sub144 = fsub double %add143, %add141 + %arrayidx145 = getelementptr inbounds double, double* %add.ptr178393, i32 %ab + store double %sub144, double* %arrayidx145, align 8, !tbaa !9 + %sub146 = fsub double %sub137, %sub140 + store double %sub146, double* %m, align 8, !tbaa !9 + %mul150 = fmul double %add11, %conv + %sub151 = fsub double %mul150, %sub + %sub153 = fsub double %sub44, %mul18 + store double %sub153, double* @h, align 8, !tbaa !9 + %mul155 = fmul double %sub153, %conv + %sub156 = fadd double %sub151, %mul155 + %mul158 = fmul double %add134, %conv + %sub159 = fsub double %mul158, %add131 + %mul160 = fmul double %sub159, 0.000000e+00 + store double %mul160, double* @j, align 8, !tbaa !9 + %mul162 = fmul double %add95, %conv + %sub163 = fsub double %mul162, %sub89 + %mul165 = fmul double %add97, %conv + %sub166 = fsub double %mul165, %add90 + store double %sub166, double* @f, align 8, !tbaa !9 + %sub168 = fsub double %sub163, %sub166 + %mul169 = fmul double %sub168, %conv + %add170 = fadd double %mul169, %mul160 + %mul172 = fmul double %add170, %conv + %sub173 = fsub double %mul172, %sub156 + %arrayidx175 = getelementptr inbounds double, double* %add.ptr178393, i32 %mul174 + store double %sub173, double* %arrayidx175, align 8, !tbaa !9 + %sub176 = add nsw i32 %i.0389, -1 + %add.ptr = getelementptr inbounds double, double* %l.addr.0391, i32 %ac + %add.ptr177 = getelementptr inbounds double, double* %add.ptr177392, i32 %ac + %add.ptr178 = getelementptr inbounds double, double* %add.ptr178393, i32 %ad + %cmp = icmp sgt i32 %i.0389, 1 + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body + store double* %add.ptr177, double** @d, align 4, !tbaa !7 + store double* %add.ptr178, double** @e, align 4, !tbaa !7 + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + ret i32 undef +} + +attributes #0 = { nofree norecurse nounwind sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="e500" "target-features"="+spe,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-htm,-power8-vector,-power9-vector,-qpx,-vsx" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 48571b6ce89a12a323c8632cea05afd0c91aeb32)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"any pointer", !5, i64 0} +!9 = !{!10, !10, i64 0} +!10 = !{!"double", !5, i64 0}
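

For context, a minimal sketch of the offset-range reasoning behind the new guard in the PPCFrameLowering hunk above. This is illustrative only, not code from the patch; the helper name, the simplified signature, and the standalone predicate are assumptions. SPE doubleword spill instructions (evstdd/evldd) can only encode a small scaled displacement, so once the frame outgrows the 8-bit signed immediate range tested by isInt<8>, the spill offset has to be materialized in a register (the li 10, 256 / evstddx 14, 1, 10 pairs in the CHECK lines above), and frame lowering must add an emergency spill slot so the register scavenger can free up a GPR for that.

    #include <cstdint>

    // Hypothetical stand-in for llvm::isInt<N>: does Value fit in an
    // N-bit signed immediate?
    template <unsigned N> constexpr bool fitsInSignedNBits(int64_t Value) {
      return Value >= -(INT64_C(1) << (N - 1)) &&
             Value < (INT64_C(1) << (N - 1));
    }

    // Simplified restatement of the condition the patch adds. The real
    // check in PPCFrameLowering.cpp also ORs in variable-sized objects,
    // CR/VRSAVE spills, non-register-immediate spills, and the existing
    // 16-bit stack-size test.
    bool speSpillNeedsScratch(bool HasSPE, bool HasSpills,
                              uint64_t StackSize) {
      return HasSPE && HasSpills && !fitsInSignedNBits<8>(StackSize);
    }

With the 480-byte frame in the test above, speSpillNeedsScratch(true, true, 480) returns true, which is why the prologue and epilogue form spill addresses with li plus evstddx/evlddx instead of a plain immediate displacement.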