Index: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h
@@ -357,6 +357,25 @@
                             unsigned SrcReg2, int Mask, int Value,
                             const MachineRegisterInfo *MRI) const override;
 
+
+  /// Decompose a simple "immediate offset + base register" load/store.
+  /// On success, set \p BaseOp to the base operand, \p Offset to the byte
+  /// offset and \p Width to the size in bytes of the memory being
+  /// loaded/stored (e.g. 1, 2, 4, 8), and return true. Return false (leaving
+  /// the outputs untouched) for any other instruction form.
+  bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
+                                    const MachineOperand *&BaseOp,
+                                    int64_t &Offset, unsigned &Width,
+                                    const TargetRegisterInfo *TRI) const;
+
+  /// Return true if the two memory instructions can be shown to access
+  /// non-overlapping byte ranges off the same base operand, and false
+  /// otherwise (including when nothing can be proven).
+  bool
+  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+                                  const MachineInstr &MIb,
+                                  AliasAnalysis *AA = nullptr) const override;
+
   /// GetInstSize - Return the number of bytes of code the specified
   /// instruction may be. This returns the maximum number of bytes.
   ///
Index: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3996,3 +3996,66 @@
   return LoopCountReg;
 }
 
+// Decompose a load/store of the form "immediate offset, base register".
+// On success, return true and set BaseOp to the base operand, Offset to the
+// byte offset and Width to the size of memory that is being loaded/stored.
+// Return false, leaving the outputs untouched, for any other form.
+bool PPCInstrInfo::getMemOperandWithOffsetWidth(
+    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
+    unsigned &Width, const TargetRegisterInfo *TRI) const {
+  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+
+  // Handle only loads/stores with base register followed by immediate offset.
+  if (LdSt.getNumExplicitOperands() != 3)
+    return false;
+  if (!LdSt.getOperand(1).isImm() || !LdSt.getOperand(2).isReg())
+    return false;
+
+  // The access width is read from the memory operand, so exactly one is
+  // required.
+  if (!LdSt.hasOneMemOperand())
+    return false;
+
+  Width = (*LdSt.memoperands_begin())->getSize();
+  Offset = LdSt.getOperand(1).getImm();
+  BaseOp = &LdSt.getOperand(2);
+  return true;
+}
+
+// Return true if MIa and MIb can be shown to access non-overlapping memory:
+// both decompose to the same base operand and the lower access ends at or
+// before the offset of the higher one. Return false whenever that cannot be
+// established.
+bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
+    const MachineInstr &MIa, const MachineInstr &MIb,
+    AliasAnalysis * /*AA*/) const {
+  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
+  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
+
+  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
+      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
+    return false;
+
+  // Retrieve the base register, offset from the base register and width. Width
+  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
+  // base registers are identical, and the offset of a lower memory access +
+  // the width doesn't overlap the offset of a higher memory access,
+  // then the memory accesses are different.
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
+  int64_t OffsetA = 0, OffsetB = 0;
+  unsigned int WidthA = 0, WidthB = 0;
+  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
+    if (BaseOpA->isIdenticalTo(*BaseOpB)) {
+      // Keep the arithmetic in 64 bits: the offsets are int64_t, and
+      // narrowing them to int could truncate and corrupt the comparison.
+      const int64_t LowOffset = std::min(OffsetA, OffsetB);
+      const int64_t HighOffset = std::max(OffsetA, OffsetB);
+      const int64_t LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+      if (LowOffset + LowWidth <= HighOffset)
+        return true;
+    }
+  }
+  return false;
+}
Index: llvm/trunk/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
+++ llvm/trunk/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -12,26 +12,26 @@
 ; CHECK-NEXT: stw 30, 416(1) # 4-byte Folded Spill
 ; CHECK-NEXT: lis 3, .LCPI0_0@ha
 ; CHECK-NEXT: stw 12, 408(1)
+; CHECK-NEXT: stfd 2, 376(1)
 ; CHECK-NEXT: stfd 27, 424(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 1, 384(1)
 ; CHECK-NEXT: stfd 28, 432(1) # 8-byte Folded Spill
 ; CHECK-NEXT: stfd 29, 440(1) # 8-byte Folded Spill
 ; CHECK-NEXT: stfd 30, 448(1) # 8-byte Folded Spill
 ; CHECK-NEXT: stfd 31, 456(1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd 2, 376(1)
-; CHECK-NEXT: lfs 27, .LCPI0_0@l(3)
-; CHECK-NEXT: stfd 1, 384(1)
 ; CHECK-NEXT: lwz 4, 380(1)
+; CHECK-NEXT: lfs 27, .LCPI0_0@l(3)
+; CHECK-NEXT: lwz 3, 384(1)
+; CHECK-NEXT: stw 4, 396(1)
 ; CHECK-NEXT: fcmpu 0, 2, 27
+; CHECK-NEXT: lwz 4, 376(1)
 ; CHECK-NEXT: fcmpu 1, 1, 27
 ; CHECK-NEXT: crand 20, 6, 0
-; CHECK-NEXT: stw 4, 396(1)
 ; CHECK-NEXT: cror 20, 4, 20
-; CHECK-NEXT: lwz 4, 376(1)
 ; CHECK-NEXT: stw 4, 392(1)
+; CHECK-NEXT: stw 3, 400(1)
 ; CHECK-NEXT: lwz 4, 388(1)
 ; CHECK-NEXT: stw 4, 404(1)
-; CHECK-NEXT: lwz 3, 384(1)
-; CHECK-NEXT: stw 3, 400(1)
 ; CHECK-NEXT: bc 4, 20, .LBB0_2
 ; CHECK-NEXT: # %bb.1: # %bb5
 ; CHECK-NEXT: li 3, 0
@@ -39,50 +39,50 @@
 ; CHECK-NEXT: b .LBB0_16
 ; CHECK-NEXT: .LBB0_2: # %bb1
 ; CHECK-NEXT: lfd 0, 400(1)
+; CHECK-NEXT: lis 3, 15856
+; CHECK-NEXT: stw 3, 336(1)
 ; CHECK-NEXT: lfd 1, 392(1)
 ; CHECK-NEXT: li 29, 0
-; CHECK-NEXT: lis 3, 15856
 ; CHECK-NEXT: stfd 0, 304(1)
-; CHECK-NEXT: stfd 1, 296(1)
 ; CHECK-NEXT: stw 29, 340(1)
-; CHECK-NEXT: stw 3, 336(1)
 ; CHECK-NEXT: stw 29, 332(1)
 ; CHECK-NEXT: stw 29, 328(1)
 ; CHECK-NEXT: lwz 3, 308(1)
+; CHECK-NEXT: stfd 1, 296(1)
+; CHECK-NEXT: lfd 3, 336(1)
+; CHECK-NEXT: lfd
4, 328(1) ; CHECK-NEXT: stw 3, 324(1) ; CHECK-NEXT: lwz 3, 304(1) ; CHECK-NEXT: stw 3, 320(1) ; CHECK-NEXT: lwz 3, 300(1) +; CHECK-NEXT: lfd 31, 320(1) ; CHECK-NEXT: stw 3, 316(1) +; CHECK-NEXT: fmr 1, 31 ; CHECK-NEXT: lwz 3, 296(1) ; CHECK-NEXT: stw 3, 312(1) -; CHECK-NEXT: lfd 31, 320(1) ; CHECK-NEXT: lfd 30, 312(1) -; CHECK-NEXT: lfd 3, 336(1) -; CHECK-NEXT: lfd 4, 328(1) -; CHECK-NEXT: fmr 1, 31 ; CHECK-NEXT: fmr 2, 30 ; CHECK-NEXT: bl __gcc_qmul@PLT ; CHECK-NEXT: lis 3, 16864 ; CHECK-NEXT: stfd 1, 280(1) -; CHECK-NEXT: fmr 29, 1 +; CHECK-NEXT: stw 3, 368(1) ; CHECK-NEXT: stfd 2, 288(1) -; CHECK-NEXT: fmr 28, 2 ; CHECK-NEXT: stw 29, 372(1) -; CHECK-NEXT: stw 3, 368(1) ; CHECK-NEXT: stw 29, 364(1) ; CHECK-NEXT: stw 29, 360(1) +; CHECK-NEXT: fmr 29, 1 ; CHECK-NEXT: lwz 3, 284(1) +; CHECK-NEXT: fmr 28, 2 +; CHECK-NEXT: lfd 3, 368(1) +; CHECK-NEXT: lfd 4, 360(1) ; CHECK-NEXT: stw 3, 356(1) ; CHECK-NEXT: lwz 3, 280(1) ; CHECK-NEXT: stw 3, 352(1) ; CHECK-NEXT: lwz 3, 292(1) +; CHECK-NEXT: lfd 1, 352(1) ; CHECK-NEXT: stw 3, 348(1) ; CHECK-NEXT: lwz 3, 288(1) ; CHECK-NEXT: stw 3, 344(1) -; CHECK-NEXT: lfd 3, 368(1) -; CHECK-NEXT: lfd 4, 360(1) -; CHECK-NEXT: lfd 1, 352(1) ; CHECK-NEXT: lfd 2, 344(1) ; CHECK-NEXT: bl __gcc_qsub@PLT ; CHECK-NEXT: mffs 0 @@ -102,8 +102,8 @@ ; CHECK-NEXT: lfs 1, .LCPI0_1@l(3) ; CHECK-NEXT: fctiwz 0, 0 ; CHECK-NEXT: stfd 0, 152(1) -; CHECK-NEXT: fcmpu 1, 29, 1 ; CHECK-NEXT: lwz 3, 164(1) +; CHECK-NEXT: fcmpu 1, 29, 1 ; CHECK-NEXT: lwz 4, 156(1) ; CHECK-NEXT: crandc 20, 6, 0 ; CHECK-NEXT: cror 20, 5, 20 @@ -120,25 +120,25 @@ ; CHECK-NEXT: bl __floatditf@PLT ; CHECK-NEXT: lis 3, 17392 ; CHECK-NEXT: stfd 1, 208(1) -; CHECK-NEXT: fmr 29, 1 +; CHECK-NEXT: stw 3, 240(1) ; CHECK-NEXT: stfd 2, 200(1) -; CHECK-NEXT: fmr 28, 2 ; CHECK-NEXT: stw 29, 244(1) -; CHECK-NEXT: stw 3, 240(1) -; CHECK-NEXT: cmpwi 2, 30, 0 ; CHECK-NEXT: stw 29, 236(1) ; CHECK-NEXT: stw 29, 232(1) +; CHECK-NEXT: fmr 29, 1 ; CHECK-NEXT: lwz 3, 212(1) +; CHECK-NEXT: fmr 28, 2 +; 
CHECK-NEXT: lfd 3, 240(1) +; CHECK-NEXT: lfd 4, 232(1) +; CHECK-NEXT: cmpwi 2, 30, 0 ; CHECK-NEXT: stw 3, 228(1) ; CHECK-NEXT: lwz 3, 208(1) ; CHECK-NEXT: stw 3, 224(1) ; CHECK-NEXT: lwz 3, 204(1) +; CHECK-NEXT: lfd 1, 224(1) ; CHECK-NEXT: stw 3, 220(1) ; CHECK-NEXT: lwz 3, 200(1) ; CHECK-NEXT: stw 3, 216(1) -; CHECK-NEXT: lfd 3, 240(1) -; CHECK-NEXT: lfd 4, 232(1) -; CHECK-NEXT: lfd 1, 224(1) ; CHECK-NEXT: lfd 2, 216(1) ; CHECK-NEXT: bl __gcc_qadd@PLT ; CHECK-NEXT: blt 2, .LBB0_7 @@ -150,60 +150,60 @@ ; CHECK-NEXT: fmr 1, 29 ; CHECK-NEXT: .LBB0_9: # %bb1 ; CHECK-NEXT: stfd 1, 184(1) -; CHECK-NEXT: fmr 1, 31 ; CHECK-NEXT: stfd 2, 192(1) -; CHECK-NEXT: fmr 2, 30 +; CHECK-NEXT: fmr 1, 31 ; CHECK-NEXT: lwz 3, 188(1) +; CHECK-NEXT: fmr 2, 30 ; CHECK-NEXT: stw 3, 260(1) ; CHECK-NEXT: lwz 3, 184(1) ; CHECK-NEXT: stw 3, 256(1) ; CHECK-NEXT: lwz 3, 196(1) +; CHECK-NEXT: lfd 3, 256(1) ; CHECK-NEXT: stw 3, 252(1) ; CHECK-NEXT: lwz 3, 192(1) ; CHECK-NEXT: stw 3, 248(1) -; CHECK-NEXT: lfd 3, 256(1) ; CHECK-NEXT: lfd 4, 248(1) ; CHECK-NEXT: bl __gcc_qsub@PLT ; CHECK-NEXT: stfd 2, 176(1) -; CHECK-NEXT: fcmpu 0, 2, 27 ; CHECK-NEXT: stfd 1, 168(1) -; CHECK-NEXT: fcmpu 1, 1, 27 +; CHECK-NEXT: fcmpu 0, 2, 27 ; CHECK-NEXT: lwz 3, 180(1) +; CHECK-NEXT: fcmpu 1, 1, 27 ; CHECK-NEXT: crandc 20, 6, 0 ; CHECK-NEXT: cror 21, 5, 7 -; CHECK-NEXT: cror 20, 21, 20 ; CHECK-NEXT: stw 3, 268(1) +; CHECK-NEXT: cror 20, 21, 20 ; CHECK-NEXT: lwz 3, 176(1) ; CHECK-NEXT: stw 3, 264(1) ; CHECK-NEXT: lwz 3, 172(1) +; CHECK-NEXT: lfd 30, 264(1) ; CHECK-NEXT: stw 3, 276(1) ; CHECK-NEXT: lwz 3, 168(1) ; CHECK-NEXT: stw 3, 272(1) -; CHECK-NEXT: lfd 30, 264(1) ; CHECK-NEXT: lfd 31, 272(1) ; CHECK-NEXT: bc 12, 20, .LBB0_13 ; CHECK-NEXT: # %bb.10: # %bb2 -; CHECK-NEXT: fneg 29, 30 ; CHECK-NEXT: fneg 28, 31 -; CHECK-NEXT: li 29, 0 -; CHECK-NEXT: lis 3, 16864 ; CHECK-NEXT: stfd 28, 48(1) +; CHECK-NEXT: lis 3, 16864 +; CHECK-NEXT: stw 3, 80(1) +; CHECK-NEXT: fneg 29, 30 +; CHECK-NEXT: lwz 3, 52(1) ; CHECK-NEXT: 
stfd 29, 40(1) +; CHECK-NEXT: li 29, 0 ; CHECK-NEXT: stw 29, 84(1) -; CHECK-NEXT: stw 3, 80(1) ; CHECK-NEXT: stw 29, 76(1) ; CHECK-NEXT: stw 29, 72(1) -; CHECK-NEXT: lwz 3, 52(1) ; CHECK-NEXT: stw 3, 68(1) +; CHECK-NEXT: lfd 3, 80(1) +; CHECK-NEXT: lfd 4, 72(1) ; CHECK-NEXT: lwz 3, 48(1) ; CHECK-NEXT: stw 3, 64(1) ; CHECK-NEXT: lwz 3, 44(1) +; CHECK-NEXT: lfd 1, 64(1) ; CHECK-NEXT: stw 3, 60(1) ; CHECK-NEXT: lwz 3, 40(1) ; CHECK-NEXT: stw 3, 56(1) -; CHECK-NEXT: lfd 3, 80(1) -; CHECK-NEXT: lfd 4, 72(1) -; CHECK-NEXT: lfd 1, 64(1) ; CHECK-NEXT: lfd 2, 56(1) ; CHECK-NEXT: bl __gcc_qsub@PLT ; CHECK-NEXT: mffs 0 @@ -225,8 +225,8 @@ ; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: fctiwz 2, 2 ; CHECK-NEXT: stfd 2, 24(1) -; CHECK-NEXT: fcmpu 1, 31, 1 ; CHECK-NEXT: lwz 3, 36(1) +; CHECK-NEXT: fcmpu 1, 31, 1 ; CHECK-NEXT: lwz 4, 28(1) ; CHECK-NEXT: crandc 20, 6, 1 ; CHECK-NEXT: cror 20, 4, 20 @@ -240,25 +240,25 @@ ; CHECK-NEXT: subfe 3, 29, 30 ; CHECK-NEXT: b .LBB0_16 ; CHECK-NEXT: .LBB0_13: # %bb3 -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: lis 4, 16864 ; CHECK-NEXT: stfd 31, 112(1) -; CHECK-NEXT: stfd 30, 104(1) +; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: stw 3, 148(1) -; CHECK-NEXT: stw 4, 144(1) ; CHECK-NEXT: stw 3, 140(1) ; CHECK-NEXT: stw 3, 136(1) +; CHECK-NEXT: stfd 30, 104(1) +; CHECK-NEXT: lis 4, 16864 ; CHECK-NEXT: lwz 3, 116(1) +; CHECK-NEXT: stw 4, 144(1) +; CHECK-NEXT: lfd 4, 136(1) ; CHECK-NEXT: stw 3, 132(1) +; CHECK-NEXT: lfd 3, 144(1) ; CHECK-NEXT: lwz 3, 112(1) ; CHECK-NEXT: stw 3, 128(1) ; CHECK-NEXT: lwz 3, 108(1) +; CHECK-NEXT: lfd 1, 128(1) ; CHECK-NEXT: stw 3, 124(1) ; CHECK-NEXT: lwz 3, 104(1) ; CHECK-NEXT: stw 3, 120(1) -; CHECK-NEXT: lfd 3, 144(1) -; CHECK-NEXT: lfd 4, 136(1) -; CHECK-NEXT: lfd 1, 128(1) ; CHECK-NEXT: lfd 2, 120(1) ; CHECK-NEXT: bl __gcc_qsub@PLT ; CHECK-NEXT: mffs 0 @@ -280,8 +280,8 @@ ; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: fctiwz 2, 2 ; CHECK-NEXT: stfd 2, 88(1) -; CHECK-NEXT: fcmpu 1, 31, 1 ; CHECK-NEXT: lwz 3, 100(1) +; CHECK-NEXT: fcmpu 1, 
31, 1 ; CHECK-NEXT: lwz 4, 92(1) ; CHECK-NEXT: crandc 20, 6, 0 ; CHECK-NEXT: cror 20, 5, 20 Index: llvm/trunk/test/CodeGen/PowerPC/extract-and-store.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/extract-and-store.ll +++ llvm/trunk/test/CodeGen/PowerPC/extract-and-store.ll @@ -744,6 +744,7 @@ ; CHECK-NEXT: stb r4, 9(r5) ; CHECK-NEXT: rldicl r4, r7, 32, 56 ; CHECK-NEXT: rldicl r6, r7, 8, 56 +; CHECK-NEXT: stb r3, 12(r5) ; CHECK-NEXT: stb r4, 0(r5) ; CHECK-NEXT: rldicl r4, r7, 16, 56 ; CHECK-NEXT: stb r6, 3(r5) @@ -756,7 +757,6 @@ ; CHECK-NEXT: rldicl r4, r7, 24, 56 ; CHECK-NEXT: stb r6, 10(r5) ; CHECK-NEXT: stb r4, 11(r5) -; CHECK-NEXT: stb r3, 12(r5) ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: test_13_consecutive_stores_of_bytes: Index: llvm/trunk/test/CodeGen/PowerPC/f128-aggregates.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/f128-aggregates.ll +++ llvm/trunk/test/CodeGen/PowerPC/f128-aggregates.ll @@ -82,6 +82,7 @@ align 16 %a) { ; CHECK-LABEL: testStruct_03: ; CHECK: # %bb.0: # %entry +; CHECK: lxv v2, 128(r1) ; CHECK-DAG: std r10, 88(r1) ; CHECK-DAG: std r9, 80(r1) ; CHECK-DAG: std r8, 72(r1) @@ -90,11 +91,11 @@ ; CHECK-DAG: std r5, 48(r1) ; CHECK-DAG: std r4, 40(r1) ; CHECK-DAG: std r3, 32(r1) -; CHECK-NEXT: lxv v2, 128(r1) ; CHECK-NEXT: blr ; CHECK-BE-LABEL: testStruct_03: ; CHECK-BE: # %bb.0: # %entry +; CHECK-BE: lxv v2, 144(r1) ; CHECK-BE-DAG: std r10, 104(r1) ; CHECK-BE-DAG: std r9, 96(r1) ; CHECK-BE-DAG: std r8, 88(r1) @@ -103,7 +104,6 @@ ; CHECK-BE-DAG: std r5, 64(r1) ; CHECK-BE-DAG: std r4, 56(r1) ; CHECK-BE-DAG: std r3, 48(r1) -; CHECK-BE-NEXT: lxv v2, 144(r1) ; CHECK-BE-NEXT: blr entry: %a7 = getelementptr inbounds %struct.With9fp128params, @@ -256,28 +256,28 @@ define fp128 @testNestedAggregate(%struct.MixedC* byval nocapture readonly align 16 %a) { ; CHECK-LABEL: testNestedAggregate: ; CHECK: # %bb.0: # %entry -; CHECK-DAG: std r10, 
88(r1) -; CHECK-DAG: std r9, 80(r1) ; CHECK-DAG: std r8, 72(r1) ; CHECK-DAG: std r7, 64(r1) +; CHECK: lxv v2, 64(r1) +; CHECK-DAG: std r10, 88(r1) +; CHECK-DAG: std r9, 80(r1) ; CHECK-DAG: std r6, 56(r1) ; CHECK-DAG: std r5, 48(r1) ; CHECK-DAG: std r4, 40(r1) ; CHECK-DAG: std r3, 32(r1) -; CHECK-NEXT: lxv v2, 64(r1) ; CHECK-NEXT: blr ; CHECK-BE-LABEL: testNestedAggregate: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-DAG: std r8, 88(r1) ; CHECK-BE-DAG: std r7, 80(r1) +; CHECK-BE-NEXT: lxv v2, 80(r1) ; CHECK-BE-DAG: std r10, 104(r1) ; CHECK-BE-DAG: std r9, 96(r1) ; CHECK-BE-DAG: std r6, 72(r1) ; CHECK-BE-DAG: std r5, 64(r1) ; CHECK-BE-DAG: std r4, 56(r1) ; CHECK-BE-DAG: std r3, 48(r1) -; CHECK-BE-NEXT: lxv v2, 80(r1) ; CHECK-BE-NEXT: blr entry: %c = getelementptr inbounds %struct.MixedC, %struct.MixedC* %a, i64 0, i32 1, i32 1 Index: llvm/trunk/test/CodeGen/PowerPC/legalize-vaarg.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/legalize-vaarg.ll +++ llvm/trunk/test/CodeGen/PowerPC/legalize-vaarg.ll @@ -5,6 +5,7 @@ define <8 x i32> @test_large_vec_vaarg(i32 %n, ...) 
{ ; BE-LABEL: test_large_vec_vaarg: ; BE: # %bb.0: +; BE-NEXT: ld 3, -8(1) ; BE-NEXT: std 4, 56(1) ; BE-NEXT: std 5, 64(1) ; BE-NEXT: std 6, 72(1) @@ -12,7 +13,6 @@ ; BE-NEXT: std 8, 88(1) ; BE-NEXT: std 9, 96(1) ; BE-NEXT: std 10, 104(1) -; BE-NEXT: ld 3, -8(1) ; BE-NEXT: addi 3, 3, 15 ; BE-NEXT: rldicr 3, 3, 0, 59 ; BE-NEXT: addi 4, 3, 16 @@ -27,16 +27,16 @@ ; ; LE-LABEL: test_large_vec_vaarg: ; LE: # %bb.0: +; LE-NEXT: ld 3, -8(1) ; LE-NEXT: std 4, 40(1) ; LE-NEXT: std 5, 48(1) ; LE-NEXT: std 6, 56(1) ; LE-NEXT: std 7, 64(1) +; LE-NEXT: addi 3, 3, 15 +; LE-NEXT: rldicr 3, 3, 0, 59 ; LE-NEXT: std 8, 72(1) ; LE-NEXT: std 9, 80(1) ; LE-NEXT: std 10, 88(1) -; LE-NEXT: ld 3, -8(1) -; LE-NEXT: addi 3, 3, 15 -; LE-NEXT: rldicr 3, 3, 0, 59 ; LE-NEXT: addi 4, 3, 31 ; LE-NEXT: addi 5, 3, 16 ; LE-NEXT: rldicr 4, 4, 0, 59 Index: llvm/trunk/test/CodeGen/PowerPC/ppc32-skip-regs.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/ppc32-skip-regs.ll +++ llvm/trunk/test/CodeGen/PowerPC/ppc32-skip-regs.ll @@ -16,10 +16,10 @@ ; left for long double type (4 registers in soft float mode). Instead in r8 register this ; argument put on stack. ; CHECK-NOT: mr 8, 4 -; CHECK: stw 7, 20(1) ; CHECK: stw 6, 16(1) ; CHECK: stw 5, 12(1) ; CHECK: stw 4, 8(1) +; CHECK: stw 7, 20(1) declare i32 @printf(i8* nocapture readonly, ...) 
Index: llvm/trunk/test/CodeGen/PowerPC/scheduling-mem-dependency.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/scheduling-mem-dependency.ll +++ llvm/trunk/test/CodeGen/PowerPC/scheduling-mem-dependency.ll @@ -0,0 +1,19 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s + +define i64 @store_disjoint_memory(i64* nocapture %P, i64 %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_disjoint_memory:%bb.0 +; CHECK:SU(2): STD renamable $x4, 24, renamable $x5 :: (store 8 into %ir.arrayidx) +; CHECK-NOT: Successors: +; CHECK-NOT: SU(3): Ord Latency=0 Memory +; CHECK:SU(3): STD renamable $x4, 16, renamable $x5 :: (store 8 into %ir.arrayidx1) +; CHECK: Predecessors: +; CHECK-NOT: SU(2): Ord Latency=0 Memory + %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 + store i64 %v, i64* %arrayidx + %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 + store i64 %v, i64* %arrayidx1 + ret i64 %v +} Index: llvm/trunk/test/CodeGen/PowerPC/varargs.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/varargs.ll +++ llvm/trunk/test/CodeGen/PowerPC/varargs.ll @@ -8,25 +8,29 @@ ; P32-LABEL: test1: ; P32: # %bb.0: ; P32-NEXT: lbz r4, 0(r3) -; P32-NEXT: addi r5, r4, 1 -; P32-NEXT: stb r5, 0(r3) -; P32-NEXT: cmpwi r4, 8 ; P32-NEXT: lwz r5, 4(r3) +; P32-NEXT: lwz r6, 8(r3) +; P32-NEXT: addi r7, r4, 1 +; P32-NEXT: stb r7, 0(r3) +; P32-NEXT: addi r7, r5, 4 +; P32-NEXT: cmpwi r4, 8 ; P32-NEXT: slwi r4, r4, 2 -; P32-NEXT: addi r6, r5, 4 -; P32-NEXT: bc 12, lt, .LBB0_1 -; P32-NEXT: b .LBB0_2 -; P32-NEXT: .LBB0_1: -; P32-NEXT: addi r6, r5, 0 +; P32-NEXT: add r4, r6, r4 +; P32-NEXT: bc 12, lt, .LBB0_2 +; P32-NEXT: # %bb.1: +; P32-NEXT: ori r6, r7, 0 +; P32-NEXT: b .LBB0_3 ; P32-NEXT: .LBB0_2: +; P32-NEXT: addi r6, r5, 0 +; P32-NEXT: .LBB0_3: 
; P32-NEXT: stw r6, 4(r3) -; P32-NEXT: lwz r3, 8(r3) -; P32-NEXT: add r3, r3, r4 -; P32-NEXT: bc 12, lt, .LBB0_4 -; P32-NEXT: # %bb.3: +; P32-NEXT: bc 12, lt, .LBB0_5 +; P32-NEXT: # %bb.4: ; P32-NEXT: ori r3, r5, 0 -; P32-NEXT: b .LBB0_4 -; P32-NEXT: .LBB0_4: +; P32-NEXT: b .LBB0_6 +; P32-NEXT: .LBB0_5: +; P32-NEXT: addi r3, r4, 0 +; P32-NEXT: .LBB0_6: ; P32-NEXT: lwz r3, 0(r3) ; P32-NEXT: blr ; Index: llvm/trunk/test/CodeGen/PowerPC/vec-min-max.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/vec-min-max.ll +++ llvm/trunk/test/CodeGen/PowerPC/vec-min-max.ll @@ -67,13 +67,13 @@ ; NOP8VEC-NEXT: stxvd2x 1, 0, 4 ; NOP8VEC-NEXT: ld 3, -24(1) ; NOP8VEC-NEXT: ld 4, -40(1) +; NOP8VEC-NEXT: ld 6, -48(1) ; NOP8VEC-NEXT: cmpd 4, 3 ; NOP8VEC-NEXT: li 3, 0 ; NOP8VEC-NEXT: li 4, -1 ; NOP8VEC-NEXT: isel 5, 4, 3, 1 ; NOP8VEC-NEXT: std 5, -8(1) ; NOP8VEC-NEXT: ld 5, -32(1) -; NOP8VEC-NEXT: ld 6, -48(1) ; NOP8VEC-NEXT: cmpd 6, 5 ; NOP8VEC-NEXT: isel 3, 4, 3, 1 ; NOP8VEC-NEXT: std 3, -16(1) @@ -184,13 +184,13 @@ ; NOP8VEC-NEXT: stxvd2x 1, 0, 4 ; NOP8VEC-NEXT: ld 3, -24(1) ; NOP8VEC-NEXT: ld 4, -40(1) +; NOP8VEC-NEXT: ld 6, -48(1) ; NOP8VEC-NEXT: cmpd 4, 3 ; NOP8VEC-NEXT: li 3, 0 ; NOP8VEC-NEXT: li 4, -1 ; NOP8VEC-NEXT: isel 5, 4, 3, 0 ; NOP8VEC-NEXT: std 5, -8(1) ; NOP8VEC-NEXT: ld 5, -32(1) -; NOP8VEC-NEXT: ld 6, -48(1) ; NOP8VEC-NEXT: cmpd 6, 5 ; NOP8VEC-NEXT: isel 3, 4, 3, 0 ; NOP8VEC-NEXT: std 3, -16(1) Index: llvm/trunk/test/CodeGen/PowerPC/vsx.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/vsx.ll +++ llvm/trunk/test/CodeGen/PowerPC/vsx.ll @@ -802,9 +802,9 @@ ; CHECK-NEXT: ld r3, -24(r1) ; CHECK-NEXT: ld r4, -40(r1) ; CHECK-NEXT: add r3, r4, r3 +; CHECK-NEXT: ld r4, -48(r1) ; CHECK-NEXT: std r3, -8(r1) ; CHECK-NEXT: ld r3, -32(r1) -; CHECK-NEXT: ld r4, -48(r1) ; CHECK-NEXT: add r3, r4, r3 ; CHECK-NEXT: std r3, -16(r1) ; CHECK-NEXT: addi r3, r1, -16 @@ -820,9 
+820,9 @@ ; CHECK-REG-NEXT: ld r3, -24(r1) ; CHECK-REG-NEXT: ld r4, -40(r1) ; CHECK-REG-NEXT: add r3, r4, r3 +; CHECK-REG-NEXT: ld r4, -48(r1) ; CHECK-REG-NEXT: std r3, -8(r1) ; CHECK-REG-NEXT: ld r3, -32(r1) -; CHECK-REG-NEXT: ld r4, -48(r1) ; CHECK-REG-NEXT: add r3, r4, r3 ; CHECK-REG-NEXT: std r3, -16(r1) ; CHECK-REG-NEXT: addi r3, r1, -16 @@ -1832,9 +1832,9 @@ ; CHECK-NEXT: lwz r3, -20(r1) ; CHECK-NEXT: ld r4, -40(r1) ; CHECK-NEXT: sld r3, r4, r3 +; CHECK-NEXT: ld r4, -48(r1) ; CHECK-NEXT: std r3, -8(r1) ; CHECK-NEXT: lwz r3, -28(r1) -; CHECK-NEXT: ld r4, -48(r1) ; CHECK-NEXT: sld r3, r4, r3 ; CHECK-NEXT: std r3, -16(r1) ; CHECK-NEXT: addi r3, r1, -16 @@ -1850,9 +1850,9 @@ ; CHECK-REG-NEXT: lwz r3, -20(r1) ; CHECK-REG-NEXT: ld r4, -40(r1) ; CHECK-REG-NEXT: sld r3, r4, r3 +; CHECK-REG-NEXT: ld r4, -48(r1) ; CHECK-REG-NEXT: std r3, -8(r1) ; CHECK-REG-NEXT: lwz r3, -28(r1) -; CHECK-REG-NEXT: ld r4, -48(r1) ; CHECK-REG-NEXT: sld r3, r4, r3 ; CHECK-REG-NEXT: std r3, -16(r1) ; CHECK-REG-NEXT: addi r3, r1, -16 @@ -1898,9 +1898,9 @@ ; CHECK-NEXT: lwz r3, -20(r1) ; CHECK-NEXT: ld r4, -40(r1) ; CHECK-NEXT: srd r3, r4, r3 +; CHECK-NEXT: ld r4, -48(r1) ; CHECK-NEXT: std r3, -8(r1) ; CHECK-NEXT: lwz r3, -28(r1) -; CHECK-NEXT: ld r4, -48(r1) ; CHECK-NEXT: srd r3, r4, r3 ; CHECK-NEXT: std r3, -16(r1) ; CHECK-NEXT: addi r3, r1, -16 @@ -1916,9 +1916,9 @@ ; CHECK-REG-NEXT: lwz r3, -20(r1) ; CHECK-REG-NEXT: ld r4, -40(r1) ; CHECK-REG-NEXT: srd r3, r4, r3 +; CHECK-REG-NEXT: ld r4, -48(r1) ; CHECK-REG-NEXT: std r3, -8(r1) ; CHECK-REG-NEXT: lwz r3, -28(r1) -; CHECK-REG-NEXT: ld r4, -48(r1) ; CHECK-REG-NEXT: srd r3, r4, r3 ; CHECK-REG-NEXT: std r3, -16(r1) ; CHECK-REG-NEXT: addi r3, r1, -16 @@ -1964,9 +1964,9 @@ ; CHECK-NEXT: lwz r3, -20(r1) ; CHECK-NEXT: ld r4, -40(r1) ; CHECK-NEXT: srad r3, r4, r3 +; CHECK-NEXT: ld r4, -48(r1) ; CHECK-NEXT: std r3, -8(r1) ; CHECK-NEXT: lwz r3, -28(r1) -; CHECK-NEXT: ld r4, -48(r1) ; CHECK-NEXT: srad r3, r4, r3 ; CHECK-NEXT: std r3, -16(r1) ; 
CHECK-NEXT: addi r3, r1, -16 @@ -1982,9 +1982,9 @@ ; CHECK-REG-NEXT: lwz r3, -20(r1) ; CHECK-REG-NEXT: ld r4, -40(r1) ; CHECK-REG-NEXT: srad r3, r4, r3 +; CHECK-REG-NEXT: ld r4, -48(r1) ; CHECK-REG-NEXT: std r3, -8(r1) ; CHECK-REG-NEXT: lwz r3, -28(r1) -; CHECK-REG-NEXT: ld r4, -48(r1) ; CHECK-REG-NEXT: srad r3, r4, r3 ; CHECK-REG-NEXT: std r3, -16(r1) ; CHECK-REG-NEXT: addi r3, r1, -16 @@ -2148,13 +2148,13 @@ ; CHECK-NEXT: stxvd2x v2, 0, r4 ; CHECK-NEXT: ld r3, -24(r1) ; CHECK-NEXT: ld r4, -40(r1) +; CHECK-NEXT: ld r6, -48(r1) ; CHECK-NEXT: cmpld r4, r3 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: li r4, -1 ; CHECK-NEXT: isel r5, r4, r3, lt ; CHECK-NEXT: std r5, -8(r1) ; CHECK-NEXT: ld r5, -32(r1) -; CHECK-NEXT: ld r6, -48(r1) ; CHECK-NEXT: cmpld r6, r5 ; CHECK-NEXT: isel r3, r4, r3, lt ; CHECK-NEXT: std r3, -16(r1) @@ -2170,13 +2170,13 @@ ; CHECK-REG-NEXT: stxvd2x v2, 0, r4 ; CHECK-REG-NEXT: ld r3, -24(r1) ; CHECK-REG-NEXT: ld r4, -40(r1) +; CHECK-REG-NEXT: ld r6, -48(r1) ; CHECK-REG-NEXT: cmpld r4, r3 ; CHECK-REG-NEXT: li r3, 0 ; CHECK-REG-NEXT: li r4, -1 ; CHECK-REG-NEXT: isel r5, r4, r3, lt ; CHECK-REG-NEXT: std r5, -8(r1) ; CHECK-REG-NEXT: ld r5, -32(r1) -; CHECK-REG-NEXT: ld r6, -48(r1) ; CHECK-REG-NEXT: cmpld r6, r5 ; CHECK-REG-NEXT: isel r3, r4, r3, lt ; CHECK-REG-NEXT: std r3, -16(r1)