diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -187,6 +187,26 @@ def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true", "Target supports store clustering", [FeatureFusion]>; +def FeatureArithAddFusion : + SubtargetFeature<"fuse-arith-add", "HasArithAddFusion", "true", + "Target supports Arithmetic Operations with Add fusion", + [FeatureFusion]>; +def FeatureAddLogicalFusion : + SubtargetFeature<"fuse-add-logical", "HasAddLogicalFusion", "true", + "Target supports Add with Logical Operations fusion", + [FeatureFusion]>; +def FeatureLogicalAddFusion : + SubtargetFeature<"fuse-logical-add", "HasLogicalAddFusion", "true", + "Target supports Logical Operations with Add fusion", + [FeatureFusion]>; +def FeatureLogicalFusion : + SubtargetFeature<"fuse-logical", "HasLogicalFusion", "true", + "Target supports Logical Operations fusion", + [FeatureFusion]>; +def FeatureSha3Fusion : + SubtargetFeature<"fuse-sha3", "HasSha3Fusion", "true", + "Target supports SHA3 assist fusion", + [FeatureFusion]>; def FeatureUnalignedFloats : SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess", "true", "CPU does not trap on unaligned FP access">; @@ -375,7 +395,10 @@ // Power10 // For P10 CPU we assume that all of the existing features from Power9 // still exist with the exception of those we know are Power9 specific. 
- list FusionFeatures = [FeatureStoreFusion]; + list FusionFeatures = [ + FeatureStoreFusion, FeatureAddLogicalFusion, FeatureLogicalAddFusion, + FeatureLogicalFusion, FeatureSha3Fusion, FeatureArithAddFusion + ]; list P10AdditionalFeatures = !listconcat(FusionFeatures, [ DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs, diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp --- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp +++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp @@ -75,6 +75,22 @@ return Op1.getReg() == Op2.getReg(); } +// Return true if operand MIOpIndex of MI is an immediate equal to Expect. +// When ExtendFrom is less than 64, the immediate is sign-extended from that +// many bits before it is compared; a non-immediate operand never matches. +static bool matchingImmOps(const MachineInstr &MI, + int MIOpIndex, + int64_t Expect, + unsigned ExtendFrom = 64) { + const MachineOperand &Op = MI.getOperand(MIOpIndex); + if (!Op.isImm()) + return false; + int64_t Imm = Op.getImm(); + if (ExtendFrom < 64) + Imm = SignExtend64(Imm, ExtendFrom); + return Imm == Expect; +} + // Return true if the FirstMI meets the constraints of SecondMI according to // fusion specification. 
static bool checkOpConstraints(FusionFeature::FusionKind Kd, @@ -132,6 +148,20 @@ } return true; } + + // rldicl rx, ra, 1, 0 - xor + case FusionFeature::FK_RotateLeftXor: + return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 0); + + // rldicr rx, ra, 1, 63 - xor + case FusionFeature::FK_RotateRightXor: + return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 63); + + // sldi rx, ra, {3, 6} - {add, subf} + // sldi rx, ra, n is an alias of rldicr rx, ra, n, 63-n + case FusionFeature::FK_SldiAdd: + return (matchingImmOps(FirstMI, 2, 3) && matchingImmOps(FirstMI, 3, 60)) || + (matchingImmOps(FirstMI, 2, 6) && matchingImmOps(FirstMI, 3, 57)); } llvm_unreachable("All the cases should have been handled"); diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def --- a/llvm/lib/Target/PowerPC/PPCMacroFusion.def +++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def @@ -41,5 +41,43 @@ FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8), \ FUSION_OP_SET(LD, LBZ, LBZ8, LHZ, LHZ8, LWZ, LWZ8)) +// Power10 Instruction Fusion + +// {add, mulld} - add +FUSION_FEATURE(ArithAdd, hasArithAddFusion, -1, + FUSION_OP_SET(ADD4, ADD8, MULLD), FUSION_OP_SET(ADD4, ADD8)) + +// {add, subf} - {and, nand, nor, or} +FUSION_FEATURE(ArithLogical, hasAddLogicalFusion, -1, + FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8), + FUSION_OP_SET(AND, AND8, OR, OR8, NAND, NAND8, NOR, NOR8)) + +// {and, andc, eqv, nand, nor, or, orc, xor} - {add, subf} +FUSION_FEATURE(LogicalArith, hasLogicalAddFusion, -1, + FUSION_OP_SET(AND, ANDC, EQV, NAND, NOR, OR, ORC, XOR, AND8, + ANDC8, EQV8, NAND8, NOR8, OR8, ORC8, XOR8), + FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8)) + +// Either of {and, andc, eqv, nand, nor, or, orc, xor} +FUSION_FEATURE(Logical, hasLogicalFusion, -1, + FUSION_OP_SET(AND, ANDC, EQV, NAND, NOR, OR, ORC, XOR, AND8, + ANDC8, EQV8, NAND8, NOR8, OR8, ORC8, XOR8), + FUSION_OP_SET(AND, ANDC, EQV, NAND, NOR, OR, ORC, XOR, AND8, + ANDC8, EQV8, NAND8, NOR8, OR8, ORC8, XOR8)) + +// sldi rx, ra, {3, 6} - {add, subf} +// sldi rx, ra, n is an alias of rldicr rx, ra, n, 
63-n +FUSION_FEATURE(SldiAdd, hasArithAddFusion, 1, FUSION_OP_SET(RLDICR, RLDICR_32), + FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8)) + +// rldicl rx, ra, 1, 0 - xor +FUSION_FEATURE(RotateLeftXor, hasSha3Fusion, 1, + FUSION_OP_SET(RLDICL, RLDICL_32, RLDICL_32_64), + FUSION_OP_SET(XOR, XOR8)) + +// rldicr rx, ra, 1, 63 - xor +FUSION_FEATURE(RotateRightXor, hasSha3Fusion, 1, + FUSION_OP_SET(RLDICR, RLDICR_32), FUSION_OP_SET(XOR, XOR8)) + #undef FUSION_FEATURE #undef FUSION_OP_SET diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -147,6 +147,11 @@ bool HasStoreFusion; bool HasAddiLoadFusion; bool HasAddisLoadFusion; + bool HasArithAddFusion; + bool HasAddLogicalFusion; + bool HasLogicalAddFusion; + bool HasLogicalFusion; + bool HasSha3Fusion; bool IsISA2_06; bool IsISA2_07; bool IsISA3_0; @@ -332,6 +337,11 @@ bool hasStoreFusion() const { return HasStoreFusion; } bool hasAddiLoadFusion() const { return HasAddiLoadFusion; } bool hasAddisLoadFusion() const { return HasAddisLoadFusion; } + bool hasArithAddFusion() const { return HasArithAddFusion; } + bool hasAddLogicalFusion() const { return HasAddLogicalFusion; } + bool hasLogicalAddFusion() const { return HasLogicalAddFusion; } + bool hasLogicalFusion() const { return HasLogicalFusion; } + bool hasSha3Fusion() const { return HasSha3Fusion; } bool needsSwapsForVSXMemOps() const { return hasVSX() && isLittleEndian() && !hasP9Vector(); } diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -127,6 +127,11 @@ HasStoreFusion = false; HasAddiLoadFusion = false; HasAddisLoadFusion = false; + HasArithAddFusion = false; + HasAddLogicalFusion = false; + HasLogicalAddFusion = false; + HasLogicalFusion = false; + HasSha3Fusion = false; IsISA2_06 = false; IsISA2_07 
= false; IsISA3_0 = false; diff --git a/llvm/test/CodeGen/PowerPC/macro-fusion.mir b/llvm/test/CodeGen/PowerPC/macro-fusion.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/macro-fusion.mir @@ -0,0 +1,95 @@ +# REQUIRES: asserts +# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 -x=mir < %s \ +# RUN: -debug-only=machine-scheduler -start-before=postmisched 2>&1 \ +# RUN: | FileCheck %s + +# CHECK: add_mulld:%bb.0 +# CHECK: Macro fuse: SU(0) - SU(1) / MULLD - ADD8 +--- +name: add_mulld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4, $x5 + renamable $x4 = MULLD $x3, $x4 + renamable $x3 = ADD8 killed renamable $x4, $x5 + BLR8 implicit $lr8, implicit $rm, implicit $x3 +... + +# CHECK: add_and:%bb.0 +# CHECK: Macro fuse: SU(0) - SU(1) / ADD8 - AND8 +--- +name: add_and +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4, $x5 + renamable $x4 = ADD8 $x3, $x4 + renamable $x3 = AND8 killed renamable $x4, $x5 + BLR8 implicit $lr8, implicit $rm, implicit $x3 +... + +# CHECK: xor_subf:%bb.0 +# CHECK: Macro fuse: SU(0) - SU(1) / XOR8 - SUBF8 +--- +name: xor_subf +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4, $x5 + renamable $x4 = XOR8 $x3, $x4 + renamable $x3 = SUBF8 killed renamable $x4, $x5 + BLR8 implicit $lr8, implicit $rm, implicit $x3 +... + +# CHECK: or_nand:%bb.0 +# CHECK: Macro fuse: SU(0) - SU(1) / OR8 - NAND8 +--- +name: or_nand +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4, $x5 + renamable $x4 = OR8 $x3, $x4 + renamable $x3 = NAND8 killed renamable $x4, $x5 + BLR8 implicit $lr8, implicit $rm, implicit $x3 +... + +# CHECK: sldi_add:%bb.0 +# CHECK: Macro fuse: SU(0) - SU(1) / RLDICR - ADD8 +--- +name: sldi_add +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4, $x5 + renamable $x4 = RLDICR $x3, 3, 60 + renamable $x3 = ADD8 killed renamable $x4, $x5 + BLR8 implicit $lr8, implicit $rm, implicit $x3 +... 
+ +# CHECK: rldicl_xor:%bb.0 +# CHECK: Macro fuse: SU(0) - SU(1) / RLDICL - XOR8 +--- +name: rldicl_xor +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4, $x5 + renamable $x4 = RLDICL $x3, 1, 0 + renamable $x3 = XOR8 killed renamable $x4, $x5 + BLR8 implicit $lr8, implicit $rm, implicit $x3 +... + +# CHECK: rldicr_xor:%bb.0 +# CHECK: Macro fuse: SU(0) - SU(1) / RLDICR - XOR8 +--- +name: rldicr_xor +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4, $x5 + renamable $x4 = RLDICR $x3, 1, 63 + renamable $x3 = XOR8 killed renamable $x4, $x5 + BLR8 implicit $lr8, implicit $rm, implicit $x3 +... diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll --- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll @@ -88,23 +88,23 @@ ; CHECK-NEXT: addi 5, 5, 1 ; CHECK-NEXT: li 20, 9 ; CHECK-NEXT: ld 28, 824(1) -; CHECK-NEXT: ld 19, 712(1) ; CHECK-NEXT: lwa 3, 0(7) -; CHECK-NEXT: ld 7, 784(1) -; CHECK-NEXT: ld 12, 776(1) -; CHECK-NEXT: ld 11, 768(1) +; CHECK-NEXT: ld 21, 736(1) +; CHECK-NEXT: ld 11, 776(1) +; CHECK-NEXT: ld 12, 768(1) ; CHECK-NEXT: ld 2, 760(1) ; CHECK-NEXT: ld 29, 832(1) -; CHECK-NEXT: cmpldi 5, 9 ; CHECK-NEXT: ld 27, 816(1) +; CHECK-NEXT: cmpldi 5, 9 ; CHECK-NEXT: ld 26, 808(1) ; CHECK-NEXT: ld 25, 800(1) ; CHECK-NEXT: ld 24, 792(1) +; CHECK-NEXT: ld 7, 784(1) ; CHECK-NEXT: iselgt 5, 5, 20 ; CHECK-NEXT: ld 30, 752(1) ; CHECK-NEXT: ld 22, 744(1) -; CHECK-NEXT: ld 21, 736(1) -; CHECK-NEXT: ld 20, 728(1) +; CHECK-NEXT: ld 20, 720(1) +; CHECK-NEXT: ld 19, 712(1) ; CHECK-NEXT: ld 18, 704(1) ; CHECK-NEXT: ld 17, 696(1) ; CHECK-NEXT: ld 16, 688(1) @@ -113,27 +113,14 @@ ; CHECK-NEXT: std 5, 216(1) # 8-byte Folded Spill ; CHECK-NEXT: std 28, 208(1) # 8-byte Folded Spill ; CHECK-NEXT: mr 5, 4 -; CHECK-NEXT: ld 4, 720(1) -; CHECK-NEXT: std 19, 96(1) # 8-byte Folded Spill -; CHECK-NEXT: std 4, 104(1) # 8-byte Folded Spill -; 
CHECK-NEXT: lxv 11, 0(4) -; CHECK-NEXT: mr 4, 5 -; CHECK-NEXT: ld 5, 216(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 4, 728(1) ; CHECK-NEXT: ld 15, 672(1) ; CHECK-NEXT: sldi 31, 3, 1 -; CHECK-NEXT: std 8, 32(1) # 8-byte Folded Spill -; CHECK-NEXT: std 9, 40(1) # 8-byte Folded Spill ; CHECK-NEXT: lxv 43, 0(8) -; CHECK-NEXT: mr 8, 6 -; CHECK-NEXT: sldi 6, 3, 3 -; CHECK-NEXT: std 2, 144(1) # 8-byte Folded Spill -; CHECK-NEXT: std 11, 152(1) # 8-byte Folded Spill ; CHECK-NEXT: lxv 3, 0(2) -; CHECK-NEXT: lxv 2, 0(11) -; CHECK-NEXT: lxv 0, 0(7) -; CHECK-NEXT: add 6, 6, 23 -; CHECK-NEXT: lxv 7, 0(28) -; CHECK-NEXT: add 28, 3, 31 +; CHECK-NEXT: lxv 2, 0(12) +; CHECK-NEXT: lxv 1, 0(11) +; CHECK-NEXT: lxv 8, 0(28) ; CHECK-NEXT: lxv 42, 0(9) ; CHECK-NEXT: lxv 41, 0(10) ; CHECK-NEXT: lxv 40, 0(15) @@ -142,32 +129,45 @@ ; CHECK-NEXT: lxv 33, 0(17) ; CHECK-NEXT: lxv 37, 0(18) ; CHECK-NEXT: lxv 13, 0(19) -; CHECK-NEXT: lxv 10, 0(20) -; CHECK-NEXT: lxv 8, 0(21) -; CHECK-NEXT: lxv 6, 0(22) +; CHECK-NEXT: add 28, 3, 31 +; CHECK-NEXT: lxv 11, 0(20) +; CHECK-NEXT: lxv 7, 0(21) +; CHECK-NEXT: std 8, 32(1) # 8-byte Folded Spill +; CHECK-NEXT: std 9, 40(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 8, 6 +; CHECK-NEXT: lxv 5, 0(22) ; CHECK-NEXT: lxv 4, 0(30) -; CHECK-NEXT: lxv 1, 0(12) +; CHECK-NEXT: lxv 0, 0(7) ; CHECK-NEXT: lxv 32, 0(24) ; CHECK-NEXT: lxv 36, 0(25) ; CHECK-NEXT: lxv 12, 0(26) -; CHECK-NEXT: lxv 9, 0(27) -; CHECK-NEXT: lxv 5, 0(29) +; CHECK-NEXT: lxv 10, 0(27) +; CHECK-NEXT: lxv 6, 0(29) +; CHECK-NEXT: sldi 6, 3, 3 +; CHECK-NEXT: add 6, 6, 23 +; CHECK-NEXT: lxv 9, 0(4) +; CHECK-NEXT: std 4, 112(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 4, 5 +; CHECK-NEXT: ld 5, 216(1) # 8-byte Folded Reload ; CHECK-NEXT: addi 5, 5, -2 -; CHECK-NEXT: sldi 11, 3, 4 -; CHECK-NEXT: std 12, 160(1) # 8-byte Folded Spill +; CHECK-NEXT: std 2, 144(1) # 8-byte Folded Spill +; CHECK-NEXT: std 12, 152(1) # 8-byte Folded Spill +; CHECK-NEXT: std 11, 
160(1) # 8-byte Folded Spill ; CHECK-NEXT: std 7, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: add 7, 3, 0 -; CHECK-NEXT: add 12, 11, 23 -; CHECK-NEXT: addi 11, 6, 32 -; CHECK-NEXT: addi 12, 12, 32 +; CHECK-NEXT: add 11, 3, 0 +; CHECK-NEXT: addi 7, 6, 32 +; CHECK-NEXT: sldi 12, 3, 4 +; CHECK-NEXT: add 6, 12, 23 +; CHECK-NEXT: addi 12, 6, 32 ; CHECK-NEXT: std 22, 128(1) # 8-byte Folded Spill ; CHECK-NEXT: std 30, 136(1) # 8-byte Folded Spill ; CHECK-NEXT: std 26, 192(1) # 8-byte Folded Spill ; CHECK-NEXT: std 27, 200(1) # 8-byte Folded Spill ; CHECK-NEXT: mulli 26, 3, 48 ; CHECK-NEXT: mulli 22, 3, 6 -; CHECK-NEXT: sldi 6, 7, 3 -; CHECK-NEXT: add 30, 23, 6 +; CHECK-NEXT: sldi 30, 11, 3 +; CHECK-NEXT: add 30, 23, 30 ; CHECK-NEXT: std 29, 216(1) # 8-byte Folded Spill ; CHECK-NEXT: std 24, 176(1) # 8-byte Folded Spill ; CHECK-NEXT: std 25, 184(1) # 8-byte Folded Spill @@ -179,8 +179,8 @@ ; CHECK-NEXT: std 16, 72(1) # 8-byte Folded Spill ; CHECK-NEXT: std 17, 80(1) # 8-byte Folded Spill ; CHECK-NEXT: std 18, 88(1) # 8-byte Folded Spill -; CHECK-NEXT: std 20, 112(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: std 19, 96(1) # 8-byte Folded Spill +; CHECK-NEXT: std 20, 104(1) # 8-byte Folded Spill ; CHECK-NEXT: rldicl 5, 5, 61, 3 ; CHECK-NEXT: addi 2, 5, 1 ; CHECK-NEXT: sldi 5, 3, 5 @@ -192,7 +192,7 @@ ; CHECK-NEXT: .LBB0_3: # %_loop_2_do_.lr.ph ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_4 Depth 2 -; CHECK-NEXT: maddld 6, 22, 24, 7 +; CHECK-NEXT: maddld 6, 22, 24, 11 ; CHECK-NEXT: maddld 20, 22, 24, 0 ; CHECK-NEXT: mtctr 2 ; CHECK-NEXT: sldi 6, 6, 3 @@ -243,30 +243,30 @@ ; CHECK-NEXT: xvmaddadp 37, 46, 34 ; CHECK-NEXT: xvmaddadp 13, 48, 34 ; CHECK-NEXT: xvmaddadp 11, 50, 34 -; CHECK-NEXT: xvmaddadp 10, 62, 34 -; CHECK-NEXT: xvmaddadp 8, 60, 34 +; CHECK-NEXT: xvmaddadp 9, 62, 34 +; CHECK-NEXT: xvmaddadp 7, 60, 34 ; CHECK-NEXT: lxvp 34, 32(20) ; CHECK-NEXT: lxvp 44, 32(21) ; CHECK-NEXT: addi 20, 20, 
64 ; CHECK-NEXT: addi 21, 21, 64 -; CHECK-NEXT: xvmaddadp 6, 57, 59 +; CHECK-NEXT: xvmaddadp 5, 57, 59 ; CHECK-NEXT: xvmaddadp 4, 55, 59 ; CHECK-NEXT: xvmaddadp 3, 53, 59 ; CHECK-NEXT: xvmaddadp 2, 31, 59 ; CHECK-NEXT: xvmaddadp 32, 56, 58 ; CHECK-NEXT: xvmaddadp 36, 54, 58 ; CHECK-NEXT: xvmaddadp 12, 52, 58 -; CHECK-NEXT: xvmaddadp 9, 30, 58 +; CHECK-NEXT: xvmaddadp 10, 30, 58 ; CHECK-NEXT: xvmaddadp 1, 35, 59 ; CHECK-NEXT: xvmaddadp 0, 45, 59 -; CHECK-NEXT: xvmaddadp 7, 34, 58 -; CHECK-NEXT: xvmaddadp 5, 44, 58 +; CHECK-NEXT: xvmaddadp 8, 34, 58 +; CHECK-NEXT: xvmaddadp 6, 44, 58 ; CHECK-NEXT: bdnz .LBB0_4 ; CHECK-NEXT: # %bb.5: # %_loop_2_endl_ ; CHECK-NEXT: # ; CHECK-NEXT: addi 25, 25, 6 ; CHECK-NEXT: add 5, 5, 26 -; CHECK-NEXT: add 11, 11, 26 +; CHECK-NEXT: add 7, 7, 26 ; CHECK-NEXT: add 30, 30, 26 ; CHECK-NEXT: add 12, 12, 26 ; CHECK-NEXT: add 29, 29, 26 @@ -296,11 +296,11 @@ ; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 11, 0(3) ; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 10, 0(3) +; CHECK-NEXT: stxv 9, 0(3) ; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 8, 0(3) +; CHECK-NEXT: stxv 7, 0(3) ; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 6, 0(3) +; CHECK-NEXT: stxv 5, 0(3) ; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 4, 0(3) ; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload @@ -318,11 +318,11 @@ ; CHECK-NEXT: ld 3, 192(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 12, 0(3) ; CHECK-NEXT: ld 3, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 9, 0(3) +; CHECK-NEXT: stxv 10, 0(3) ; CHECK-NEXT: ld 3, 208(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 7, 0(3) +; CHECK-NEXT: stxv 8, 0(3) ; CHECK-NEXT: ld 3, 216(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 5, 0(3) +; CHECK-NEXT: stxv 6, 0(3) ; CHECK-NEXT: .LBB0_7: # %_return_bb ; CHECK-NEXT: lxv 63, 400(1) # 16-byte Folded Reload ; CHECK-NEXT: lxv 62, 384(1) # 16-byte Folded Reload diff --git 
a/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll b/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll --- a/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll +++ b/llvm/test/CodeGen/PowerPC/p10-fi-elim.ll @@ -35,6 +35,7 @@ ; CHECK-NEXT: stb r10, 0(r3) ; CHECK-NEXT: stb r5, 0(r3) ; CHECK-NEXT: lbz r5, 2(r7) +; CHECK-NEXT: mr r7, r9 ; CHECK-NEXT: li r2, 1 ; CHECK-NEXT: stb r10, 0(r3) ; CHECK-NEXT: pstxv v2, 64(r1), 0 @@ -42,7 +43,6 @@ ; CHECK-NEXT: mfvsrd r11, v2 ; CHECK-NEXT: li r0, 4 ; CHECK-NEXT: stw r2, 0(r3) -; CHECK-NEXT: mr r7, r9 ; CHECK-NEXT: std r0, 0(r3) ; CHECK-NEXT: rlwinm r5, r5, 0, 27, 27 ; CHECK-NEXT: mfvsrd r12, v3 @@ -85,13 +85,13 @@ ; CHECK-BE-NEXT: stb r11, 0(r3) ; CHECK-BE-NEXT: stb r5, 0(r3) ; CHECK-BE-NEXT: lbz r5, 2(r7) +; CHECK-BE-NEXT: mr r7, r9 ; CHECK-BE-NEXT: vaddudm v3, v2, v2 ; CHECK-BE-NEXT: mfvsrld r10, v2 ; CHECK-BE-NEXT: li r30, 1 ; CHECK-BE-NEXT: stb r11, 0(r3) ; CHECK-BE-NEXT: li r0, 4 ; CHECK-BE-NEXT: stw r30, 0(r3) -; CHECK-BE-NEXT: mr r7, r9 ; CHECK-BE-NEXT: std r0, 0(r3) ; CHECK-BE-NEXT: rlwinm r5, r5, 0, 27, 27 ; CHECK-BE-NEXT: mfvsrld r12, v3 diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll --- a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll +++ b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll @@ -156,7 +156,6 @@ ; CHECK-NEXT: setbc r5, 4*cr5+un ; CHECK-NEXT: # implicit-def: $cr5un ; CHECK-NEXT: mfocrf r8, 4 -; CHECK-NEXT: add r5, r7, r5 ; CHECK-NEXT: rlwimi r8, r9, 9, 23, 23 ; CHECK-NEXT: lwz r9, -4(r1) ; CHECK-NEXT: mtocrf 4, r8 @@ -164,9 +163,10 @@ ; CHECK-NEXT: lwz r9, -8(r1) ; CHECK-NEXT: isel r3, 0, r3, 4*cr5+lt ; CHECK-NEXT: setbc r8, 4*cr5+un +; CHECK-NEXT: add r5, r7, r5 +; CHECK-NEXT: add r5, r8, r5 ; CHECK-NEXT: isel r6, 0, r6, 4*cr5+gt ; CHECK-NEXT: isel r4, 0, r4, 4*cr5+eq -; CHECK-NEXT: add r5, r8, r5 ; CHECK-NEXT: iseleq r3, 0, r3 ; CHECK-NEXT: mtfprd f0, r5 ; CHECK-NEXT: mtocrf 128, r9 @@ -175,11 +175,11 @@ ; CHECK-NEXT: xscvsxddp f0, f0 ; CHECK-NEXT: iseleq r6, 0, r6 ; CHECK-NEXT: 
mtocrf 128, r9 -; CHECK-NEXT: add r3, r6, r3 ; CHECK-NEXT: mtocrf 32, r12 ; CHECK-NEXT: mtocrf 16, r12 ; CHECK-NEXT: mtocrf 8, r12 ; CHECK-NEXT: iseleq r4, 0, r4 +; CHECK-NEXT: add r3, r6, r3 ; CHECK-NEXT: add r3, r4, r3 ; CHECK-NEXT: xsmuldp f0, f0, f2 ; CHECK-NEXT: mtfprd f1, r3 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll @@ -45,12 +45,12 @@ ; CHECK-LARGE: ld r2, .Lfunc_toc2-.Lfunc_gep2(r12) ; CHECK-LARGE: add r2, r2, r12 ; CHECK-S: .localentry AsmClobberX2WithTOC -; CHECK-S: add r3, r4, r3 -; CHECK-S-NEXT: #APP +; CHECK-S: #APP ; CHECK-S-NEXT: li r2, 0 ; CHECK-S-NEXT: #NO_APP -; CHECK-S-NEXT: plwz r4, global_int@PCREL(0), 1 -; CHECK-S-NEXT: add r3, r3, r4 +; CHECK-S-NEXT: plwz r5, global_int@PCREL(0), 1 +; CHECK-S-NEXT: add r3, r4, r3 +; CHECK-S-NEXT: add r3, r3, r5 ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: blr entry: