Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4180,16 +4180,24 @@ break; } - // If this is a load or store with a zero offset, we may be able to - // fold an add-immediate into the memory operation. - if (!isa(N->getOperand(FirstOp)) || - N->getConstantOperandVal(FirstOp) != 0) + // If this is a load or store with a zero offset, or within the alignment, + // we may be able to fold an add-immediate into the memory operation. + // The check against alignment is below, as it can't occur until we check + // the arguments to N + if (!isa(N->getOperand(FirstOp))) continue; SDValue Base = N->getOperand(FirstOp + 1); if (!Base.isMachineOpcode()) continue; + // On targets with fusion, we don't want this to fire and remove a fusion + // opportunity, unless a) it results in another fusion opportunity or + // b) optimizing for size. + if (PPCSubTarget->hasFusion() && + (!MF->getFunction()->optForSize() && !Base.hasOneUse()) + continue; + unsigned Flags = 0; bool ReplaceFlags = true; @@ -4233,6 +4241,17 @@ break; } + SDValue ImmOpnd = Base.getOperand(1); + int MaxDisplacement = 0; + if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { + const GlobalValue *GV = GA->getGlobal(); + MaxDisplacement = GV->getAlignment() - 1; + } + + int Offset = N->getConstantOperandVal(FirstOp); + if (Offset < 0 || Offset > MaxDisplacement) + continue; + // We found an opportunity. Reverse the operands from the add // immediate and substitute them into the load or store. If // needed, update the target flags for the immediate operand to @@ -4243,8 +4262,6 @@ DEBUG(N->dump(CurDAG)); DEBUG(dbgs() << "\n"); - SDValue ImmOpnd = Base.getOperand(1); - // If the relocation information isn't already present on the // immediate operand, add it now. if (ReplaceFlags) { @@ -4255,17 +4272,17 @@ // is insufficient for the instruction encoding. if (GV->getAlignment() < 4 && (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD || - StorageOpcode == PPC::LWA)) { + StorageOpcode == PPC::LWA || (Offset % 4) != 0)) { DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); continue; } - ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); + ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); } else if (ConstantPoolSDNode *CP = dyn_cast(ImmOpnd)) { const Constant *C = CP->getConstVal(); ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlignment(), - 0, Flags); + Offset, Flags); } } Index: test/CodeGen/PowerPC/peephole-align.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/peephole-align.ll @@ -0,0 +1,335 @@ +; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck -check-prefix=POWER7 -check-prefix=CHECK %s +; RUN: llc -mcpu=pwr8 -O1 -code-model=medium <%s | FileCheck -check-prefix=POWER8 -check-prefix=CHECK %s + +; Test peephole optimization for medium code model (32-bit TOC offsets) +; for loading and storing small offsets within aligned values. +; For power8, verify that the optimization doesn't fire, as it prevents fusion +; opportunities. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.b4 = type<{ i8, i8, i8, i8 }> +%struct.h2 = type<{ i16, i16 }> + +%struct.b8 = type<{ i8, i8, i8, i8, i8, i8, i8, i8 }> +%struct.h4 = type<{ i16, i16, i16, i16 }> +%struct.w2 = type<{ i32, i32 }> + +%struct.d2 = type<{ i64, i64 }> +%struct.misalign = type<{ i8, i64 }> + +@b4v = global %struct.b4 <{ i8 1, i8 2, i8 3, i8 4 }>, align 4 +@h2v = global %struct.h2 <{ i16 1, i16 2 }>, align 4 + +@b8v = global %struct.b8 <{ i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8 }>, align 8 +@h4v = global %struct.h4 <{ i16 1, i16 2, i16 3, i16 4 }>, align 8 +@w2v = global %struct.w2 <{ i32 1, i32 2 }>, align 8 + +@d2v = global %struct.d2 <{ i64 1, i64 2 }>, align 16 +@misalign_v = global %struct.misalign <{ i8 1, i64 2 }>, align 16 + +; CHECK-LABEL: test_b4: +; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, b4v@toc@ha +; POWER7-DAG: lbz [[REG0_0:[0-9]+]], b4v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG1_0:[0-9]+]], b4v@toc@l+1([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG2_0:[0-9]+]], b4v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG3_0:[0-9]+]], b4v@toc@l+3([[REGSTRUCT]]) +; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER7-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3 +; POWER7-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4 +; POWER7-DAG: stb [[REG0_1]], b4v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG1_1]], b4v@toc@l+1([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG2_1]], b4v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG3_1]], b4v@toc@l+3([[REGSTRUCT]]) + +; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, b4v@toc@ha +; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], b4v@toc@l +; POWER8-DAG: lbz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG1_0:[0-9]+]], 1([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG2_0:[0-9]+]], 2([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG3_0:[0-9]+]], 3([[REGSTRUCT]]) +; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER8-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3 +; POWER8-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4 +; POWER8-DAG: stb [[REG0_1]], 0([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG1_1]], 1([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG2_1]], 2([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG3_1]], 3([[REGSTRUCT]]) +define void @test_b4() nounwind { +entry: + %0 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 0), align 1 + %inc0 = add nsw i8 %0, 1 + store i8 %inc0, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 0), align 1 + %1 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 1), align 1 + %inc1 = add nsw i8 %1, 2 + store i8 %inc1, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 1), align 1 + %2 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 2), align 1 + %inc2 = add nsw i8 %2, 3 + store i8 %inc2, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 2), align 1 + %3 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 3), align 1 + %inc3 = add nsw i8 %3, 4 + store i8 %inc3, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 3), align 1 + ret void +} + +; CHECK-LABEL: test_h2: +; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, h2v@toc@ha +; POWER7-DAG: lhz [[REG0_0:[0-9]+]], h2v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: lhz [[REG1_0:[0-9]+]], h2v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER7-DAG: sth [[REG0_1]], h2v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: sth [[REG1_1]], h2v@toc@l+2([[REGSTRUCT]]) + +; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, h2v@toc@ha +; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], h2v@toc@l +; POWER8-DAG: lhz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]]) +; POWER8-DAG: lhz [[REG1_0:[0-9]+]], 2([[REGSTRUCT]]) +; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER8-DAG: sth [[REG0_1]], 0([[REGSTRUCT]]) +; POWER8-DAG: sth [[REG1_1]], 2([[REGSTRUCT]]) +define void @test_h2() nounwind { +entry: + %0 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2 + %inc0 = add nsw i16 %0, 1 + store i16 %inc0, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2 + %1 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2 + %inc1 = add nsw i16 %1, 2 + store i16 %inc1, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2 + ret void +} + +; CHECK-LABEL: test_h2_optsize: +; CHECK: addis [[REGSTRUCT:[0-9]+]], 2, h2v@toc@ha +; CHECK-DAG: lhz [[REG0_0:[0-9]+]], h2v@toc@l([[REGSTRUCT]]) +; CHECK-DAG: lhz [[REG1_0:[0-9]+]], h2v@toc@l+2([[REGSTRUCT]]) +; CHECK-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; CHECK-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; CHECK-DAG: sth [[REG0_1]], h2v@toc@l([[REGSTRUCT]]) +; CHECK-DAG: sth [[REG1_1]], h2v@toc@l+2([[REGSTRUCT]]) +define void @test_h2_optsize() optsize nounwind { +entry: + %0 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2 + %inc0 = add nsw i16 %0, 1 + store i16 %inc0, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2 + %1 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2 + %inc1 = add nsw i16 %1, 2 + store i16 %inc1, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2 + ret void +} + +; CHECK-LABEL: test_b8: +; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, b8v@toc@ha +; POWER7-DAG: lbz [[REG0_0:[0-9]+]], b8v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG1_0:[0-9]+]], b8v@toc@l+1([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG2_0:[0-9]+]], b8v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG3_0:[0-9]+]], b8v@toc@l+3([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG4_0:[0-9]+]], b8v@toc@l+4([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG5_0:[0-9]+]], b8v@toc@l+5([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG6_0:[0-9]+]], b8v@toc@l+6([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG7_0:[0-9]+]], b8v@toc@l+7([[REGSTRUCT]]) +; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER7-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3 +; POWER7-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4 +; POWER7-DAG: addi [[REG4_1:[0-9]+]], [[REG4_0]], 5 +; POWER7-DAG: addi [[REG5_1:[0-9]+]], [[REG5_0]], 6 +; POWER7-DAG: addi [[REG6_1:[0-9]+]], [[REG6_0]], 7 +; POWER7-DAG: addi [[REG7_1:[0-9]+]], [[REG7_0]], 8 +; POWER7-DAG: stb [[REG0_1]], b8v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG1_1]], b8v@toc@l+1([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG2_1]], b8v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG3_1]], b8v@toc@l+3([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG4_1]], b8v@toc@l+4([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG5_1]], b8v@toc@l+5([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG6_1]], b8v@toc@l+6([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG7_1]], b8v@toc@l+7([[REGSTRUCT]]) + +; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, b8v@toc@ha +; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], b8v@toc@l +; POWER8-DAG: lbz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG1_0:[0-9]+]], 1([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG2_0:[0-9]+]], 2([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG3_0:[0-9]+]], 3([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG4_0:[0-9]+]], 4([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG5_0:[0-9]+]], 5([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG6_0:[0-9]+]], 6([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG7_0:[0-9]+]], 7([[REGSTRUCT]]) +; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER8-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3 +; POWER8-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4 +; POWER8-DAG: addi [[REG4_1:[0-9]+]], [[REG4_0]], 5 +; POWER8-DAG: addi [[REG5_1:[0-9]+]], [[REG5_0]], 6 +; POWER8-DAG: addi [[REG6_1:[0-9]+]], [[REG6_0]], 7 +; POWER8-DAG: addi [[REG7_1:[0-9]+]], [[REG7_0]], 8 +; POWER8-DAG: stb [[REG0_1]], 0([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG1_1]], 1([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG2_1]], 2([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG3_1]], 3([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG4_1]], 4([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG5_1]], 5([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG6_1]], 6([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG7_1]], 7([[REGSTRUCT]]) +define void @test_b8() nounwind { +entry: + %0 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 0), align 1 + %inc0 = add nsw i8 %0, 1 + store i8 %inc0, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 0), align 1 + %1 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 1), align 1 + %inc1 = add nsw i8 %1, 2 + store i8 %inc1, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 1), align 1 + %2 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 2), align 1 + %inc2 = add nsw i8 %2, 3 + store i8 %inc2, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 2), align 1 + %3 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 3), align 1 + %inc3 = add nsw i8 %3, 4 + store i8 %inc3, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 3), align 1 + %4 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 4), align 1 + %inc4 = add nsw i8 %4, 5 + store i8 %inc4, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 4), align 1 + %5 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 5), align 1 + %inc5 = add nsw i8 %5, 6 + store i8 %inc5, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 5), align 1 + %6 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 6), align 1 + %inc6 = add nsw i8 %6, 7 + store i8 %inc6, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 6), align 1 + %7 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 7), align 1 + %inc7 = add nsw i8 %7, 8 + store i8 %inc7, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 7), align 1 + ret void +} + +; CHECK-LABEL: test_h4: +; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, h4v@toc@ha +; POWER7-DAG: lhz [[REG0_0:[0-9]+]], h4v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: lhz [[REG1_0:[0-9]+]], h4v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: lhz [[REG2_0:[0-9]+]], h4v@toc@l+4([[REGSTRUCT]]) +; POWER7-DAG: lhz [[REG3_0:[0-9]+]], h4v@toc@l+6([[REGSTRUCT]]) +; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER7-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3 +; POWER7-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4 +; POWER7-DAG: sth [[REG0_1]], h4v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: sth [[REG1_1]], h4v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: sth [[REG2_1]], h4v@toc@l+4([[REGSTRUCT]]) +; POWER7-DAG: sth [[REG3_1]], h4v@toc@l+6([[REGSTRUCT]]) + +; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, h4v@toc@ha +; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], h4v@toc@l +; POWER8-DAG: lhz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]]) +; POWER8-DAG: lhz [[REG1_0:[0-9]+]], 2([[REGSTRUCT]]) +; POWER8-DAG: lhz [[REG2_0:[0-9]+]], 4([[REGSTRUCT]]) +; POWER8-DAG: lhz [[REG3_0:[0-9]+]], 6([[REGSTRUCT]]) +; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER8-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3 +; POWER8-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4 +; POWER8-DAG: sth [[REG0_1]], 0([[REGSTRUCT]]) +; POWER8-DAG: sth [[REG1_1]], 2([[REGSTRUCT]]) +; POWER8-DAG: sth [[REG2_1]], 4([[REGSTRUCT]]) +; POWER8-DAG: sth [[REG3_1]], 6([[REGSTRUCT]]) +define void @test_h4() nounwind { +entry: + %0 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 0), align 2 + %inc0 = add nsw i16 %0, 1 + store i16 %inc0, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 0), align 2 + %1 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 1), align 2 + %inc1 = add nsw i16 %1, 2 + store i16 %inc1, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 1), align 2 + %2 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 2), align 2 + %inc2 = add nsw i16 %2, 3 + store i16 %inc2, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 2), align 2 + %3 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 3), align 2 + %inc3 = add nsw i16 %3, 4 + store i16 %inc3, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 3), align 2 + ret void +} + +; CHECK-LABEL: test_w2: +; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, w2v@toc@ha +; POWER7-DAG: lwz [[REG0_0:[0-9]+]], w2v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: lwz [[REG1_0:[0-9]+]], w2v@toc@l+4([[REGSTRUCT]]) +; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER7-DAG: stw [[REG0_1]], w2v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: stw [[REG1_1]], w2v@toc@l+4([[REGSTRUCT]]) + +; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, w2v@toc@ha +; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], w2v@toc@l +; POWER8-DAG: lwz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]]) +; POWER8-DAG: lwz [[REG1_0:[0-9]+]], 4([[REGSTRUCT]]) +; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER8-DAG: stw [[REG0_1]], 0([[REGSTRUCT]]) +; POWER8-DAG: stw [[REG1_1]], 4([[REGSTRUCT]]) +define void @test_w2() nounwind { +entry: + %0 = load i32, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 0), align 4 + %inc0 = add nsw i32 %0, 1 + store i32 %inc0, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 0), align 4 + %1 = load i32, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 1), align 4 + %inc1 = add nsw i32 %1, 2 + store i32 %inc1, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 1), align 4 + ret void +} + +; CHECK-LABEL: test_d2: +; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, d2v@toc@ha +; POWER7-DAG: ld [[REG0_0:[0-9]+]], d2v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: ld [[REG1_0:[0-9]+]], d2v@toc@l+8([[REGSTRUCT]]) +; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER7-DAG: std [[REG0_1]], d2v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: std [[REG1_1]], d2v@toc@l+8([[REGSTRUCT]]) + +; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, d2v@toc@ha +; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], d2v@toc@l +; POWER8-DAG: ld [[REG0_0:[0-9]+]], 0([[REGSTRUCT]]) +; POWER8-DAG: ld [[REG1_0:[0-9]+]], 8([[REGSTRUCT]]) +; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER8-DAG: std [[REG0_1]], 0([[REGSTRUCT]]) +; POWER8-DAG: std [[REG1_1]], 8([[REGSTRUCT]]) +define void @test_d2() nounwind { +entry: + %0 = load i64, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 0), align 8 + %inc0 = add nsw i64 %0, 1 + store i64 %inc0, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 0), align 8 + %1 = load i64, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 1), align 8 + %inc1 = add nsw i64 %1, 2 + store i64 %inc1, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 1), align 8 + ret void +} + +; Make sure the optimization fires on power8 if there is a single use resulting +; in a better fusion opportunity. +; register 3 is the return value, so it should be chosen +; CHECK-LABEL: test_singleuse: +; CHECK: addis 3, 2, d2v@toc@ha +; CHECK: ld 3, d2v@toc@l+8(3) +define i64 @test_singleuse() nounwind { +entry: + %0 = load i64, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 1), align 8 + ret i64 %0 +} + +; Make sure the optimization fails to fire if the symbol is aligned, but the offset is not. +; CHECK-LABEL: test_misalign +; POWER7: addis [[REGSTRUCT_0:[0-9]+]], 2, misalign_v@toc@ha +; POWER7: addi [[REGSTRUCT:[0-9]+]], [[REGSTRUCT_0]], misalign_v@toc@l +; POWER7: li [[OFFSET_REG:[0-9]+]], 1 +; POWER7: ldx [[REG0_0:[0-9]+]], [[REGSTRUCT]], [[OFFSET_REG]] +; POWER7: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER7: stdx [[REG0_1]], [[REGSTRUCT]], [[OFFSET_REG]] +define void @test_misalign() nounwind { +entry: + %0 = load i64, i64* getelementptr inbounds (%struct.misalign, %struct.misalign* @misalign_v, i32 0, i32 1), align 1 + %inc0 = add nsw i64 %0, 1 + store i64 %inc0, i64* getelementptr inbounds (%struct.misalign, %struct.misalign* @misalign_v, i32 0, i32 1), align 1 + ret void +}