Diff 40826

lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Show First 20 Lines • Show All 4,131 Lines • ▼ Show 20 Lines	if (MadeChange)
CurDAG->RemoveDeadNodes();		CurDAG->RemoveDeadNodes();
}		}

void PPCDAGToDAGISel::PeepholePPC64() {		void PPCDAGToDAGISel::PeepholePPC64() {
// These optimizations are currently supported only for 64-bit SVR4.		// These optimizations are currently supported only for 64-bit SVR4.
if (PPCSubTarget->isDarwin() \|\| !PPCSubTarget->isPPC64())		if (PPCSubTarget->isDarwin() \|\| !PPCSubTarget->isPPC64())
return;		return;

		// The below optimization can actually impede a processor with fusion by
		// keeping the first register live and preventing fusion. Skip it on
		// processors with fusion.
		if (PPCSubTarget->hasFusion())
		hfinkel (Unsubmitted, Done): But we should do this when optimizing for code size, even on the `P8`: `if (PPCSubTarget->hasFusion() && !MF->getFunction()->optForSize())` or, if this really hurts performance on the `P8`, use `optForMinSize()`. Also, we don't need to turn this off on the `P8` when there is only a single (non-debug) user because, as the ELF v2 ABI spec points out, the sequence `addis r4, r3, upper` followed by `<lbz,lhz,lwz,ld> r4, lower(r4)` is also good.
		iteratee (Author, Unsubmitted, Not Done): Nice catch. I've implemented that.
		return;

SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());		SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;		++Position;

while (Position != CurDAG->allnodes_begin()) {		while (Position != CurDAG->allnodes_begin()) {
SDNode N = &--Position;		SDNode N = &--Position;
// Skip dead nodes and any non-machine opcodes.		// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() \|\| !N->isMachineOpcode())		if (N->use_empty() \|\| !N->isMachineOpcode())
continue;		continue;
Show All 11 Lines	while (Position != CurDAG->allnodes_begin()) {
case PPC::LFS:		case PPC::LFS:
case PPC::LHA:		case PPC::LHA:
case PPC::LHA8:		case PPC::LHA8:
case PPC::LHZ:		case PPC::LHZ:
case PPC::LHZ8:		case PPC::LHZ8:
case PPC::LWA:		case PPC::LWA:
case PPC::LWZ:		case PPC::LWZ:
case PPC::LWZ8:		case PPC::LWZ8:
FirstOp = 0;		FirstOp = 0;
break;		break;

case PPC::STB:		case PPC::STB:
case PPC::STB8:		case PPC::STB8:
case PPC::STD:		case PPC::STD:
case PPC::STFD:		case PPC::STFD:
case PPC::STFS:		case PPC::STFS:
case PPC::STH:		case PPC::STH:
case PPC::STH8:		case PPC::STH8:
case PPC::STW:		case PPC::STW:
case PPC::STW8:		case PPC::STW8:
FirstOp = 1;		FirstOp = 1;
break;		break;
}		}
		echristo (Unsubmitted, Done): Not sure I understand the moves here?
		iteratee (Author, Unsubmitted, Done): They got sorted according to size when I thought I needed to know the size. Reverted.

// If this is a load or store with a zero offset, we may be able to		// If this is a load or store with a zero offset, or within the alignment,
// fold an add-immediate into the memory operation.		// we may be able to fold an add-immediate into the memory operation.
if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) \|\|		// The check against alignment is below, as it can't occur until we check
N->getConstantOperandVal(FirstOp) != 0)		// the arguments to N
		if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
continue;		continue;

SDValue Base = N->getOperand(FirstOp + 1);		SDValue Base = N->getOperand(FirstOp + 1);
if (!Base.isMachineOpcode())		if (!Base.isMachineOpcode())
continue;		continue;

unsigned Flags = 0;		unsigned Flags = 0;
bool ReplaceFlags = true;		bool ReplaceFlags = true;

// When the feeding operation is an add-immediate of some sort,		// When the feeding operation is an add-immediate of some sort,
// determine whether we need to add relocation information to the		// determine whether we need to add relocation information to the
// target flags on the immediate operand when we fold it into the		// target flags on the immediate operand when we fold it into the
		echristo (Unsubmitted, Done): Looks weird. Feel like collapsing it to a single if conditional?
// load instruction.		// load instruction.
//		//
// For something like ADDItocL, the relocation information is		// For something like ADDItocL, the relocation information is
// inferred from the opcode; when we process it in the AsmPrinter,		// inferred from the opcode; when we process it in the AsmPrinter,
// we add the necessary relocation there. A load, though, can receive		// we add the necessary relocation there. A load, though, can receive
// relocation from various flavors of ADDIxxx, so we need to carry		// relocation from various flavors of ADDIxxx, so we need to carry
// the relocation information in the target flags.		// the relocation information in the target flags.
switch (Base.getMachineOpcode()) {		switch (Base.getMachineOpcode()) {
Show All 21 Lines	while (Position != CurDAG->allnodes_begin()) {
case PPC::ADDItlsldL:		case PPC::ADDItlsldL:
Flags = PPCII::MO_TLSLD_LO;		Flags = PPCII::MO_TLSLD_LO;
break;		break;
case PPC::ADDItocL:		case PPC::ADDItocL:
Flags = PPCII::MO_TOC_LO;		Flags = PPCII::MO_TOC_LO;
break;		break;
}		}

		SDValue ImmOpnd = Base.getOperand(1);
		int MaxDisplacement = 0;
		if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
		const GlobalValue *GV = GA->getGlobal();
		MaxDisplacement = GV->getAlignment() - 1;
		}

		int Offset = N->getConstantOperandVal(FirstOp);
		if (Offset < 0 \|\| Offset > MaxDisplacement)
		continue;

// We found an opportunity. Reverse the operands from the add		// We found an opportunity. Reverse the operands from the add
// immediate and substitute them into the load or store. If		// immediate and substitute them into the load or store. If
// needed, update the target flags for the immediate operand to		// needed, update the target flags for the immediate operand to
// reflect the necessary relocation information.		// reflect the necessary relocation information.
DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");		DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
DEBUG(Base->dump(CurDAG));		DEBUG(Base->dump(CurDAG));
DEBUG(dbgs() << "\nN: ");		DEBUG(dbgs() << "\nN: ");
DEBUG(N->dump(CurDAG));		DEBUG(N->dump(CurDAG));
DEBUG(dbgs() << "\n");		DEBUG(dbgs() << "\n");

SDValue ImmOpnd = Base.getOperand(1);

// If the relocation information isn't already present on the		// If the relocation information isn't already present on the
// immediate operand, add it now.		// immediate operand, add it now.
if (ReplaceFlags) {		if (ReplaceFlags) {
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {		if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
SDLoc dl(GA);		SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();		const GlobalValue *GV = GA->getGlobal();
// We can't perform this optimization for data whose alignment		// We can't perform this optimization for data whose alignment
// is insufficient for the instruction encoding.		// is insufficient for the instruction encoding.
if (GV->getAlignment() < 4 &&		if (GV->getAlignment() < 4 &&
(StorageOpcode == PPC::LD \|\| StorageOpcode == PPC::STD \|\|		(StorageOpcode == PPC::LD \|\| StorageOpcode == PPC::STD \|\|
StorageOpcode == PPC::LWA)) {		StorageOpcode == PPC::LWA \|\| (Offset % 4) != 0)) {
DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");		DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
continue;		continue;
}		}
ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);		ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
} else if (ConstantPoolSDNode *CP =		} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {		dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
const Constant *C = CP->getConstVal();		const Constant *C = CP->getConstVal();
ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,		ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
CP->getAlignment(),		CP->getAlignment(),
0, Flags);		Offset, Flags);
}		}
}		}

if (FirstOp == 1) // Store		if (FirstOp == 1) // Store
(void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,		(void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
Base.getOperand(0), N->getOperand(3));		Base.getOperand(0), N->getOperand(3));
else // Load		else // Load
(void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),		(void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
Show All 27 Lines

test/CodeGen/PowerPC/peephole-align.ll

This file was added.

				; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s \| FileCheck -check-prefix=POWER7 -check-prefix=CHECK %s
				; RUN: llc -mcpu=pwr8 -O1 -code-model=medium <%s \| FileCheck -check-prefix=POWER8 -check-prefix=CHECK %s

				; Test peephole optimization for medium code model (32-bit TOC offsets)
				; for loading and storing small offsets within aligned values.
				; For power8, verify that the optimization doesn't fire, as it prevents fusion
				; opportunities.

				target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
				target triple = "powerpc64-unknown-linux-gnu"

				%struct.b4 = type<{ i8, i8, i8, i8 }>
				%struct.h2 = type<{ i16, i16 }>

				%struct.b8 = type<{ i8, i8, i8, i8, i8, i8, i8, i8 }>
				%struct.h4 = type<{ i16, i16, i16, i16 }>
				%struct.w2 = type<{ i32, i32 }>

				%struct.d2 = type<{ i64, i64 }>
				%struct.misalign = type<{ i8, i64 }>

				@b4v = global %struct.b4 <{ i8 1, i8 2, i8 3, i8 4 }>, align 4
				@h2v = global %struct.h2 <{ i16 1, i16 2 }>, align 4

				@b8v = global %struct.b8 <{ i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8 }>, align 8
				@h4v = global %struct.h4 <{ i16 1, i16 2, i16 3, i16 4 }>, align 8
				@w2v = global %struct.w2 <{ i32 1, i32 2 }>, align 8

				@d2v = global %struct.d2 <{ i64 1, i64 2 }>, align 16
				@misalign_v = global %struct.misalign <{ i8 1, i64 2 }>, align 16

				; CHECK-LABEL: test_b4:
				; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, b4v@toc@ha
				; POWER7-DAG: lbz [[REG0_0:[0-9]+]], b4v@toc@l([[REGSTRUCT]])
				; POWER7-DAG: lbz [[REG1_0:[0-9]+]], b4v@toc@l+1([[REGSTRUCT]])
				; POWER7-DAG: lbz [[REG2_0:[0-9]+]], b4v@toc@l+2([[REGSTRUCT]])
				; POWER7-DAG: lbz [[REG3_0:[0-9]+]], b4v@toc@l+3([[REGSTRUCT]])
				; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
				; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
				; POWER7-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3
				; POWER7-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4
				; POWER7-DAG: stb [[REG0_1]], b4v@toc@l([[REGSTRUCT]])
				; POWER7-DAG: stb [[REG1_1]], b4v@toc@l+1([[REGSTRUCT]])
				; POWER7-DAG: stb [[REG2_1]], b4v@toc@l+2([[REGSTRUCT]])
				; POWER7-DAG: stb [[REG3_1]], b4v@toc@l+3([[REGSTRUCT]])

				; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, b4v@toc@ha
				; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], b4v@toc@l
				; POWER8-DAG: lbz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]])
				; POWER8-DAG: lbz [[REG1_0:[0-9]+]], 1([[REGSTRUCT]])
				; POWER8-DAG: lbz [[REG2_0:[0-9]+]], 2([[REGSTRUCT]])
				; POWER8-DAG: lbz [[REG3_0:[0-9]+]], 3([[REGSTRUCT]])
				; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
				; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
				; POWER8-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3
				; POWER8-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4
				; POWER8-DAG: stb [[REG0_1]], 0([[REGSTRUCT]])
				; POWER8-DAG: stb [[REG1_1]], 1([[REGSTRUCT]])
				; POWER8-DAG: stb [[REG2_1]], 2([[REGSTRUCT]])
				; POWER8-DAG: stb [[REG3_1]], 3([[REGSTRUCT]])
				; Loads each of the four i8 fields of @b4v, adds 1, 2, 3 and 4 to them
				; respectively, and stores each result back to the same field. The fields
				; sit at byte offsets 0-3 of a packed struct global declared with align 4.
				define void @test_b4() nounwind {
				entry:
				%0 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 0), align 1
				%inc0 = add nsw i8 %0, 1
				store i8 %inc0, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 0), align 1
				%1 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 1), align 1
				%inc1 = add nsw i8 %1, 2
				store i8 %inc1, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 1), align 1
				%2 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 2), align 1
				%inc2 = add nsw i8 %2, 3
				store i8 %inc2, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 2), align 1
				%3 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 3), align 1
				%inc3 = add nsw i8 %3, 4
				store i8 %inc3, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 3), align 1
				ret void
				}

				; CHECK-LABEL: test_h2:
				; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, h2v@toc@ha
				; POWER7-DAG: lhz [[REG0_0:[0-9]+]], h2v@toc@l([[REGSTRUCT]])
				; POWER7-DAG: lhz [[REG1_0:[0-9]+]], h2v@toc@l+2([[REGSTRUCT]])
				; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
				; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
				; POWER7-DAG: sth [[REG0_1]], h2v@toc@l([[REGSTRUCT]])
				; POWER7-DAG: sth [[REG1_1]], h2v@toc@l+2([[REGSTRUCT]])

				; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, h2v@toc@ha
				; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], h2v@toc@l
				; POWER8-DAG: lhz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]])
				; POWER8-DAG: lhz [[REG1_0:[0-9]+]], 2([[REGSTRUCT]])
				; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
				; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
				; POWER8-DAG: sth [[REG0_1]], 0([[REGSTRUCT]])
				; POWER8-DAG: sth [[REG1_1]], 2([[REGSTRUCT]])
				; Loads both i16 fields of @h2v, adds 1 and 2 to them respectively, and
				; stores each result back to the same field. The fields sit at byte offsets
				; 0 and 2 of a packed struct global declared with align 4.
				define void @test_h2() nounwind {
				entry:
				%0 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2
				%inc0 = add nsw i16 %0, 1
				store i16 %inc0, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2
				%1 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2
				%inc1 = add nsw i16 %1, 2
				store i16 %inc1, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2
				ret void
				}

				; CHECK-LABEL: test_b8:
				; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, b8v@toc@ha
				; POWER7-DAG: lbz [[REG0_0:[0-9]+]], b8v@toc@l([[REGSTRUCT]])
				; POWER7-DAG: lbz [[REG1_0:[0-9]+]], b8v@toc@l+1([[REGSTRUCT]])
				; POWER7-DAG: lbz [[REG2_0:[0-9]+]], b8v@toc@l+2([[REGSTRUCT]])
				; POWER7-DAG: lbz [[REG3_0:[0-9]+]], b8v@toc@l+3([[REGSTRUCT]])
				; POWER7-DAG: lbz [[REG4_0:[0-9]+]], b8v@toc@l+4([[REGSTRUCT]])
				; POWER7-DAG: lbz [[REG5_0:[0-9]+]], b8v@toc@l+5([[REGSTRUCT]])
				; POWER7-DAG: lbz [[REG6_0:[0-9]+]], b8v@toc@l+6([[REGSTRUCT]])
				; POWER7-DAG: lbz [[REG7_0:[0-9]+]], b8v@toc@l+7([[REGSTRUCT]])
				; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
				; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
				; POWER7-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3
				; POWER7-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4
				; POWER7-DAG: addi [[REG4_1:[0-9]+]], [[REG4_0]], 5
				; POWER7-DAG: addi [[REG5_1:[0-9]+]], [[REG5_0]], 6
				; POWER7-DAG: addi [[REG6_1:[0-9]+]], [[REG6_0]], 7
				; POWER7-DAG: addi [[REG7_1:[0-9]+]], [[REG7_0]], 8
				; POWER7-DAG: stb [[REG0_1]], b8v@toc@l([[REGSTRUCT]])
				; POWER7-DAG: stb [[REG1_1]], b8v@toc@l+1([[REGSTRUCT]])
				; POWER7-DAG: stb [[REG2_1]], b8v@toc@l+2([[REGSTRUCT]])
				; POWER7-DAG: stb [[REG3_1]], b8v@toc@l+3([[REGSTRUCT]])
				; POWER7-DAG: stb [[REG4_1]], b8v@toc@l+4([[REGSTRUCT]])
				; POWER7-DAG: stb [[REG5_1]], b8v@toc@l+5([[REGSTRUCT]])
				; POWER7-DAG: stb [[REG6_1]], b8v@toc@l+6([[REGSTRUCT]])
				; POWER7-DAG: stb [[REG7_1]], b8v@toc@l+7([[REGSTRUCT]])

				; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, b8v@toc@ha
				; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], b8v@toc@l
				; POWER8-DAG: lbz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]])
				; POWER8-DAG: lbz [[REG1_0:[0-9]+]], 1([[REGSTRUCT]])
				; POWER8-DAG: lbz [[REG2_0:[0-9]+]], 2([[REGSTRUCT]])
				; POWER8-DAG: lbz [[REG3_0:[0-9]+]], 3([[REGSTRUCT]])
				; POWER8-DAG: lbz [[REG4_0:[0-9]+]], 4([[REGSTRUCT]])
				; POWER8-DAG: lbz [[REG5_0:[0-9]+]], 5([[REGSTRUCT]])
				; POWER8-DAG: lbz [[REG6_0:[0-9]+]], 6([[REGSTRUCT]])
				; POWER8-DAG: lbz [[REG7_0:[0-9]+]], 7([[REGSTRUCT]])
				; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
				; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
				; POWER8-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3
				; POWER8-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4
				; POWER8-DAG: addi [[REG4_1:[0-9]+]], [[REG4_0]], 5
				; POWER8-DAG: addi [[REG5_1:[0-9]+]], [[REG5_0]], 6
				; POWER8-DAG: addi [[REG6_1:[0-9]+]], [[REG6_0]], 7
				; POWER8-DAG: addi [[REG7_1:[0-9]+]], [[REG7_0]], 8
				; POWER8-DAG: stb [[REG0_1]], 0([[REGSTRUCT]])
				; POWER8-DAG: stb [[REG1_1]], 1([[REGSTRUCT]])
				; POWER8-DAG: stb [[REG2_1]], 2([[REGSTRUCT]])
				; POWER8-DAG: stb [[REG3_1]], 3([[REGSTRUCT]])
				; POWER8-DAG: stb [[REG4_1]], 4([[REGSTRUCT]])
				; POWER8-DAG: stb [[REG5_1]], 5([[REGSTRUCT]])
				; POWER8-DAG: stb [[REG6_1]], 6([[REGSTRUCT]])
				; POWER8-DAG: stb [[REG7_1]], 7([[REGSTRUCT]])
				; Loads each of the eight i8 fields of @b8v, adds 1 through 8 to them
				; respectively, and stores each result back to the same field. The fields
				; sit at byte offsets 0-7 of a packed struct global declared with align 8.
				define void @test_b8() nounwind {
				entry:
				%0 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 0), align 1
				%inc0 = add nsw i8 %0, 1
				store i8 %inc0, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 0), align 1
				%1 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 1), align 1
				%inc1 = add nsw i8 %1, 2
				store i8 %inc1, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 1), align 1
				%2 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 2), align 1
				%inc2 = add nsw i8 %2, 3
				store i8 %inc2, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 2), align 1
				%3 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 3), align 1
				%inc3 = add nsw i8 %3, 4
				store i8 %inc3, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 3), align 1
				%4 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 4), align 1
				%inc4 = add nsw i8 %4, 5
				store i8 %inc4, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 4), align 1
				%5 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 5), align 1
				%inc5 = add nsw i8 %5, 6
				store i8 %inc5, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 5), align 1
				%6 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 6), align 1
				%inc6 = add nsw i8 %6, 7
				store i8 %inc6, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 6), align 1
				%7 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 7), align 1
				%inc7 = add nsw i8 %7, 8
				store i8 %inc7, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 7), align 1
				ret void
				}

				; CHECK-LABEL: test_h4:
				; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, h4v@toc@ha
				; POWER7-DAG: lhz [[REG0_0:[0-9]+]], h4v@toc@l([[REGSTRUCT]])
				; POWER7-DAG: lhz [[REG1_0:[0-9]+]], h4v@toc@l+2([[REGSTRUCT]])
				; POWER7-DAG: lhz [[REG2_0:[0-9]+]], h4v@toc@l+4([[REGSTRUCT]])
				; POWER7-DAG: lhz [[REG3_0:[0-9]+]], h4v@toc@l+6([[REGSTRUCT]])
				; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
				; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
				; POWER7-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3
				; POWER7-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4
				; POWER7-DAG: sth [[REG0_1]], h4v@toc@l([[REGSTRUCT]])
				; POWER7-DAG: sth [[REG1_1]], h4v@toc@l+2([[REGSTRUCT]])
				; POWER7-DAG: sth [[REG2_1]], h4v@toc@l+4([[REGSTRUCT]])
				; POWER7-DAG: sth [[REG3_1]], h4v@toc@l+6([[REGSTRUCT]])

				; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, h4v@toc@ha
				; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], h4v@toc@l
				; POWER8-DAG: lhz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]])
				; POWER8-DAG: lhz [[REG1_0:[0-9]+]], 2([[REGSTRUCT]])
				; POWER8-DAG: lhz [[REG2_0:[0-9]+]], 4([[REGSTRUCT]])
				; POWER8-DAG: lhz [[REG3_0:[0-9]+]], 6([[REGSTRUCT]])
				; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
				; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
				; POWER8-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3
				; POWER8-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4
				; POWER8-DAG: sth [[REG0_1]], 0([[REGSTRUCT]])
				; POWER8-DAG: sth [[REG1_1]], 2([[REGSTRUCT]])
				; POWER8-DAG: sth [[REG2_1]], 4([[REGSTRUCT]])
				; POWER8-DAG: sth [[REG3_1]], 6([[REGSTRUCT]])
				; Loads each of the four i16 fields of @h4v, adds 1, 2, 3 and 4 to them
				; respectively, and stores each result back to the same field. The fields
				; sit at byte offsets 0, 2, 4 and 6 of a packed struct global declared with
				; align 8.
				define void @test_h4() nounwind {
				entry:
				%0 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 0), align 2
				%inc0 = add nsw i16 %0, 1
				store i16 %inc0, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 0), align 2
				%1 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 1), align 2
				%inc1 = add nsw i16 %1, 2
				store i16 %inc1, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 1), align 2
				%2 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 2), align 2
				%inc2 = add nsw i16 %2, 3
				store i16 %inc2, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 2), align 2
				%3 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 3), align 2
				%inc3 = add nsw i16 %3, 4
				store i16 %inc3, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 3), align 2
				ret void
				}

				; CHECK-LABEL: test_w2:
				; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, w2v@toc@ha
				; POWER7-DAG: lwz [[REG0_0:[0-9]+]], w2v@toc@l([[REGSTRUCT]])
				; POWER7-DAG: lwz [[REG1_0:[0-9]+]], w2v@toc@l+4([[REGSTRUCT]])
				; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
				; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
				; POWER7-DAG: stw [[REG0_1]], w2v@toc@l([[REGSTRUCT]])
				; POWER7-DAG: stw [[REG1_1]], w2v@toc@l+4([[REGSTRUCT]])

				; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, w2v@toc@ha
				; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], w2v@toc@l
				; POWER8-DAG: lwz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]])
				; POWER8-DAG: lwz [[REG1_0:[0-9]+]], 4([[REGSTRUCT]])
				; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
				; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
				; POWER8-DAG: stw [[REG0_1]], 0([[REGSTRUCT]])
				; POWER8-DAG: stw [[REG1_1]], 4([[REGSTRUCT]])
				; Loads both i32 fields of @w2v, adds 1 and 2 to them respectively, and
				; stores each result back to the same field. The fields sit at byte offsets
				; 0 and 4 of a packed struct global declared with align 8.
				define void @test_w2() nounwind {
				entry:
				%0 = load i32, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 0), align 4
				%inc0 = add nsw i32 %0, 1
				store i32 %inc0, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 0), align 4
				%1 = load i32, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 1), align 4
				%inc1 = add nsw i32 %1, 2
				store i32 %inc1, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 1), align 4
				ret void
				}

				; CHECK-LABEL: test_d2:
				; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, d2v@toc@ha
				; POWER7-DAG: ld [[REG0_0:[0-9]+]], d2v@toc@l([[REGSTRUCT]])
				; POWER7-DAG: ld [[REG1_0:[0-9]+]], d2v@toc@l+8([[REGSTRUCT]])
				; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
				; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
				; POWER7-DAG: std [[REG0_1]], d2v@toc@l([[REGSTRUCT]])
				; POWER7-DAG: std [[REG1_1]], d2v@toc@l+8([[REGSTRUCT]])

				; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, d2v@toc@ha
				; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], d2v@toc@l
				; POWER8-DAG: ld [[REG0_0:[0-9]+]], 0([[REGSTRUCT]])
				; POWER8-DAG: ld [[REG1_0:[0-9]+]], 8([[REGSTRUCT]])
				; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
				; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
				; POWER8-DAG: std [[REG0_1]], 0([[REGSTRUCT]])
				; POWER8-DAG: std [[REG1_1]], 8([[REGSTRUCT]])
				; Loads both i64 fields of @d2v, adds 1 and 2 to them respectively, and
				; stores each result back to the same field. The fields sit at byte offsets
				; 0 and 8 of a packed struct global declared with align 16.
				define void @test_d2() nounwind {
				entry:
				%0 = load i64, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 0), align 8
				%inc0 = add nsw i64 %0, 1
				store i64 %inc0, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 0), align 8
				%1 = load i64, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 1), align 8
				%inc1 = add nsw i64 %1, 2
				store i64 %inc1, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 1), align 8
				ret void
				}

				; Make sure the optimization fails to fire if the symbol is aligned, but the
				; offset is not. The i64 field of the packed %struct.misalign sits at byte
				; offset 1 of @misalign_v (align 16), so the expected code keeps the explicit
				; addi and uses indexed ldx/stdx with the offset held in a register.
				; The label pattern ends in ':' like every other test in this file, so it
				; anchors on the function label rather than any earlier mention of the symbol.
				; CHECK-LABEL: test_misalign:
				; POWER7: addis [[REGSTRUCT_0:[0-9]+]], 2, misalign_v@toc@ha
				; POWER7: addi [[REGSTRUCT:[0-9]+]], [[REGSTRUCT_0]], misalign_v@toc@l
				; POWER7: li [[OFFSET_REG:[0-9]+]], 1
				; POWER7: ldx [[REG0_0:[0-9]+]], [[REGSTRUCT]], [[OFFSET_REG]]
				; POWER7: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
				; POWER7: stdx [[REG0_1]], [[REGSTRUCT]], [[OFFSET_REG]]
				define void @test_misalign() nounwind {
				entry:
				%0 = load i64, i64* getelementptr inbounds (%struct.misalign, %struct.misalign* @misalign_v, i32 0, i32 1), align 1
				%inc0 = add nsw i64 %0, 1
				store i64 %inc0, i64* getelementptr inbounds (%struct.misalign, %struct.misalign* @misalign_v, i32 0, i32 1), align 1
				ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

[PPC]: Peephole optimize small accesses to aligned globals.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 40826

lib/Target/PowerPC/PPCISelDAGToDAG.cpp

test/CodeGen/PowerPC/peephole-align.ll

This is an archive of the discontinued LLVM Phabricator instance.

[PPC]: Peephole optimize small accesses to aligned globals. (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 40826

lib/Target/PowerPC/PPCISelDAGToDAG.cpp

test/CodeGen/PowerPC/peephole-align.ll

[PPC]: Peephole optimize small accesses to aligned globals.
ClosedPublic