Diff 251089

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,835 Lines • ▼ Show 20 Lines	static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,

assert((!ValVT.isInteger() \|\|		assert((!ValVT.isInteger() \|\|
(ValVT.getSizeInBits() <= RegVT.getSizeInBits())) &&		(ValVT.getSizeInBits() <= RegVT.getSizeInBits())) &&
"Integer argument exceeds register size: should have been legalized");		"Integer argument exceeds register size: should have been legalized");

if (ValVT == MVT::f128)		if (ValVT == MVT::f128)
report_fatal_error("f128 is unimplemented on AIX.");		report_fatal_error("f128 is unimplemented on AIX.");

if (ArgFlags.isByVal())
report_fatal_error("Passing structure by value is unimplemented.");

if (ArgFlags.isNest())		if (ArgFlags.isNest())
report_fatal_error("Nest arguments are unimplemented.");		report_fatal_error("Nest arguments are unimplemented.");

if (ValVT.isVector() \|\| LocVT.isVector())		if (ValVT.isVector() \|\| LocVT.isVector())
report_fatal_error("Vector arguments are unimplemented on AIX.");		report_fatal_error("Vector arguments are unimplemented on AIX.");

static const MCPhysReg GPR_32[] = {// 32-bit registers.		static const MCPhysReg GPR_32[] = {// 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,		PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10};		PPC::R7, PPC::R8, PPC::R9, PPC::R10};
static const MCPhysReg GPR_64[] = {// 64-bit registers.		static const MCPhysReg GPR_64[] = {// 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,		PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10};		PPC::X7, PPC::X8, PPC::X9, PPC::X10};

		if (ArgFlags.isByVal()) {
		if (ArgFlags.getNonZeroByValAlign() > PtrByteSize)
		report_fatal_error("Pass-by-value arguments with alignment greater than "
		"register width are not supported.");

		const unsigned ByValSize = ArgFlags.getByValSize();

		// An empty aggregate parameter takes up no storage and no registers.
		if (ByValSize == 0)
		return false;

		if (ByValSize <= PtrByteSize) {
		State.AllocateStack(PtrByteSize, PtrByteSize);
		if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
		State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
		return false;
		}
		}

		report_fatal_error(
		"Pass-by-value arguments are only supported in a single register.");
		}

// Arguments always reserve parameter save area.		// Arguments always reserve parameter save area.
switch (ValVT.SimpleTy) {		switch (ValVT.SimpleTy) {
default:		default:
report_fatal_error("Unhandled value type for argument.");		report_fatal_error("Unhandled value type for argument.");
case MVT::i64:		case MVT::i64:
// i64 arguments should have been split to i32 for PPC32.		// i64 arguments should have been split to i32 for PPC32.
assert(IsPPC64 && "PPC32 should have split i64 values.");		assert(IsPPC64 && "PPC32 should have split i64 values.");
LLVM_FALLTHROUGH;		LLVM_FALLTHROUGH;
▲ Show 20 Lines • Show All 257 Lines • ▼ Show 20 Lines	SDValue PPCTargetLowering::LowerCall_AIX(
// passing.		// passing.
const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)		const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
: DAG.getRegister(PPC::R1, MVT::i32);		: DAG.getRegister(PPC::R1, MVT::i32);

for (unsigned I = 0, E = ArgLocs.size(); I != E;) {		for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
CCValAssign &VA = ArgLocs[I++];		CCValAssign &VA = ArgLocs[I++];

SDValue Arg = OutVals[VA.getValNo()];		SDValue Arg = OutVals[VA.getValNo()];
		ISD::ArgFlagsTy Flags = Outs[VA.getValNo()].Flags;
		const MVT LocVT = VA.getLocVT();
		const MVT ValVT = VA.getValVT();

		sfertileUnsubmitted Done Reply Inline Actions I don't think we need an assertion like this: CCValAssign is either a RegLoc or a MemLoc by the class construction. It's not possible to create an object with both of these false. If the class design ever changes such that there are other options then we can add an assertion then. sfertile: I don't think we need an assertion like this: CCValAssign is either a RegLoc or a MemLoc by…
if (!VA.isRegLoc() && !VA.isMemLoc())		if (Flags.isByVal()) {
report_fatal_error("Unexpected location for function call argument.");		const unsigned ByValSize = Flags.getByValSize();
		assert(
		VA.isRegLoc() && ByValSize > 0 && ByValSize <= PtrByteSize &&
		"Pass-by-value arguments are only supported in a single register.");

		// Loads must be a power-of-2 size and cannot be larger than the
		// ByValSize. For example: a 7 byte by-val arg requires 4, 2 and 1 byte
		// loads.
		SDValue RegVal;
		for (unsigned Bytes = 0; Bytes != ByValSize;) {
		sfertileUnsubmitted Not Done Reply Inline Actions Could we do a larger load if the alignment is greater than the size? For example if we have a size of 7 and a type alignment of 1, but the argument is an auto local and the compiler increases the alignment to 8. I don't suggest making any such changes to this patch if it is allowed, but something to consider once we have everything working. sfertile: Could we do a larger load if the alignment is greater than the size? For example if we have a…
		cebowleratibmAuthorUnsubmitted Done Reply Inline Actions The size should be a multiple of the alignment. We can do a single load if we can prove PtrByteSize bytes are accessible from the argument address. The auto local case you mention is probably the one worth looking into but I agree it's beyond the scope of this patch to optimize this case. cebowleratibm: The size should be a multiple of the alignment. We can do a single load if we can prove…
		unsigned N = PowerOf2Floor(ByValSize - Bytes);
		const MVT VT =
		N == 1 ? MVT::i8
		cebowleratibmAuthorUnsubmitted Done Reply Inline Actions To reviewers, I thought I had a bug that we may choose a PowerOf2Floor that's larger than a register. This will be a bug, however, for the time being, we're only processing ByValSize <= PtrByteSize. cebowleratibm: To reviewers, I thought I had a bug that we may choose a PowerOf2Floor that's larger than a…
		: ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));

		SDValue LoadAddr = Arg;
		if (Bytes != 0) {
		// Adjust the load offset by the number of bytes read so far.
		SDNodeFlags Flags;
		Flags.setNoUnsignedWrap(true);
		sfertileUnsubmitted Done Reply Inline Actions suggestion: `Move the load over` --> `Adjust the load offset` sfertile: suggestion: `Move the load over ` --> `Adjust the load offset`
		LoadAddr = DAG.getNode(ISD::ADD, dl, LocVT, Arg,
		DAG.getConstant(Bytes, dl, LocVT), Flags);
		}
		SDValue Load = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, LoadAddr,
		MachinePointerInfo(), VT);
		MemOpChains.push_back(Load.getValue(1));
		sfertileUnsubmitted Done Reply Inline Actions IIUC EXTLOAD is safe for the first load because it will be left justified in the register after the shift, but the later loads need the upper bits defined to zero because we are ORing them into those shifted bits. sfertile: IIUC EXTLOAD is safe for the first load because it will be left justified in the register after…
		cebowleratibmAuthorUnsubmitted Done Reply Inline Actions Good catch. Using ZEXTLOAD improved the codegen to use rotate/mask inserts to assemble the byval in register. See the test updates on the next revision. cebowleratibm: Good catch. Using ZEXTLOAD improved the codegen to use rotate/mask inserts to assemble the…

		Bytes += N;
		assert(LocVT.getSizeInBits() >= (Bytes * 8));
		if (unsigned NumSHLBits = LocVT.getSizeInBits() - (Bytes * 8)) {
		sfertileUnsubmitted Done Reply Inline Actions Why the name `PaddingInBits`? Isn't this the shift size to justify the bytes just loaded, where does padding come in? sfertile: Why the name `PaddingInBits`? Isn't this the shift size to justify the bytes just loaded…
		cebowleratibmAuthorUnsubmitted Done Reply Inline Actions "Padding" was meant to indicate the number of zero extended bits from the load that need to be left shifted to get the loaded bits into the correct bit offset. I agree the term "Padding" is misleading. I'll rename to "NumSHLBits". cebowleratibm: "Padding" was meant to indicate the number of zero extended bits from the load that need to be…
		// By-val arguments are passed left-justified in register.
		EVT ShiftAmountTy =
		getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
		SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
		SDValue ShiftedLoad =
		DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
		RegVal = RegVal ? DAG.getNode(ISD::OR, dl, LocVT, RegVal, ShiftedLoad)
		: ShiftedLoad;
		ZarkoCAUnsubmitted Done Reply Inline Actions I'm curious as to what happens with 0-sized parameters: will they need special handling like they do on the callee side or do they need to be skipped? ZarkoCA: I'm curious as to what happens with 0-sized parameters: will they need special handling like…
		cebowleratibmAuthorUnsubmitted Done Reply Inline Actions We should skip 0 size by-val arguments in CC_AIX and assert non-zero by-val size here. cebowleratibm: We should skip 0 size by-val arguments in CC_AIX and assert non-zero by-val size here.
		} else {
		assert(!RegVal && Bytes == ByValSize &&
		"Pass-by-value argument handling unexpectedly incomplete.");
		RegVal = Load;
		}
		}

		RegsToPass.push_back(std::make_pair(VA.getLocReg(), RegVal));
		continue;
		}

switch (VA.getLocInfo()) {		switch (VA.getLocInfo()) {
default:		default:
report_fatal_error("Unexpected argument extension type.");		report_fatal_error("Unexpected argument extension type.");
case CCValAssign::Full:		case CCValAssign::Full:
break;		break;
case CCValAssign::ZExt:		case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);		Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
Show All 16 Lines	if (VA.isMemLoc()) {
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));		DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));

continue;		continue;
}		}

// Custom handling is used for GPR initializations for vararg float		// Custom handling is used for GPR initializations for vararg float
// arguments.		// arguments.
assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&		assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
VA.getValVT().isFloatingPoint() && VA.getLocVT().isInteger() &&		ValVT.isFloatingPoint() && LocVT.isInteger() &&
"Unexpected register handling for calling convention.");		"Unexpected register handling for calling convention.");

SDValue ArgAsInt =		SDValue ArgAsInt =
DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg);		DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);

if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize())		if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
// f32 in 32-bit GPR		// f32 in 32-bit GPR
// f64 in 64-bit GPR		// f64 in 64-bit GPR
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));		RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits())		else if (Arg.getValueType().getSizeInBits() < LocVT.getSizeInBits())
// f32 in 64-bit GPR.		// f32 in 64-bit GPR.
RegsToPass.push_back(std::make_pair(		RegsToPass.push_back(std::make_pair(
VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT())));		VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
else {		else {
// f64 in two 32-bit GPRs		// f64 in two 32-bit GPRs
// The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.		// The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&		assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
"Unexpected custom register for argument!");		"Unexpected custom register for argument!");
CCValAssign &GPR1 = VA;		CCValAssign &GPR1 = VA;
SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,		SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
DAG.getConstant(32, dl, MVT::i8));		DAG.getConstant(32, dl, MVT::i8));
▲ Show 20 Lines • Show All 8,730 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/aix-byval-param.ll

This file was deleted.

	; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 \| FileCheck %s
	; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 \| FileCheck %s

	%struct.S = type { i32, i32 }

	define void @bar() {
	entry:
	%s1 = alloca %struct.S, align 4
	%agg.tmp = alloca %struct.S, align 4
	call void @foo(%struct.S* byval(%struct.S) align 4 %agg.tmp)
	ret void
	}

	declare void @foo(%struct.S* byval(%struct.S) align 4)

	; CHECK: LLVM ERROR: Passing structure by value is unimplemented.

llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll

This file was added.

				; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 \| FileCheck %s
				; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 \| FileCheck %s

				%struct.S = type { [9 x i8] }

				define void @bar() {
				entry:
				%s1 = alloca %struct.S, align 1
				%agg.tmp = alloca %struct.S, align 1
				call void @foo(%struct.S* byval(%struct.S) align 1 %agg.tmp)
				ret void
				}

				declare void @foo(%struct.S* byval(%struct.S) align 1)

				; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in a single register.

llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll

This file was added.

				; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 \| FileCheck %s
				; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 \| FileCheck %s

				%struct.S = type { [1 x i8] }

				define void @bar() {
				entry:
				%s1 = alloca %struct.S, align 1
				%agg.tmp = alloca %struct.S, align 1
				call void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, %struct.S* byval(%struct.S) align 1 %agg.tmp)
				ret void
				}

				declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, %struct.S* byval(%struct.S) align 1)

				; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in a single register.

llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll

This file was added.

				; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 \| FileCheck %s
				; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 \| FileCheck %s

				%struct.S = type { [1 x i8] }

				define void @bar() {
				entry:
				%s1 = alloca %struct.S, align 32
				%agg.tmp = alloca %struct.S, align 32
				call void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, %struct.S* byval(%struct.S) align 32 %agg.tmp)
				ret void
				}

				declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, %struct.S* byval(%struct.S) align 32)

				; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than register width are not supported.

llvm/test/CodeGen/PowerPC/aix-cc-byval.ll

This file was added.

				; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s \| \
				; RUN: FileCheck --check-prefixes=CHECK,32BIT %s
				ZarkoCAUnsubmitted Done Reply Inline Actions Caught by the automatic testing bots, this test doesn't have a RUN step. ZarkoCA: Caught by the automatic testing bots, this test doesn't have a RUN step.
				cebowleratibmAuthorUnsubmitted Done Reply Inline Actions I missed the run commands. Need to add them and update review. cebowleratibm: I missed the run commands. Need to add them and update review.

				; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
				; RUN: -mtriple powerpc-ibm-aix-xcoff < %s \| \
				; RUN: FileCheck --check-prefixes=CHECKASM,ASM32PWR4 %s

				; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s \| \
				; RUN: FileCheck --check-prefixes=CHECK,64BIT %s

				; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
				; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s \| \
				; RUN: FileCheck --check-prefixes=CHECKASM,ASM64PWR4 %s

				%struct.S1 = type { [1 x i8] }
				@gS1 = external global %struct.S1, align 1

				define void @call_test_byval_1Byte() {
				entry:
				call void @test_byval_1Byte(%struct.S1* byval(%struct.S1) align 1 @gS1)
				ret void
				}

				declare void @test_byval_1Byte(%struct.S1* byval(%struct.S1) align 1)

				; CHECK-LABEL: name: call_test_byval_1Byte{{.*}}

				; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
				; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @gS1, $r2 :: (load 4 from got)
				; 32BIT-NEXT: renamable $r3 = LBZ 0, killed renamable $r[[REG]] :: (load 1)
				; 32BIT-NEXT: renamable $r3 = RLWINM killed renamable $r3, 24, 0, 7
				; 32BIT-NEXT: BL_NOP <mcsymbol .test_byval_1Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1
				; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1

				; CHECKASM-LABEL: .call_test_byval_1Byte:

				; ASM32PWR4: stwu 1, -64(1)
				; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC{{[0-9]+}}(2)
				; ASM32PWR4-NEXT: lbz 3, 0([[REG]])
				sfertileUnsubmitted Done Reply Inline Actions IDX isn't used again. You can use just the regex to match the label without having to store it in a file check variable. Ditto on the other filecheck variables that aren't used after capture. sfertile: IDX isn't used again. You can use just the regex to match the label without having to store it…
				; ASM32PWR4-NEXT: slwi 3, 3, 24
				; ASM32PWR4-NEXT: bl .test_byval_1Byte
				; ASM32PWR4-NEXT: nop
				; ASM32PWR4-NEXT: addi 1, 1, 64

				; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
				; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @gS1, $x2 :: (load 8 from got)
				; 64BIT-NEXT: renamable $x3 = LBZ8 0, killed renamable $x[[REG]] :: (load 1)
				; 64BIT-NEXT: renamable $x3 = RLDICR killed renamable $x3, 56, 7
				; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_1Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
				; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1

				; ASM64PWR4: std 0, 16(1)
				; ASM64PWR4-NEXT: stdu 1, -112(1)
				; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC{{[0-9]+}}(2)
				; ASM64PWR4-NEXT: lbz 3, 0([[REG]])
				; ASM64PWR4-NEXT: sldi 3, 3, 56
				; ASM64PWR4-NEXT: bl .test_byval_1Byte
				; ASM64PWR4-NEXT: nop
				; ASM64PWR4-NEXT: addi 1, 1, 112

				%struct.S2 = type { [2 x i8] }

				@gS2 = external global %struct.S2, align 1

				define void @call_test_byval_2Byte() {
				entry:
				call void @test_byval_2Byte(%struct.S2* byval(%struct.S2) align 1 @gS2)
				ret void
				}

				declare void @test_byval_2Byte(%struct.S2* byval(%struct.S2) align 1)

				; CHECK-LABEL: name: call_test_byval_2Byte{{.*}}

				; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
				; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @gS2, $r2 :: (load 4 from got)
				; 32BIT-NEXT: renamable $r3 = LHZ 0, killed renamable $r[[REG]] :: (load 2)
				; 32BIT-NEXT: renamable $r3 = RLWINM killed renamable $r3, 16, 0, 15
				; 32BIT-NEXT: BL_NOP <mcsymbol .test_byval_2Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1
				; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1

				; CHECKASM-LABEL: .call_test_byval_2Byte:

				; ASM32PWR4: stwu 1, -64(1)
				; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC{{[0-9]+}}(2)
				; ASM32PWR4-NEXT: lhz 3, 0([[REG]])
				; ASM32PWR4-NEXT: slwi 3, 3, 16
				; ASM32PWR4-NEXT: bl .test_byval_2Byte
				; ASM32PWR4-NEXT: nop
				; ASM32PWR4-NEXT: addi 1, 1, 64

				; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
				; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @gS2, $x2 :: (load 8 from got)
				; 64BIT-NEXT: renamable $x3 = LHZ8 0, killed renamable $x[[REG]] :: (load 2)
				; 64BIT-NEXT: renamable $x3 = RLDICR killed renamable $x3, 48, 15
				; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_2Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
				; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1

				; ASM64PWR4: std 0, 16(1)
				; ASM64PWR4-NEXT: stdu 1, -112(1)
				; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC{{[0-9]+}}(2)
				; ASM64PWR4-NEXT: lhz 3, 0([[REG]])
				; ASM64PWR4-NEXT: sldi 3, 3, 48
				; ASM64PWR4-NEXT: bl .test_byval_2Byte
				; ASM64PWR4-NEXT: nop
				; ASM64PWR4-NEXT: addi 1, 1, 112

				%struct.S3 = type { [3 x i8] }

				@gS3 = external global %struct.S3, align 1

				define void @call_test_byval_3Byte() {
				entry:
				call void @test_byval_3Byte(%struct.S3* byval(%struct.S3) align 1 @gS3)
				ret void
				}

				declare void @test_byval_3Byte(%struct.S3* byval(%struct.S3) align 1)

				; CHECK-LABEL: name: call_test_byval_3Byte{{.*}}

				; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
				; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
				; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS3, $r2 :: (load 4 from got)
				; 32BIT-DAG: renamable $r[[REG1:[0-9]+]] = LHZ 0, killed renamable $r[[REGADDR]] :: (load 2)
				; 32BIT-DAG: renamable $r[[REG2:[0-9]+]] = LBZ 2, renamable $r[[REGADDR]] :: (load 1)
				sfertileUnsubmitted Done Reply Inline Actions Can we drop the 'BYTE' part of REG2BYTE and REG1BYTE? sfertile: Can we drop the 'BYTE' part of REG2BYTE and REG1BYTE?
				cebowleratibmAuthorUnsubmitted Done Reply Inline Actions I'll just use REG1, REG2 ... as per usual generic reg names. The reader will need to chase down the contents. I found it useful to name the regs by content as I tried to keep everything straight in my head, but I don't mind the more condensed form. cebowleratibm: I'll just use REG1, REG2 ... as per usual generic reg names. The reader will need to chase…
				; 32BIT-DAG: renamable $r3 = RLWINM killed renamable $r[[REG2]], 8, 16, 23
				; 32BIT-DAG: renamable $r3 = RLWIMI killed renamable $r3, killed renamable $r[[REG1]], 16, 0, 15
				; 32BIT-NEXT: BL_NOP <mcsymbol .test_byval_3Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1
				; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1

				; CHECKASM-LABEL: .call_test_byval_3Byte:

				; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
				; ASM32PWR4: stwu 1, -64(1)
				; ASM32PWR4-NEXT: lwz [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
				; ASM32PWR4-DAG: lhz [[REG1:[0-9]+]], 0([[REGADDR]])
				sfertileUnsubmitted Done Reply Inline Actions `LC2(2)` --> `LC{{[0-9]+}}(2)` sfertile: `LC2(2)` --> `LC{{[0-9]+}}(2)`
				; ASM32PWR4-DAG: lbz [[REG2:[0-9]+]], 2([[REGADDR]])
				; ASM32PWR4-DAG: rlwinm 3, [[REG2]], 8, 16, 23
				; ASM32PWR4-DAG: rlwimi 3, [[REG1]], 16, 0, 15
				; ASM32PWR4-NEXT: bl .test_byval_3Byte
				; ASM32PWR4-NEXT: nop

				; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
				; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
				; 64BIT-DAG: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS3, $x2 :: (load 8 from got)
				; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LHZ8 0, killed renamable $x[[REGADDR]] :: (load 2)
				; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LBZ8 2, renamable $x[[REGADDR]] :: (load 1)
				; 64BIT-DAG: renamable $x3 = RLDIC killed renamable $x[[REG2]], 40, 16
				; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 48, 0
				; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_3Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
				; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1

				; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
				; ASM64PWR4: stdu 1, -112(1)
				; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
				; ASM64PWR4-DAG: lhz [[REG1:[0-9]+]], 0([[REGADDR]])
				sfertileUnsubmitted Done Reply Inline Actions Same fix mentioned above with 'LC2(2)'. sfertile: Same fix mentioned above with 'LC2(2)'.
				; ASM64PWR4-DAG: lbz [[REG2:[0-9]+]], 2([[REGADDR]])
				; ASM64PWR4-DAG: rldic 3, [[REG2]], 40, 16
				; ASM64PWR4-DAG: rldimi 3, [[REG1]], 48, 0
				; ASM64PWR4-NEXT: bl .test_byval_3Byte
				; ASM64PWR4-NEXT: nop

				%struct.S4 = type { [4 x i8] }

				@gS4 = external global %struct.S4, align 1

				define void @call_test_byval_4Byte() {
				entry:
				call void @test_byval_4Byte(%struct.S4* byval(%struct.S4) align 1 @gS4)
				ret void
				}

				declare void @test_byval_4Byte(%struct.S4* byval(%struct.S4) align 1)

				; CHECK-LABEL: name: call_test_byval_4Byte{{.*}}

				; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
				; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @gS4, $r2 :: (load 4 from got)
				; 32BIT-NEXT: renamable $r3 = LWZ 0, killed renamable $r[[REG]] :: (load 4)
				; 32BIT-NEXT: BL_NOP <mcsymbol .test_byval_4Byte>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1
				; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1

				; CHECKASM-LABEL: .call_test_byval_4Byte:

				; ASM32PWR4: stwu 1, -64(1)
				; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC{{[0-9]+}}(2)
				; ASM32PWR4-NEXT: lwz 3, 0([[REG]])
				; ASM32PWR4-NEXT: bl .test_byval_4Byte
				; ASM32PWR4-NEXT: nop
				; ASM32PWR4-NEXT: addi 1, 1, 64

				; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
				; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @gS4, $x2 :: (load 8 from got)
				; 64BIT-NEXT: renamable $x3 = LWZ8 0, killed renamable $x[[REG]] :: (load 4)
				; 64BIT-NEXT: renamable $x3 = RLDICR killed renamable $x3, 32, 31
				; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_4Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
				; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
				ZarkoCAUnsubmitted Done Reply Inline Actions looks like a git artifact here ZarkoCA: looks like a git artifact here

				; ASM64PWR4: stdu 1, -112(1)
				; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC{{[0-9]+}}(2)
				; ASM64PWR4-NEXT: lwz 3, 0([[REG]])
				; ASM64PWR4-NEXT: sldi 3, 3, 32
				; ASM64PWR4-NEXT: bl .test_byval_4Byte
				; ASM64PWR4-NEXT: nop
				; ASM64PWR4-NEXT: addi 1, 1, 112

llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll

This file was added.

				; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s \| \
				; RUN: FileCheck --check-prefixes=CHECK,64BIT %s

				; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \
				; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s \| \
				; RUN: FileCheck --check-prefixes=CHECKASM,ASM64PWR4 %s

				%struct.S5 = type { [5 x i8] }

				@gS5 = external global %struct.S5, align 1

				define void @call_test_byval_5Byte() {
				entry:
				call void @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1 @gS5)
				ret void
				}

				declare void @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1)

				; CHECK-LABEL: name: call_test_byval_5Byte{{.*}}

				; CHECKASM-LABEL: .call_test_byval_5Byte:

				; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
				; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
				; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS5, $x2 :: (load 8 from got)
				; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
				; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LBZ8 4, renamable $x[[REGADDR]] :: (load 1)
				sfertileUnsubmitted Done Reply Inline Actions Same suggestion about dropping 'BYTE' from the variables name. sfertile: Same suggestion about dropping 'BYTE' from the variables name.
				; 64BIT-DAG: renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 24, 0, 7
				; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
				; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_5Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
				; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1

				; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
				; ASM64PWR4: stdu 1, -112(1)
				; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
				; ASM64PWR4-DAG: lwz [[REG1:[0-9]+]], 0([[REGADDR]])
				sfertileUnsubmitted Done Reply Inline Actions Same fix on the label here and elsewhere in the test : `LC{{[0-9]+}}` sfertile: Same fix on the label here and elsewhere in the test : `LC{{[0-9]+}}`
				; ASM64PWR4-DAG: lbz [[REG2:[0-9]+]], 4([[REGADDR]])
				; ASM64PWR4-DAG: rlwinm 3, [[REG2]], 24, 0, 7
				; ASM64PWR4-DAG: rldimi 3, [[REG1]], 32, 0
				; ASM64PWR4-NEXT: bl .test_byval_5Byte
				; ASM64PWR4-NEXT: nop

				%struct.S6 = type { [6 x i8] }

				@gS6 = external global %struct.S6, align 1

				define void @call_test_byval_6Byte() {
				entry:
				call void @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1 @gS6)
				ret void
				}

				declare void @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1)

				; CHECK-LABEL: name: call_test_byval_6Byte{{.*}}

				; CHECKASM-LABEL: .call_test_byval_6Byte:

				; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
				; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
				; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS6, $x2 :: (load 8 from got)
				; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
				; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2)
				; 64BIT-DAG: renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 16, 0, 15
				; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
				; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_6Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
				; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1

				; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
				; ASM64PWR4: stdu 1, -112(1)
				; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
				; ASM64PWR4-DAG: lwz [[REG1:[0-9]+]], 0([[REGADDR]])
				; ASM64PWR4-DAG: lhz [[REG2:[0-9]+]], 4([[REGADDR]])
				; ASM64PWR4-DAG: rlwinm 3, [[REG2]], 16, 0, 15
				; ASM64PWR4-DAG: rldimi 3, [[REG1]], 32, 0
				; ASM64PWR4-NEXT: bl .test_byval_6Byte
				; ASM64PWR4-NEXT: nop

				%struct.S7 = type { [7 x i8] }

				@gS7 = external global %struct.S7, align 1

				define void @call_test_byval_7Byte() {
				entry:
				call void @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1 @gS7)
				ret void
				}

				declare void @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1)

				; CHECK-LABEL: name: call_test_byval_7Byte{{.*}}

				; CHECKASM-LABEL: .call_test_byval_7Byte:

				; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
				; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
				; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS7, $x2 :: (load 8 from got)
				; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4)
				; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2)
				; 64BIT-DAG: renamable $x[[REG3:[0-9]+]] = LBZ8 6, renamable $x[[REGADDR]] :: (load 1)
				; 64BIT-DAG: renamable $x3 = RLWINM8 killed renamable $x[[REG3]], 8, 16, 23
				; 64BIT-DAG: renamable $x3 = RLWIMI8 killed renamable $x3, killed renamable $x[[REG2]], 16, 0, 15
				; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0
				; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_7Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
				; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1

				; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
				; ASM64PWR4: stdu 1, -112(1)
				; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
				; ASM64PWR4-DAG: lwz [[REG1:[0-9]+]], 0([[REGADDR]])
				; ASM64PWR4-DAG: lhz [[REG2:[0-9]+]], 4([[REGADDR]])
				; ASM64PWR4-DAG: lbz [[REG3:[0-9]+]], 6([[REGADDR]])
				; ASM64PWR4-DAG: rlwinm 3, [[REG3]], 8, 16, 23
				; ASM64PWR4-DAG: rlwimi 3, [[REG2]], 16, 0, 15
				; ASM64PWR4-DAG: rldimi 3, [[REG1]], 32, 0
				; ASM64PWR4-NEXT: bl .test_byval_7Byte
				; ASM64PWR4-NEXT: nop

				%struct.S8 = type { [8 x i8] }

				@gS8 = external global %struct.S8, align 1

				define void @call_test_byval_8Byte() {
				entry:
				call void @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1 @gS8)
				ret void
				}

				declare void @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1)

				; CHECK-LABEL: name: call_test_byval_8Byte{{.*}}

				; CHECKASM-LABEL: .call_test_byval_8Byte:

				; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
				; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS8, $x2 :: (load 8 from got)
				; 64BIT-NEXT: renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load 8)
				; 64BIT-NEXT: BL8_NOP <mcsymbol .test_byval_8Byte>, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1
				; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1

				; ASM64PWR4: stdu 1, -112(1)
				; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2)
				; ASM64PWR4-NEXT: ld 3, 0([[REGADDR]])
				; ASM64PWR4-NEXT: bl .test_byval_8Byte
				; ASM64PWR4-NEXT: nop

This is an archive of the discontinued LLVM Phabricator instance.

[AIX] Implement by-val caller arguments in a single register
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 251089

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/test/CodeGen/PowerPC/aix-byval-param.ll

llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll

llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll

llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll

llvm/test/CodeGen/PowerPC/aix-cc-byval.ll

llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AIX] Implement by-val caller arguments in a single registerClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 251089

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/test/CodeGen/PowerPC/aix-byval-param.ll

llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll

llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll

llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll

llvm/test/CodeGen/PowerPC/aix-cc-byval.ll

llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll

[AIX] Implement by-val caller arguments in a single register
ClosedPublic