diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6841,9 +6841,6 @@ if (ValVT == MVT::f128) report_fatal_error("f128 is unimplemented on AIX."); - if (ArgFlags.isByVal()) - report_fatal_error("Passing structure by value is unimplemented."); - if (ArgFlags.isNest()) report_fatal_error("Nest arguments are unimplemented."); @@ -6857,6 +6854,29 @@ PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10}; + if (ArgFlags.isByVal()) { + if (ArgFlags.getNonZeroByValAlign() > PtrByteSize) + report_fatal_error("Pass-by-value arguments with alignment greater than " + "register width are not supported."); + + const unsigned ByValSize = ArgFlags.getByValSize(); + + // An empty aggregate parameter takes up no storage and no registers. + if (ByValSize == 0) + return false; + + if (ByValSize <= PtrByteSize) { + State.AllocateStack(PtrByteSize, PtrByteSize); + if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo)); + return false; + } + } + + report_fatal_error( + "Pass-by-value arguments are only supported in a single register."); + } + // Arguments always reserve parameter save area. switch (ValVT.SimpleTy) { default: @@ -7130,9 +7150,59 @@ CCValAssign &VA = ArgLocs[I++]; SDValue Arg = OutVals[VA.getValNo()]; + ISD::ArgFlagsTy Flags = Outs[VA.getValNo()].Flags; + const MVT LocVT = VA.getLocVT(); + const MVT ValVT = VA.getValVT(); + + if (Flags.isByVal()) { + const unsigned ByValSize = Flags.getByValSize(); + assert( + VA.isRegLoc() && ByValSize > 0 && ByValSize <= PtrByteSize && + "Pass-by-value arguments are only supported in a single register."); + + // Loads must be a power-of-2 size and cannot be larger than the + // ByValSize. For example: a 7 byte by-val arg requires 4, 2 and 1 byte + // loads. 
+ SDValue RegVal; + for (unsigned Bytes = 0; Bytes != ByValSize;) { + unsigned N = PowerOf2Floor(ByValSize - Bytes); + const MVT VT = + N == 1 ? MVT::i8 + : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64)); + + SDValue LoadAddr = Arg; + if (Bytes != 0) { + // Adjust the load offset by the number of bytes read so far. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + LoadAddr = DAG.getNode(ISD::ADD, dl, LocVT, Arg, + DAG.getConstant(Bytes, dl, LocVT), Flags); + } + SDValue Load = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, LoadAddr, + MachinePointerInfo(), VT); + MemOpChains.push_back(Load.getValue(1)); - if (!VA.isRegLoc() && !VA.isMemLoc()) - report_fatal_error("Unexpected location for function call argument."); + Bytes += N; + assert(LocVT.getSizeInBits() >= (Bytes * 8)); + if (unsigned NumSHLBits = LocVT.getSizeInBits() - (Bytes * 8)) { + // By-val arguments are passed left-justified in register. + EVT ShiftAmountTy = + getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout()); + SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy); + SDValue ShiftedLoad = + DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt); + RegVal = RegVal ? DAG.getNode(ISD::OR, dl, LocVT, RegVal, ShiftedLoad) + : ShiftedLoad; + } else { + assert(!RegVal && Bytes == ByValSize && + "Pass-by-value argument handling unexpectedly incomplete."); + RegVal = Load; + } + } + + RegsToPass.push_back(std::make_pair(VA.getLocReg(), RegVal)); + continue; + } switch (VA.getLocInfo()) { default: @@ -7165,20 +7235,20 @@ // Custom handling is used for GPR initializations for vararg float // arguments. 
assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg && - VA.getValVT().isFloatingPoint() && VA.getLocVT().isInteger() && + ValVT.isFloatingPoint() && LocVT.isInteger() && "Unexpected register handling for calling convention."); SDValue ArgAsInt = - DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg); + DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg); - if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize()) + if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize()) // f32 in 32-bit GPR // f64 in 64-bit GPR RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt)); - else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits()) + else if (Arg.getValueType().getSizeInBits() < LocVT.getSizeInBits()) // f32 in 64-bit GPR. RegsToPass.push_back(std::make_pair( - VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT()))); + VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT))); else { // f64 in two 32-bit GPRs // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs. diff --git a/llvm/test/CodeGen/PowerPC/aix-byval-param.ll b/llvm/test/CodeGen/PowerPC/aix-byval-param.ll deleted file mode 100644 --- a/llvm/test/CodeGen/PowerPC/aix-byval-param.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s -; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s - -%struct.S = type { i32, i32 } - -define void @bar() { -entry: - %s1 = alloca %struct.S, align 4 - %agg.tmp = alloca %struct.S, align 4 - call void @foo(%struct.S* byval(%struct.S) align 4 %agg.tmp) - ret void -} - -declare void @foo(%struct.S* byval(%struct.S) align 4) - -; CHECK: LLVM ERROR: Passing structure by value is unimplemented. 
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation1.ll @@ -0,0 +1,16 @@ +; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s +; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s + +%struct.S = type { [9 x i8] } + +define void @bar() { +entry: + %s1 = alloca %struct.S, align 1 + %agg.tmp = alloca %struct.S, align 1 + call void @foo(%struct.S* byval(%struct.S) align 1 %agg.tmp) + ret void +} + +declare void @foo(%struct.S* byval(%struct.S) align 1) + +; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in a single register. diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation2.ll @@ -0,0 +1,16 @@ +; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s +; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s + +%struct.S = type { [1 x i8] } + +define void @bar() { +entry: + %s1 = alloca %struct.S, align 1 + %agg.tmp = alloca %struct.S, align 1 + call void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, %struct.S* byval(%struct.S) align 1 %agg.tmp) + ret void +} + +declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, %struct.S* byval(%struct.S) align 1) + +; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in a single register. 
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll @@ -0,0 +1,16 @@ +; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s +; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s + +%struct.S = type { [1 x i8] } + +define void @bar() { +entry: + %s1 = alloca %struct.S, align 32 + %agg.tmp = alloca %struct.S, align 32 + call void @foo(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, %struct.S* byval(%struct.S) align 32 %agg.tmp) + ret void +} + +declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, %struct.S* byval(%struct.S) align 32) + +; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than register width are not supported. diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval.ll @@ -0,0 +1,206 @@ +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,32BIT %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck --check-prefixes=CHECKASM,ASM32PWR4 %s + +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,64BIT %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck --check-prefixes=CHECKASM,ASM64PWR4 %s + +%struct.S1 = type { [1 x i8] } +@gS1 = external global %struct.S1, align 1 + +define void @call_test_byval_1Byte() { +entry: + call void @test_byval_1Byte(%struct.S1* byval(%struct.S1) align 1 @gS1) + ret void +} + +declare void @test_byval_1Byte(%struct.S1* byval(%struct.S1) 
align 1) + +; CHECK-LABEL: name: call_test_byval_1Byte{{.*}} + +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @gS1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $r3 = LBZ 0, killed renamable $r[[REG]] :: (load 1) +; 32BIT-NEXT: renamable $r3 = RLWINM killed renamable $r3, 24, 0, 7 +; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; CHECKASM-LABEL: .call_test_byval_1Byte: + +; ASM32PWR4: stwu 1, -64(1) +; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC{{[0-9]+}}(2) +; ASM32PWR4-NEXT: lbz 3, 0([[REG]]) +; ASM32PWR4-NEXT: slwi 3, 3, 24 +; ASM32PWR4-NEXT: bl .test_byval_1Byte +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @gS1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $x3 = LBZ8 0, killed renamable $x[[REG]] :: (load 1) +; 64BIT-NEXT: renamable $x3 = RLDICR killed renamable $x3, 56, 7 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; ASM64PWR4: std 0, 16(1) +; ASM64PWR4-NEXT: stdu 1, -112(1) +; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC{{[0-9]+}}(2) +; ASM64PWR4-NEXT: lbz 3, 0([[REG]]) +; ASM64PWR4-NEXT: sldi 3, 3, 56 +; ASM64PWR4-NEXT: bl .test_byval_1Byte +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 112 + +%struct.S2 = type { [2 x i8] } + +@gS2 = external global %struct.S2, align 1 + +define void @call_test_byval_2Byte() { +entry: + call void @test_byval_2Byte(%struct.S2* byval(%struct.S2) align 1 @gS2) + ret void +} + +declare void @test_byval_2Byte(%struct.S2* byval(%struct.S2) align 1) + +; CHECK-LABEL: name: call_test_byval_2Byte{{.*}} + +; 32BIT: ADJCALLSTACKDOWN 56, 
0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @gS2, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $r3 = LHZ 0, killed renamable $r[[REG]] :: (load 2) +; 32BIT-NEXT: renamable $r3 = RLWINM killed renamable $r3, 16, 0, 15 +; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; CHECKASM-LABEL: .call_test_byval_2Byte: + +; ASM32PWR4: stwu 1, -64(1) +; ASM32PWR4-NEXT: lwz [[REG:[0-9]+]], LC{{[0-9]+}}(2) +; ASM32PWR4-NEXT: lhz 3, 0([[REG]]) +; ASM32PWR4-NEXT: slwi 3, 3, 16 +; ASM32PWR4-NEXT: bl .test_byval_2Byte +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @gS2, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $x3 = LHZ8 0, killed renamable $x[[REG]] :: (load 2) +; 64BIT-NEXT: renamable $x3 = RLDICR killed renamable $x3, 48, 15 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; ASM64PWR4: std 0, 16(1) +; ASM64PWR4-NEXT: stdu 1, -112(1) +; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC{{[0-9]+}}(2) +; ASM64PWR4-NEXT: lhz 3, 0([[REG]]) +; ASM64PWR4-NEXT: sldi 3, 3, 48 +; ASM64PWR4-NEXT: bl .test_byval_2Byte +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 112 + +%struct.S3 = type { [3 x i8] } + +@gS3 = external global %struct.S3, align 1 + +define void @call_test_byval_3Byte() { +entry: + call void @test_byval_3Byte(%struct.S3* byval(%struct.S3) align 1 @gS3) + ret void +} + +declare void @test_byval_3Byte(%struct.S3* byval(%struct.S3) align 1) + +; CHECK-LABEL: name: call_test_byval_3Byte{{.*}} + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
+; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: renamable $r[[REGADDR:[0-9]+]] = LWZtoc @gS3, $r2 :: (load 4 from got) +; 32BIT-DAG: renamable $r[[REG1:[0-9]+]] = LHZ 0, killed renamable $r[[REGADDR]] :: (load 2) +; 32BIT-DAG: renamable $r[[REG2:[0-9]+]] = LBZ 2, renamable $r[[REGADDR]] :: (load 1) +; 32BIT-DAG: renamable $r3 = RLWINM killed renamable $r[[REG2]], 8, 16, 23 +; 32BIT-DAG: renamable $r3 = RLWIMI killed renamable $r3, killed renamable $r[[REG1]], 16, 0, 15 +; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; CHECKASM-LABEL: .call_test_byval_3Byte: + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. +; ASM32PWR4: stwu 1, -64(1) +; ASM32PWR4-NEXT: lwz [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2) +; ASM32PWR4-DAG: lhz [[REG1:[0-9]+]], 0([[REGADDR]]) +; ASM32PWR4-DAG: lbz [[REG2:[0-9]+]], 2([[REGADDR]]) +; ASM32PWR4-DAG: rlwinm 3, [[REG2]], 8, 16, 23 +; ASM32PWR4-DAG: rlwimi 3, [[REG1]], 16, 0, 15 +; ASM32PWR4-NEXT: bl .test_byval_3Byte +; ASM32PWR4-NEXT: nop + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-DAG: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS3, $x2 :: (load 8 from got) +; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LHZ8 0, killed renamable $x[[REGADDR]] :: (load 2) +; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LBZ8 2, renamable $x[[REGADDR]] :: (load 1) +; 64BIT-DAG: renamable $x3 = RLDIC killed renamable $x[[REG2]], 40, 16 +; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 48, 0 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. +; ASM64PWR4: stdu 1, -112(1) +; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2) +; ASM64PWR4-DAG: lhz [[REG1:[0-9]+]], 0([[REGADDR]]) +; ASM64PWR4-DAG: lbz [[REG2:[0-9]+]], 2([[REGADDR]]) +; ASM64PWR4-DAG: rldic 3, [[REG2]], 40, 16 +; ASM64PWR4-DAG: rldimi 3, [[REG1]], 48, 0 +; ASM64PWR4-NEXT: bl .test_byval_3Byte +; ASM64PWR4-NEXT: nop + +%struct.S4 = type { [4 x i8] } + +@gS4 = external global %struct.S4, align 1 + +define void @call_test_byval_4Byte() { +entry: + call void @test_byval_4Byte(%struct.S4* byval(%struct.S4) align 1 @gS4) + ret void +} + +declare void @test_byval_4Byte(%struct.S4* byval(%struct.S4) align 1) + +; CHECK-LABEL: name: call_test_byval_4Byte{{.*}} + +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @gS4, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $r3 = LWZ 0, killed renamable $r[[REG]] :: (load 4) +; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; CHECKASM-LABEL: .call_test_byval_4Byte: + +; ASM32PWR4: stwu 1, -64(1) +; ASM32PWR4-NEXT: 
lwz [[REG:[0-9]+]], LC{{[0-9]+}}(2) +; ASM32PWR4-NEXT: lwz 3, 0([[REG]]) +; ASM32PWR4-NEXT: bl .test_byval_4Byte +; ASM32PWR4-NEXT: nop +; ASM32PWR4-NEXT: addi 1, 1, 64 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: renamable $x[[REG:[0-9]+]] = LDtoc @gS4, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $x3 = LWZ8 0, killed renamable $x[[REG]] :: (load 4) +; 64BIT-NEXT: renamable $x3 = RLDICR killed renamable $x3, 32, 31 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; ASM64PWR4: stdu 1, -112(1) +; ASM64PWR4-NEXT: ld [[REG:[0-9]+]], LC{{[0-9]+}}(2) +; ASM64PWR4-NEXT: lwz 3, 0([[REG]]) +; ASM64PWR4-NEXT: sldi 3, 3, 32 +; ASM64PWR4-NEXT: bl .test_byval_4Byte +; ASM64PWR4-NEXT: nop +; ASM64PWR4-NEXT: addi 1, 1, 112 diff --git a/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll b/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix64-cc-byval.ll @@ -0,0 +1,146 @@ +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,64BIT %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck --check-prefixes=CHECKASM,ASM64PWR4 %s + +%struct.S5 = type { [5 x i8] } + +@gS5 = external global %struct.S5, align 1 + +define void @call_test_byval_5Byte() { +entry: + call void @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1 @gS5) + ret void +} + +declare void @test_byval_5Byte(%struct.S5* byval(%struct.S5) align 1) + +; CHECK-LABEL: name: call_test_byval_5Byte{{.*}} + +; CHECKASM-LABEL: .call_test_byval_5Byte: + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS5, $x2 :: (load 8 from got) +; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4) +; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LBZ8 4, renamable $x[[REGADDR]] :: (load 1) +; 64BIT-DAG: renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 24, 0, 7 +; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. +; ASM64PWR4: stdu 1, -112(1) +; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2) +; ASM64PWR4-DAG: lwz [[REG1:[0-9]+]], 0([[REGADDR]]) +; ASM64PWR4-DAG: lbz [[REG2:[0-9]+]], 4([[REGADDR]]) +; ASM64PWR4-DAG: rlwinm 3, [[REG2]], 24, 0, 7 +; ASM64PWR4-DAG: rldimi 3, [[REG1]], 32, 0 +; ASM64PWR4-NEXT: bl .test_byval_5Byte +; ASM64PWR4-NEXT: nop + +%struct.S6 = type { [6 x i8] } + +@gS6 = external global %struct.S6, align 1 + +define void @call_test_byval_6Byte() { +entry: + call void @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1 @gS6) + ret void +} + +declare void @test_byval_6Byte(%struct.S6* byval(%struct.S6) align 1) + +; CHECK-LABEL: name: call_test_byval_6Byte{{.*}} + +; CHECKASM-LABEL: .call_test_byval_6Byte: + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS6, $x2 :: (load 8 from got) +; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4) +; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2) +; 64BIT-DAG: renamable $x3 = RLWINM8 killed renamable $x[[REG2]], 16, 0, 15 +; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. +; ASM64PWR4: stdu 1, -112(1) +; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2) +; ASM64PWR4-DAG: lwz [[REG1:[0-9]+]], 0([[REGADDR]]) +; ASM64PWR4-DAG: lhz [[REG2:[0-9]+]], 4([[REGADDR]]) +; ASM64PWR4-DAG: rlwinm 3, [[REG2]], 16, 0, 15 +; ASM64PWR4-DAG: rldimi 3, [[REG1]], 32, 0 +; ASM64PWR4-NEXT: bl .test_byval_6Byte +; ASM64PWR4-NEXT: nop + +%struct.S7 = type { [7 x i8] } + +@gS7 = external global %struct.S7, align 1 + +define void @call_test_byval_7Byte() { +entry: + call void @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1 @gS7) + ret void +} + +declare void @test_byval_7Byte(%struct.S7* byval(%struct.S7) align 1) + +; CHECK-LABEL: name: call_test_byval_7Byte{{.*}} + +; CHECKASM-LABEL: .call_test_byval_7Byte: + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. 
+; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS7, $x2 :: (load 8 from got) +; 64BIT-DAG: renamable $x[[REG1:[0-9]+]] = LWZ8 0, killed renamable $x[[REGADDR]] :: (load 4) +; 64BIT-DAG: renamable $x[[REG2:[0-9]+]] = LHZ8 4, renamable $x[[REGADDR]] :: (load 2) +; 64BIT-DAG: renamable $x[[REG3:[0-9]+]] = LBZ8 6, renamable $x[[REGADDR]] :: (load 1) +; 64BIT-DAG: renamable $x3 = RLWINM8 killed renamable $x[[REG3]], 8, 16, 23 +; 64BIT-DAG: renamable $x3 = RLWIMI8 killed renamable $x3, killed renamable $x[[REG2]], 16, 0, 15 +; 64BIT-DAG: renamable $x3 = RLDIMI killed renamable $x3, killed renamable $x[[REG1]], 32, 0 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings. +; ASM64PWR4: stdu 1, -112(1) +; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2) +; ASM64PWR4-DAG: lwz [[REG1:[0-9]+]], 0([[REGADDR]]) +; ASM64PWR4-DAG: lhz [[REG2:[0-9]+]], 4([[REGADDR]]) +; ASM64PWR4-DAG: lbz [[REG3:[0-9]+]], 6([[REGADDR]]) +; ASM64PWR4-DAG: rlwinm 3, [[REG3]], 8, 16, 23 +; ASM64PWR4-DAG: rlwimi 3, [[REG2]], 16, 0, 15 +; ASM64PWR4-DAG: rldimi 3, [[REG1]], 32, 0 +; ASM64PWR4-NEXT: bl .test_byval_7Byte +; ASM64PWR4-NEXT: nop + +%struct.S8 = type { [8 x i8] } + +@gS8 = external global %struct.S8, align 1 + +define void @call_test_byval_8Byte() { +entry: + call void @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1 @gS8) + ret void +} + +declare void @test_byval_8Byte(%struct.S8* byval(%struct.S8) align 1) + +; CHECK-LABEL: name: call_test_byval_8Byte{{.*}} + +; CHECKASM-LABEL: .call_test_byval_8Byte: + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: renamable $x[[REGADDR:[0-9]+]] = LDtoc @gS8, $x2 :: (load 8 from got) +; 
64BIT-NEXT: renamable $x3 = LD 0, killed renamable $x[[REGADDR]] :: (load 8) +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +; ASM64PWR4: stdu 1, -112(1) +; ASM64PWR4-NEXT: ld [[REGADDR:[0-9]+]], LC{{[0-9]+}}(2) +; ASM64PWR4-NEXT: ld 3, 0([[REGADDR]]) +; ASM64PWR4-NEXT: bl .test_byval_8Byte +; ASM64PWR4-NEXT: nop