diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6538,7 +6538,9 @@
   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
       State.getMachineFunction().getSubtarget());
   const bool IsPPC64 = Subtarget.isPPC64();
-  const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
+  const unsigned PtrSize = IsPPC64 ? 8 : 4;
+  const Align PtrAlign(PtrSize);
+  const Align StackAlign(16);
   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
 
   if (ValVT == MVT::f128)
@@ -6559,12 +6561,16 @@
                                  PPC::V6, PPC::V7, PPC::V8, PPC::V9,
                                  PPC::V10, PPC::V11, PPC::V12, PPC::V13};
 
+  const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
+
   if (ArgFlags.isByVal()) {
-    if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
+    const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
+    if (ByValAlign > StackAlign)
       report_fatal_error("Pass-by-value arguments with alignment greater than "
-                         "register width are not supported.");
+                         "16 are not supported.");
 
     const unsigned ByValSize = ArgFlags.getByValSize();
+    const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
 
     // An empty aggregate parameter takes up no storage and no registers,
     // but needs a MemLoc for a stack slot for the formal arguments side.
@@ -6575,11 +6581,23 @@
       return false;
     }
 
-    const unsigned StackSize = alignTo(ByValSize, PtrAlign);
-    unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
-    for (const unsigned E = Offset + StackSize; Offset < E;
-         Offset += PtrAlign.value()) {
-      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
+    // Shadow allocate any registers that are not properly aligned.
+    unsigned NextReg = State.getFirstUnallocated(GPRs);
+    while (NextReg != GPRs.size() &&
+           !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
+      // Shadow allocate this register since its alignment is not strict enough.
+      unsigned Reg = State.AllocateReg(GPRs);
+      // Allocate the stack space shadowed by said register.
+      State.AllocateStack(PtrSize, PtrAlign);
+      assert(Reg && "Allocating register unexpectedly failed.");
+      (void)Reg;
+      NextReg = State.getFirstUnallocated(GPRs);
+    }
+
+    const unsigned StackSize = alignTo(ByValSize, ObjAlign);
+    unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
+    for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
+      if (unsigned Reg = State.AllocateReg(GPRs))
         State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
       else {
         State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
@@ -6601,12 +6619,12 @@
     LLVM_FALLTHROUGH;
   case MVT::i1:
   case MVT::i32: {
-    const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
+    const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
     // AIX integer arguments are always passed in register width.
     if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
       LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
                                   : CCValAssign::LocInfo::ZExt;
-    if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
+    if (unsigned Reg = State.AllocateReg(GPRs))
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
     else
       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
@@ -6626,8 +6644,8 @@
     State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
 
     // Reserve and initialize GPRs or initialize the PSA as required.
-    for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
-      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+    for (unsigned I = 0; I < StoreSize; I += PtrSize) {
+      if (unsigned Reg = State.AllocateReg(GPRs)) {
         assert(FReg && "An FPR should be available when a GPR is reserved.");
         if (State.isVarArg()) {
           // Successfully reserved GPRs are only initialized for vararg calls.
@@ -6679,9 +6697,6 @@
       return false;
     }
 
-    const unsigned PtrSize = IsPPC64 ? 8 : 4;
-    ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
-
     unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
     // Burn any underaligned registers and their shadowed stack space until
     // we reach the required alignment.
@@ -7032,9 +7047,6 @@
       const MCPhysReg ArgReg = VA.getLocReg();
       const PPCFrameLowering *FL = Subtarget.getFrameLowering();
 
-      if (Flags.getNonZeroByValAlign() > PtrByteSize)
-        report_fatal_error("Over aligned byvals not supported yet.");
-
       const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
       const int FI = MF.getFrameInfo().CreateFixedObject(
           StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
--- a/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-byval-limitation3.ll
@@ -13,4 +13,4 @@
 
 declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, %struct.S* byval(%struct.S) align 32)
 
-; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than register width are not supported.
+; CHECK: LLVM ERROR: Pass-by-value arguments with alignment greater than 16 are not supported.
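Note on the hunks above: isGPRShadowAligned is introduced elsewhere in this patch, so its definition does not appear in this excerpt. A minimal sketch of the predicate the new loop assumes, inferred from the AIX parameter save area layout (in 32-bit mode the PSA begins 8 bytes past a 16-byte boundary; in 64-bit mode it begins on one), would look like:

    // Sketch only: does the PSA slot shadowed by Reg have at least
    // RequiredAlign alignment? The grouping below follows from the PSA
    // base offsets (24 on 32-bit AIX, 48 on 64-bit AIX).
    static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
      assert(RequiredAlign.value() <= 16 &&
             "Required alignment greater than stack alignment.");
      switch (Reg) {
      default:
        report_fatal_error("called on invalid register.");
      // r5/r9 shadow PSA offsets 32/48; x3/x5/x7/x9 shadow offsets
      // 48/64/80/96. All of these are 16-byte aligned.
      case PPC::R5:
      case PPC::R9:
      case PPC::X3:
      case PPC::X5:
      case PPC::X7:
      case PPC::X9:
        return true;
      // These registers shadow slots that are only 8-byte aligned.
      case PPC::R3:
      case PPC::R7:
      case PPC::X4:
      case PPC::X6:
      case PPC::X8:
      case PPC::X10:
        return RequiredAlign <= 8;
      // The remaining 32-bit GPRs shadow slots that are only 4-byte aligned.
      case PPC::R4:
      case PPC::R6:
      case PPC::R8:
      case PPC::R10:
        return RequiredAlign <= 4;
      }
    }

Under that classification, a byval argument requiring 16-byte alignment burns r3/r4 (32-bit) or x4 (64-bit) before claiming registers, which is what the new tests below verify.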
diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval-callee.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
+; RUN:   -mattr=-altivec -verify-machineinstrs < %s | \
+; RUN:   FileCheck --check-prefix=32BIT %s
+
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
+; RUN:   -mattr=-altivec -verify-machineinstrs < %s | \
+; RUN:   FileCheck --check-prefix=64BIT %s
+
+%struct.vec_struct = type { <4 x i32> }
+
+; Function Attrs: norecurse nounwind readonly
+define i32 @vec_struct_test(i32 %i, %struct.vec_struct* nocapture readonly byval(%struct.vec_struct) align 16 %vs) {
+  ; 32BIT-LABEL: name: vec_struct_test
+  ; 32BIT: fixedStack:
+  ; 32BIT:   - { id: 0, type: default, offset: 32, size: 16, alignment: 16, stack-id: default,
+  ; 32BIT:       isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+  ; 32BIT:       debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  ; 32BIT: bb.0.entry:
+  ; 32BIT:   liveins: $r3, $r5, $r6, $r7, $r8
+  ; 32BIT:   renamable $r3 = nsw ADD4 renamable $r5, killed renamable $r3
+  ; 32BIT:   STW killed renamable $r7, 8, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 8, align 8)
+  ; 32BIT:   STW killed renamable $r6, 4, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 4)
+  ; 32BIT:   STW killed renamable $r5, 0, %fixed-stack.0 :: (store (s32) into %fixed-stack.0, align 16)
+  ; 32BIT:   STW killed renamable $r8, 12, %fixed-stack.0 :: (store (s32) into %fixed-stack.0 + 12)
+  ; 32BIT:   BLR implicit $lr, implicit $rm, implicit $r3
+
+
+  ; 64BIT-LABEL: name: vec_struct_test
+  ; 64BIT: fixedStack:
+  ; 64BIT:   - { id: 0, type: default, offset: 64, size: 16, alignment: 16, stack-id: default,
+  ; 64BIT:       isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,
+  ; 64BIT:       debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  ; 64BIT: bb.0.entry:
+  ; 64BIT:   liveins: $x3, $x5, $x6
+  ; 64BIT:   STD killed renamable $x5, 0, %fixed-stack.0 :: (store (s64) into %fixed-stack.0, align 16)
+  ; 64BIT:   renamable $r4 = LWZ 0, %fixed-stack.0 :: (dereferenceable load (s32) from %ir.vsi1, align 16)
+  ; 64BIT:   renamable $r3 = nsw ADD4 killed renamable $r4, renamable $r3, implicit killed $x3, implicit-def $x3
+  ; 64BIT:   STD killed renamable $x6, 8, %fixed-stack.0 :: (store (s64) into %fixed-stack.0 + 8)
+  ; 64BIT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
+entry:
+  %vsi = getelementptr inbounds %struct.vec_struct, %struct.vec_struct* %vs, i32 0, i32 0
+  %0 = load <4 x i32>, <4 x i32>* %vsi, align 16
+  %vecext = extractelement <4 x i32> %0, i32 0
+  %add = add nsw i32 %vecext, %i
+  ret i32 %add
+}
diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-vector-byval.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
+; RUN:   -mattr=-altivec -verify-machineinstrs < %s | \
+; RUN:   FileCheck --check-prefix=32BIT %s
+
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -mcpu=pwr7 \
+; RUN:   -mattr=-altivec -verify-machineinstrs < %s | \
+; RUN:   FileCheck --check-prefix=64BIT %s
+
+%struct.B = type { <8 x i16>, i32, i32, [8 x i8] }
+
+; Function Attrs: nounwind
+define i32 @caller() {
+  ; 32BIT-LABEL: name: caller
+  ; 32BIT: bb.0.entry:
+  ; 32BIT:   renamable $r3 = LWZ 28, %stack.0.vs :: (load (s32) from unknown-address + 4)
+  ; 32BIT:   STW killed renamable $r3, 60, $r1 :: (store (s32) into unknown-address + 4, basealign 16)
+  ; 32BIT:   renamable $r3 = LWZ 24, %stack.0.vs :: (load (s32) from %stack.0.vs + 24, align 8, basealign 16)
+  ; 32BIT:   STW killed renamable $r3, 56, $r1 :: (store (s32), align 16)
+  ; 32BIT:   ADJCALLSTACKDOWN 64, 0, implicit-def dead $r1, implicit $r1
+  ; 32BIT:   renamable $r10 = LWZ 20, %stack.0.vs :: (load (s32) from %stack.0.vs + 20)
+  ; 32BIT:   renamable $r9 = LWZ 16, %stack.0.vs :: (load (s32) from %stack.0.vs + 16, align 16)
+  ; 32BIT:   renamable $r8 = LWZ 12, %stack.0.vs :: (load (s32) from %stack.0.vs + 12)
+  ; 32BIT:   renamable $r7 = LWZ 8, %stack.0.vs :: (load (s32) from %stack.0.vs + 8, align 8)
+  ; 32BIT:   renamable $r6 = LWZ 4, %stack.0.vs :: (load (s32) from %stack.0.vs + 4)
+  ; 32BIT:   renamable $r5 = LWZ 0, %stack.0.vs :: (load (s32) from %stack.0.vs, align 16)
+  ; 32BIT:   $r3 = LI 0
+  ; 32BIT:   BL_NOP <mcsymbol .vec_struct_test[PR]>, csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r5, implicit $r6, implicit $r7, implicit $r8, implicit $r9, implicit $r10, implicit $r2, implicit-def $r1, implicit-def $r3
+  ; 32BIT:   ADJCALLSTACKUP 64, 0, implicit-def dead $r1, implicit $r1
+  ; 32BIT:   BLR implicit $lr, implicit $rm, implicit $r3
+
+  ; 64BIT-LABEL: name: caller
+  ; 64BIT: bb.0.entry:
+  ; 64BIT:   ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+  ; 64BIT:   renamable $x8 = LD 24, %stack.0.vs :: (load (s64) from %stack.0.vs + 24)
+  ; 64BIT:   renamable $x7 = LD 16, %stack.0.vs :: (load (s64) from %stack.0.vs + 16, align 16)
+  ; 64BIT:   renamable $x6 = LD 8, %stack.0.vs :: (load (s64) from %stack.0.vs + 8)
+  ; 64BIT:   renamable $x5 = LD 0, %stack.0.vs :: (load (s64) from %stack.0.vs, align 16)
+  ; 64BIT:   $x3 = LI8 0
+  ; 64BIT:   BL8_NOP <mcsymbol .vec_struct_test[PR]>, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x5, implicit $x6, implicit $x7, implicit $x8, implicit $x2, implicit-def $r1, implicit-def $x3
+  ; 64BIT:   ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+  ; 64BIT:   BLR8 implicit $lr8, implicit $rm, implicit $x3
+entry:
+  %vs = alloca %struct.B, align 16
+  %call = tail call i32 @vec_struct_test(i32 0, %struct.B* nonnull byval(%struct.B) align 16 %vs)
+  ret i32 %call
+}
+
+declare i32 @vec_struct_test(i32, %struct.B* byval(%struct.B) align 16)
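For orientation, a hypothetical C++-level equivalent of the callee test (the names mirror the test; the register placement in the comments is read directly off the liveins and fixedStack entries above):

    // Hypothetical source-level view of vec_struct_test; assumes GNU vector
    // extensions so the example is self-contained.
    typedef int v4si __attribute__((vector_size(16))); // 16-byte aligned
    struct vec_struct { v4si v; };                     // size 16, align 16

    int vec_struct_test(int i, vec_struct vs) {
      // 32-bit AIX: i arrives in r3; r4's PSA shadow is only 4-byte aligned,
      // so it is burned, and vs occupies r5-r8 (PSA offset 32).
      // 64-bit AIX: x4's shadow is only 8-byte aligned, so vs lands in x5-x6
      // (PSA offset 64).
      return vs.v[0] + i;
    }

The burned registers are visible in the MIR as the gap between $r3 and $r5 (or $x3 and $x5) in the liveins, and the shadow-allocated stack bytes as fixed-stack offsets of 32 and 64 rather than 28 and 56.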