diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6956,6 +6956,9 @@
   if (Subtarget.hasQPX())
     report_fatal_error("QPX support is not supported on AIX.");
 
+  // Potential tail calls could cause overwriting of argument stack slots.
+  const bool IsImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+                             (CallConv == CallingConv::Fast));
   const bool IsPPC64 = Subtarget.isPPC64();
   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
 
@@ -6964,47 +6967,66 @@
   MachineFunction &MF = DAG.getMachineFunction();
   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+  const EVT PtrVT = getPointerTy(MF.getDataLayout());
 
   // Reserve space for the linkage area on the stack.
   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
-  // On AIX a minimum of 8 words is saved to the parameter save area.
-  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
-  CCInfo.AllocateStack(LinkageSize + MinParameterSaveArea, PtrByteSize);
+  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
   CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
 
   for (CCValAssign &VA : ArgLocs) {
+    EVT ValVT = VA.getValVT();
+    MVT LocVT = VA.getLocVT();
+    SDValue ArgValue;
+
+    if (!VA.isRegLoc() && !VA.isMemLoc())
+      report_fatal_error("Unexpected location for function call argument.");
+
+    if (VA.isRegLoc()) {
+      MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
+      unsigned VReg =
+          MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
+      ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+      if (ValVT.isScalarInteger() &&
+          (ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
+        ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
+        ArgValue =
+            truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
+      }
+      InVals.push_back(ArgValue);
+    }
     if (VA.isMemLoc()) {
       // For compatibility with the AIX XL compiler, the float args in the
       // parameter save area are initialized even if the argument is available
       // in register. The caller is required to initialize both the register
-      // and memory, however, the callee can choose to expect it in either. The
-      // memloc is dismissed here because the argument is retrieved from the
-      // register.
+      // and memory, however, the callee can choose to expect it in either.
+      // The memloc is dismissed here because the argument is retrieved from
+      // the register.
       if (VA.needsCustom())
        continue;
-      report_fatal_error(
-          "Handling of formal arguments on the stack is unimplemented!");
-    }
-    assert(VA.isRegLoc() && "Unexpected argument location.");
-
-    EVT ValVT = VA.getValVT();
-    MVT LocVT = VA.getLocVT();
-    MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
-    unsigned VReg =
-        MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
-    SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
-    if (ValVT.isScalarInteger() &&
-        (ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
-      ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
+      // Get the extended size of the argument type on the stack.
+      const unsigned ArgSize = VA.getLocVT().getStoreSize();
+      // Get the actual size of the argument type.
+      const unsigned ObjSize = VA.getValVT().getStoreSize();
+      int CurArgOffset = VA.getLocMemOffset();
+      // Objects in AIX are right-justified.
+      if (ArgSize < ObjSize)
+        CurArgOffset += ArgSize - ObjSize;
+      MachineFrameInfo &MFI = MF.getFrameInfo();
+      int FI = MFI.CreateFixedObject(ArgSize, CurArgOffset, IsImmutable);
+      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
       ArgValue =
-          truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
+          DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, MachinePointerInfo());
+      InVals.push_back(ArgValue);
     }
-    InVals.push_back(ArgValue);
-  }
+  }
 
+  // On AIX a minimum of 8 words is saved to the parameter save area.
+  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
   // Area that is at least reserved in the caller of this function.
-  unsigned MinReservedArea = CCInfo.getNextStackOffset();
+  unsigned MinReservedArea =
+      std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
 
   // Set the size that is at least reserved in caller of this function. Tail
   // call optimized function's reserved stack space needs to be aligned so
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
--- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll
@@ -1266,3 +1266,226 @@
 ; ASM64PWR4-NEXT: bl .test_stackarg_float3
 ; ASM64PWR4-NEXT: nop
 ; ASM64PWR4-NEXT: addi 1, 1, 128
+
+
+define i32 @test_ints_stack(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i64 %ll9, i16 signext %s10, i8 zeroext %c11, i32 %ui12, i32 %si13, i64 %ll14, i8 zeroext %uc15, i32 %i16) {
+entry:
+  %add = add nsw i32 %i1, %i2
+  %add1 = add nsw i32 %add, %i3
+  %add2 = add nsw i32 %add1, %i4
+  %add3 = add nsw i32 %add2, %i5
+  %add4 = add nsw i32 %add3, %i6
+  %add5 = add nsw i32 %add4, %i7
+  %add6 = add nsw i32 %add5, %i8
+  %conv = sext i32 %add6 to i64
+  %add7 = add nsw i64 %conv, %ll9
+  %conv8 = sext i16 %s10 to i64
+  %add9 = add nsw i64 %add7, %conv8
+  %conv10 = zext i8 %c11 to i64
+  %add11 = add nsw i64 %add9, %conv10
+  %conv12 = zext i32 %ui12 to i64
+  %add13 = add nsw i64 %add11, %conv12
+  %conv14 = sext i32 %si13 to i64
+  %add15 = add nsw i64 %add13, %conv14
+  %add16 = add nsw i64 %add15, %ll14
+  %conv17 = zext i8 %uc15 to i64
+  %add18 = add nsw i64 %add16, %conv17
+  %conv19 = sext i32 %i16 to i64
+  %add20 = add nsw i64 %add18, %conv19
+  %conv21 = trunc i64 %add20 to i32
+  ret i32 %conv21
+}
+
+; CHECK-LABEL: name: test_ints_stack
+
+; 32BIT: liveins:
+; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' }
+; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' }
+; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' }
+; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' }
+; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' }
+; 32BIT-NEXT: - { reg: '$r8', virtual-reg: '' }
+; 32BIT-NEXT: - { reg: '$r9', virtual-reg: '' }
+; 32BIT-NEXT: - { reg: '$r10', virtual-reg: '' }
+; 32BIT: fixedStack:
+; 32BIT: - { id: 0, type: default, offset: 92, size: 4
+; 32BIT: - { id: 1, type: default, offset: 88, size: 4
+; 32BIT: - { id: 2, type: default, offset: 84, size: 4
+; 32BIT: - { id: 3, type: default, offset: 80, size: 4
+; 32BIT: - { id: 4, type: default, offset: 76, size: 4
+; 32BIT: - { id: 5, type: default, offset: 72, size: 4
+; 32BIT: - { id: 6, type: default, offset: 68, size: 4
+; 32BIT: - { id: 7, type: default, offset: 64, size: 4
+; 32BIT: - { id: 8, type: default, offset: 60, size: 4
+; 32BIT: - { id: 9, type: default, offset: 56, size: 4
+; 32BIT: body: |
+; 32BIT-NEXT: bb.0.entry:
+; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10
+
+; 64BIT: liveins:
+; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' }
+; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' }
+; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' }
+; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' }
+; 64BIT-NEXT: - { reg: '$x7', virtual-reg: '' }
+; 64BIT-NEXT: - { reg: '$x8', virtual-reg: '' }
+; 64BIT-NEXT: - { reg: '$x9', virtual-reg: '' }
+; 64BIT-NEXT: - { reg: '$x10', virtual-reg: '' }
+; 64BIT: fixedStack:
+; 64BIT: - { id: 0, type: default, offset: 168, size: 8
+; 64BIT: - { id: 1, type: default, offset: 160, size: 8
+; 64BIT: - { id: 2, type: default, offset: 152, size: 8
+; 64BIT: - { id: 3, type: default, offset: 144, size: 8
+; 64BIT: - { id: 4, type: default, offset: 136, size: 8
+; 64BIT: - { id: 5, type: default, offset: 128, size: 8
+; 64BIT: - { id: 6, type: default, offset: 120, size: 8
+; 64BIT: - { id: 7, type: default, offset: 112, size: 8
+; 64BIT: body: |
+; 64BIT-NEXT: bb.0.entry:
+; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10
+
+; ASM32PWR4: add 3, 3, 4
+; ASM32PWR4-DAG: lwz [[REG1:[0-9]+]], 60(1)
+; ASM32PWR4-DAG: add 3, 3, 5
+; ASM32PWR4-DAG: add 3, 3, 6
+; ASM32PWR4-DAG: add 3, 3, 7
+; ASM32PWR4-DAG: lwz [[REG2:[0-9]+]], 64(1)
+; ASM32PWR4-DAG: add 3, 3, 8
+; ASM32PWR4-DAG: add 3, 3, 9
+; ASM32PWR4-DAG: lwz [[REG3:[0-9]+]], 68(1)
+; ASM32PWR4-DAG: add 3, 3, 10
+; ASM32PWR4-DAG: add 3, 3, [[REG1]]
+; ASM32PWR4-DAG: add 3, 3, [[REG2]]
+; ASM32PWR4-DAG: lwz [[REG4:[0-9]+]], 72(1)
+; ASM32PWR4-DAG: add 3, 3, [[REG3]]
+; ASM32PWR4-DAG: lwz [[REG5:[0-9]+]], 76(1)
+; ASM32PWR4-DAG: add 3, 3, [[REG4]]
+; ASM32PWR4-DAG: lwz [[REG6:[0-9]+]], 84(1)
+; ASM32PWR4-DAG: add 3, 3, [[REG5]]
+; ASM32PWR4-DAG: lwz [[REG7:[0-9]+]], 88(1)
+; ASM32PWR4-DAG: add 3, 3, [[REG6]]
+; ASM32PWR4-DAG: lwz [[REG8:[0-9]+]], 92(1)
+; ASM32PWR4-DAG: add 3, 3, [[REG7]]
+; ASM32PWR4-DAG: add 3, 3, [[REG8]]
+
+; ASM64PWR4: add 3, 3, 4
+; ASM64PWR4-DAG: ld [[REG1:[0-9]+]], 112(1)
+; ASM64PWR4-DAG: add 3, 3, 5
+; ASM64PWR4-DAG: add 3, 3, 6
+; ASM64PWR4-DAG: add 3, 3, 7
+; ASM64PWR4-DAG: ld [[REG2:[0-9]+]], 120(1)
+; ASM64PWR4-DAG: add 3, 3, 8
+; ASM64PWR4-DAG: add 3, 3, 9
+; ASM64PWR4-DAG: add 3, 3, 10
+; ASM64PWR4-DAG: std 30, -8(1)
+; ASM64PWR4-DAG: extsw 3, 3
+; ASM64PWR4-DAG: add 3, 3, [[REG1]]
+; ASM64PWR4-DAG: ld [[REG3:[0-9]+]], 128(1)
+; ASM64PWR4-DAG: add 3, 3, [[REG2]]
+; ASM64PWR4-DAG: lwz [[REG4:[0-9]+]], 140(1)
+; ASM64PWR4-DAG: add 3, 3, [[REG3]]
+; ASM64PWR4-DAG: lwa [[REG5:[0-9]+]], 148(1)
+; ASM64PWR4-DAG: add 3, 3, [[REG4]]
+; ASM64PWR4-DAG: add 3, 3, [[REG5]]
+; ASM64PWR4-DAG: ld [[REG6:[0-9]+]], 152(1)
+; ASM64PWR4-DAG: ld [[REG7:[0-9]+]], 160(1)
+; ASM64PWR4-DAG: add 3, 3, [[REG6]]
+; ASM64PWR4-DAG: lwa [[REG8:[0-9]+]], 172(1)
+; ASM64PWR4-DAG: add 3, 3, [[REG7]]
+; ASM64PWR4-DAG: add 3, 3, [[REG8]]
+; ASM64PWR4-DAG: ld 30, -8(1)
+
+
+define double @test_fpr_stack(double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %s10, double %l11, double %d12, double %d13, float %f14, double %d15, float %f16) {
+entry:
+  %add = fadd double %d1, %d2
+  %add1 = fadd double %add, %d3
+  %add2 = fadd double %add1, %d4
+  %add3 = fadd double %add2, %d5
+  %add4 = fadd double %add3, %d6
+  %add5 = fadd double %add4, %d7
+  %add6 = fadd double %add5, %d8
+  %add7 = fadd double %add6, %d9
+  %add8 = fadd double %add7, %s10
+  %add9 = fadd double %add8, %l11
+  %add10 = fadd double %add9, %d12
+  %add11 = fadd double %add10, %d13
+  %add12 = fadd double %add11, %d13
+  %conv = fpext float %f14 to double
+  %add13 = fadd double %add12, %conv
+  %add14 = fadd double %add13, %d15
+  %conv15 = fpext float %f16 to double
+  %add16 = fadd double %add14, %conv15
+  ret double %add16
+}
+
+; CHECK-LABEL: name: test_fpr_stack{{.*}}
+
+; CHECK: liveins:
+; CHECK-NEXT: - { reg: '$f1', virtual-reg: '' }
+; CHECK-NEXT: - { reg: '$f2', virtual-reg: '' }
+; CHECK-NEXT: - { reg: '$f3', virtual-reg: '' }
+; CHECK-NEXT: - { reg: '$f4', virtual-reg: '' }
+; CHECK-NEXT: - { reg: '$f5', virtual-reg: '' }
+; CHECK-NEXT: - { reg: '$f6', virtual-reg: '' }
+; CHECK-NEXT: - { reg: '$f7', virtual-reg: '' }
+; CHECK-NEXT: - { reg: '$f8', virtual-reg: '' }
+; CHECK-NEXT: - { reg: '$f9', virtual-reg: '' }
+; CHECK-NEXT: - { reg: '$f10', virtual-reg: '' }
+; CHECK-NEXT: - { reg: '$f11', virtual-reg: '' }
+; CHECK-NEXT: - { reg: '$f12', virtual-reg: '' }
+; CHECK-NEXT: - { reg: '$f13', virtual-reg: '' }
+
+; CHECK: fixedStack:
+; 32BIT: - { id: 0, type: default, offset: 140, size: 4
+; 32BIT: - { id: 1, type: default, offset: 132, size: 8
+; 32BIT: - { id: 2, type: default, offset: 128, size: 4
+
+; 64BIT: - { id: 0, type: default, offset: 168, size: 4
+; 64BIT: - { id: 1, type: default, offset: 160, size: 8
+; 64BIT: - { id: 2, type: default, offset: 152, size: 4
+
+; CHECK: body: |
+; CHECK-NEXT: bb.0.entry:
+; CHECK-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13
+
+
+; ASM32PWR4: fadd 0, 1, 2
+; ASM32PWR4-DAG: lfs [[REG1:[0-9]+]], 128(1)
+; ASM32PWR4-DAG: fadd 0, 0, 3
+; ASM32PWR4-DAG: lfd [[REG2:[0-9]+]], 132(1)
+; ASM32PWR4-DAG: fadd 0, 0, 4
+; ASM32PWR4-DAG: fadd 0, 0, 5
+; ASM32PWR4-DAG: fadd 0, 0, 6
+; ASM32PWR4-DAG: fadd 0, 0, 7
+; ASM32PWR4-DAG: fadd 0, 0, 8
+; ASM32PWR4-DAG: fadd 0, 0, 9
+; ASM32PWR4-DAG: fadd 0, 0, 10
+; ASM32PWR4-DAG: fadd 0, 0, 11
+; ASM32PWR4-DAG: fadd 0, 0, 12
+; ASM32PWR4-DAG: fadd 0, 0, 13
+; ASM32PWR4-DAG: fadd 0, 0, 13
+; ASM32PWR4-DAG: fadd 0, 0, [[REG1]]
+; ASM32PWR4-DAG: lfs [[REG3:[0-9]+]], 140(1)
+; ASM32PWR4-DAG: fadd 0, 0, [[REG2]]
+; ASM32PWR4-DAG: fadd 1, 0, [[REG3]]
+
+; ASM64PWR4: fadd 0, 1, 2
+; ASM64PWR4-DAG: lfs [[REG1:[0-9]+]], 152(1)
+; ASM64PWR4-DAG: fadd 0, 0, 3
+; ASM64PWR4-DAG: lfd [[REG2:[0-9]+]], 160(1)
+; ASM64PWR4-DAG: fadd 0, 0, 4
+; ASM64PWR4-DAG: fadd 0, 0, 5
+; ASM64PWR4-DAG: fadd 0, 0, 6
+; ASM64PWR4-DAG: fadd 0, 0, 7
+; ASM64PWR4-DAG: fadd 0, 0, 8
+; ASM64PWR4-DAG: fadd 0, 0, 9
+; ASM64PWR4-DAG: fadd 0, 0, 10
+; ASM64PWR4-DAG: fadd 0, 0, 11
+; ASM64PWR4-DAG: fadd 0, 0, 12
+; ASM64PWR4-DAG: fadd 0, 0, 13
+; ASM64PWR4-DAG: fadd 0, 0, 13
+; ASM64PWR4-DAG: fadd 0, 0, [[REG1]]
+; ASM64PWR4-DAG: lfs [[REG3:[0-9]+]], 168(1)
+; ASM64PWR4-DAG: fadd 0, 0, [[REG2]]
+; ASM64PWR4-DAG: fadd 1, 0, [[REG3]]
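
A standalone sketch (not part of the patch, and deliberately not using LLVM APIs) of the AIX parameter save area arithmetic that the lowering hunk and the new test expectations rely on: in-memory formal arguments start after the linkage area plus the eight GPR-shadow slots, and the caller always reserves at least that much. The 24/48-byte linkage sizes stand in for Subtarget.getFrameLowering()->getLinkageSize(), and 8 * PtrByteSize mirrors MinParameterSaveArea above; the function names here are illustrative only.

#include <algorithm>
#include <cstdio>

// Offset from the incoming stack pointer of the Nth parameter-save-area slot
// on AIX. Slots 0-7 shadow the argument GPRs (r3-r10), so the first argument
// that is actually loaded from memory lives in slot 8.
unsigned aixParamSlotOffset(bool IsPPC64, unsigned SlotIndex) {
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  const unsigned LinkageSize = IsPPC64 ? 48 : 24; // AIX linkage area.
  return LinkageSize + SlotIndex * PtrByteSize;
}

// Minimum area a caller reserves for this callee, mirroring the new
// MinReservedArea computation: never less than linkage + 8 saved words.
unsigned aixMinReservedArea(bool IsPPC64, unsigned NextStackOffset) {
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  const unsigned LinkageSize = IsPPC64 ? 48 : 24;
  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
  return std::max(NextStackOffset, LinkageSize + MinParameterSaveArea);
}

int main() {
  // First in-memory argument: slot 8 -> 56 bytes on 32-bit and 112 on 64-bit,
  // the lowest fixedStack offsets checked in test_ints_stack above.
  std::printf("%u %u\n", aixParamSlotOffset(false, 8), aixParamSlotOffset(true, 8));
  // A callee whose arguments all fit in registers still has the full
  // 56/112 bytes reserved in its caller.
  std::printf("%u %u\n", aixMinReservedArea(false, 24), aixMinReservedArea(true, 48));
  return 0;
}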