Changeset View
Standalone View
llvm/lib/Target/X86/X86ISelLowering.cpp
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 3,460 Lines • ▼ Show 20 Lines | |||||
X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, | X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, | ||||
const SmallVectorImpl<ISD::InputArg> &Ins, | const SmallVectorImpl<ISD::InputArg> &Ins, | ||||
const SDLoc &dl, SelectionDAG &DAG, | const SDLoc &dl, SelectionDAG &DAG, | ||||
const CCValAssign &VA, | const CCValAssign &VA, | ||||
MachineFrameInfo &MFI, unsigned i) const { | MachineFrameInfo &MFI, unsigned i) const { | ||||
// Create the nodes corresponding to a load from this parameter slot. | // Create the nodes corresponding to a load from this parameter slot. | ||||
ISD::ArgFlagsTy Flags = Ins[i].Flags; | ISD::ArgFlagsTy Flags = Ins[i].Flags; | ||||
bool AlwaysUseMutable = shouldGuaranteeTCO( | bool AlwaysUseMutable = shouldGuaranteeTCO( | ||||
CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt); | CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt); | ||||
rnk: I would prefer to see if we can avoid changing the prototype here. The target checks (is win32)… | |||||
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); | bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); | ||||
EVT ValVT; | EVT ValVT; | ||||
MVT PtrVT = getPointerTy(DAG.getDataLayout()); | MVT PtrVT = getPointerTy(DAG.getDataLayout()); | ||||
// If value is passed by pointer we have address passed instead of the value | // If value is passed by pointer we have address passed instead of the value | ||||
// itself. No need to extend if the mask value and location share the same | // itself. No need to extend if the mask value and location share the same | ||||
// absolute size. | // absolute size. | ||||
bool ExtendedInMem = | bool ExtendedInMem = | ||||
▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines | X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, | ||||
// Set SExt or ZExt flag. | // Set SExt or ZExt flag. | ||||
if (VA.getLocInfo() == CCValAssign::ZExt) { | if (VA.getLocInfo() == CCValAssign::ZExt) { | ||||
MFI.setObjectZExt(FI, true); | MFI.setObjectZExt(FI, true); | ||||
} else if (VA.getLocInfo() == CCValAssign::SExt) { | } else if (VA.getLocInfo() == CCValAssign::SExt) { | ||||
MFI.setObjectSExt(FI, true); | MFI.setObjectSExt(FI, true); | ||||
} | } | ||||
MaybeAlign Alignment; | |||||
if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() | |||||
Lint: Pre-merge checks clang-format: please reformat the code - if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() - && ValVT != MVT::f80) + if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && + ValVT != MVT::f80) Lint: Pre-merge checks: clang-format: please reformat the code
```
- if (Subtarget.isTargetWindowsMSVC() && !Subtarget. | |||||
&& ValVT != MVT::f80) | |||||
Alignment = MaybeAlign(4); | |||||
SDValue FIN = DAG.getFrameIndex(FI, PtrVT); | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); | ||||
SDValue Val = DAG.getLoad( | SDValue Val = DAG.getLoad( | ||||
ValVT, dl, Chain, FIN, | ValVT, dl, Chain, FIN, | ||||
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), | ||||
Alignment); | |||||
return ExtendedInMem | return ExtendedInMem | ||||
? (VA.getValVT().isVector() | ? (VA.getValVT().isVector() | ||||
? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val) | ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val) | ||||
: DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)) | : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)) | ||||
: Val; | : Val; | ||||
} | } | ||||
// FIXME: Get this from tablegen. | // FIXME: Get this from tablegen. | ||||
▲ Show 20 Lines • Show All 495 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, | SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, | ||||
SDValue Arg, const SDLoc &dl, | SDValue Arg, const SDLoc &dl, | ||||
SelectionDAG &DAG, | SelectionDAG &DAG, | ||||
const CCValAssign &VA, | const CCValAssign &VA, | ||||
ISD::ArgFlagsTy Flags, | ISD::ArgFlagsTy Flags, | ||||
bool isByVal) const { | bool isByVal) const { | ||||
unsigned LocMemOffset = VA.getLocMemOffset(); | unsigned LocMemOffset = VA.getLocMemOffset(); | ||||
Ditto. In this case, we are accumulating consecutive boolean parameters which can reduce readability as well. An alternative solution would be nicer. rnk: Ditto. In this case, we are accumulating consecutive boolean parameters which can reduce… | |||||
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); | SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); | ||||
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), | PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), | ||||
StackPtr, PtrOff); | StackPtr, PtrOff); | ||||
if (isByVal) | if (isByVal) | ||||
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); | return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); | ||||
MaybeAlign Alignment; | |||||
if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() && | |||||
rnkUnsubmitted Not Done ReplyInline ActionsI think this could be simplified to use getStackAlign, but I won't insist. rnk: I think this could be simplified to use getStackAlign, but I won't insist. | |||||
pengfeiAuthorUnsubmitted I don't think so. The alignment are inconstant with different types on different OSs. Take CC_X86_64_C for example, https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/X86/X86CallingConv.td#L592-L603 f80/f128 are determined by layout and vector types are aligned to their size. pengfei: I don't think so. The alignment are inconstant with different types on different OSs. Take… | |||||
Arg.getSimpleValueType() != MVT::f80) | |||||
Not Done ReplyInline ActionsSurely there are other kinds of parameters that are clamped to 4 byte alignment on win32. How are doubles handled? Can we handle them the same way? rnk: Surely there are other kinds of parameters that are clamped to 4 byte alignment on win32. How… | |||||
I think double might be aligned to 8 too. But I think ignore double handling should be ok. There's no difference with alignment equals to 4 and 8, because we don't have aligned instructions for it. pengfei: I think `double` might be aligned to 8 too. But I think ignore `double` handling should be ok. | |||||
Not Done ReplyInline ActionsI think it's important to make sure we have the right alignment values for all types, regardless of whether they have aligned instructions or not. LLVM uses alignment aggressively, so we need to be precise everywhere. I'd still like to see if we can make this condition less target-specific. What's special about win32 in this case is that we only have 4 byte stack alignment. There are other platforms where this is true as well: i686-darwin for example. What is the effect of passing Subtarget.getFrameLowering()->getStackAlign() in place of the MaybeAlign parameter?Presumably it would cause some test failures, but maybe we actually want that behavior. If we do this, do we actually need the IsVarArg parameter at all? To me, it seems unlikely that whether a prototype has varargs or not should affect the way that prototyped arguments are passed. I believe such vectors passed in memory should be passed indirectly. rnk: I think it's important to make sure we have the right alignment values for all types… | |||||
Sorry for the late reply.
That's true, but some variables have their own alignment. See the example in f2. https://godbolt.org/z/xqcvj4YoK
Seems not. At least f80 is aligned to 16 bytes. I just fixed the issue in D113739.
Yes, because the same vector has different alignment between variant and fixed argument. pengfei: Sorry for the late reply.
> What's special about win32 in this case is that we only have 4… | |||||
Not Done ReplyInline ActionsYes, LLVM will realign the stack to store values with high required alignment. I'm saying that, this code, which stores an argument to stack memory, should maybe clamp its alignment assumption to the ABI's stack alignment, which on Windows, happens to be 4. On most other platforms, it will be 16. That seems equivalent to your logic, and more general. I'm asking if this suggestion causes problems in practice. Maybe it causes widespread test failures, I can't say for sure. In any case, I'd like to see a more principled solution.
I think what I'm trying to get at is that, in these two prototypes, v should be passed identically: void f1(int x, v4f v, int y); void f2(int x, v4f v, int y, ...); The IsVarArg boolean affects the entire call site. Adding and removing an ellipsis to the prototype should not change the instructions used to store the prototyped arguments, they should remain the same, and use the regular alignment assumptions, right? rnk: Yes, LLVM will realign the stack to store values with high required alignment. I'm saying that… | |||||
Alignment = MaybeAlign(4); | |||||
return DAG.getStore( | return DAG.getStore( | ||||
Chain, dl, Arg, PtrOff, | Chain, dl, Arg, PtrOff, | ||||
MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset)); | MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset), | ||||
Alignment); | |||||
} | } | ||||
/// Emit a load of return address if tail call | /// Emit a load of return address if tail call | ||||
/// optimization is performed and it is required. | /// optimization is performed and it is required. | ||||
SDValue X86TargetLowering::EmitTailCallLoadRetAddr( | SDValue X86TargetLowering::EmitTailCallLoadRetAddr( | ||||
SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, | SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, | ||||
bool Is64Bit, int FPDiff, const SDLoc &dl) const { | bool Is64Bit, int FPDiff, const SDLoc &dl) const { | ||||
// Adjust the Return address stack slot. | // Adjust the Return address stack slot. | ||||
▲ Show 20 Lines • Show All 32,759 Lines • Show Last 20 Lines |
I would prefer to see if we can avoid changing the prototype here. The target checks (is win32) can be calculated internally by looking at the subtarget. The only thing that varies per call site is the IsVarArg part. If we have to change the prototype, please just pass IsVarArg.