Index: llvm/lib/CodeGen/GlobalISel/CallLowering.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -500,21 +500,9 @@ if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], Args[i].Flags[0], CCInfo)) return false; - - // If we couldn't directly assign this part, some casting may be - // necessary. Create the new register, but defer inserting the conversion - // instructions. - assert(Args[i].OrigRegs.empty()); - Args[i].OrigRegs.push_back(Args[i].Regs[0]); - assert(Args[i].Regs.size() == 1); - - const LLT VATy(NewVT); - Args[i].Regs[0] = MRI.createGenericVirtualRegister(VATy); continue; } - const LLT NewLLT(NewVT); - // For incoming arguments (physregs to vregs), we could have values in // physregs (or memlocs) which we want to extract and copy to vregs. // During this, we might have to deal with the LLT being split across @@ -523,56 +511,23 @@ // If we have outgoing args, then we have the opposite case. We have a // vreg with an LLT which we want to assign to a physical location, and // we might have to record that the value has to be split later. - if (Handler.isIncomingArgumentHandler()) { - // We're handling an incoming arg which is split over multiple regs. - // E.g. passing an s128 on AArch64. - ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; - Args[i].OrigRegs.push_back(Args[i].Regs[0]); - Args[i].Regs.clear(); - Args[i].Flags.clear(); - // For each split register, create and assign a vreg that will store - // the incoming component of the larger value. These will later be - // merged to form the final vreg. 
- for (unsigned Part = 0; Part < NumParts; ++Part) { - Register Reg = MRI.createGenericVirtualRegister(NewLLT); - ISD::ArgFlagsTy Flags = OrigFlags; - if (Part == 0) { - Flags.setSplit(); - } else { - Flags.setOrigAlign(Align(1)); - if (Part == NumParts - 1) - Flags.setSplitEnd(); - } - Args[i].Regs.push_back(Reg); - Args[i].Flags.push_back(Flags); - if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], - Args[i].Flags[Part], CCInfo)) { - // Still couldn't assign this smaller part type for some reason. - return false; - } + + // We're handling an argument that is split over multiple regs. + // E.g. passing an s128 on AArch64. + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + Args[i].Flags.clear(); + + for (unsigned Part = 0; Part < NumParts; ++Part) { + ISD::ArgFlagsTy Flags = OrigFlags; + if (Part == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(Align(1)); + if (Part == NumParts - 1) + Flags.setSplitEnd(); } - } else { - assert(Args[i].Regs.size() == 1); - - // This type is passed via multiple registers in the calling convention. - // We need to extract the individual parts. - assert(Args[i].OrigRegs.empty()); - Args[i].OrigRegs.push_back(Args[i].Regs[0]); - - ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; - // We're going to replace the regs and flags with the split ones. - Args[i].Regs.clear(); - Args[i].Flags.clear(); - for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) { - ISD::ArgFlagsTy Flags = OrigFlags; - if (PartIdx == 0) { - Flags.setSplit(); - } else { - Flags.setOrigAlign(Align(1)); - if (PartIdx == NumParts - 1) - Flags.setSplitEnd(); - } + if (!Handler.isIncomingArgumentHandler()) { // TODO: Also check if there is a valid extension that preserves the // bits. However currently this call lowering doesn't support non-exact // split parts, so that can't be tested. 
@@ -580,21 +535,19 @@ (NumParts * NewVT.getSizeInBits() != CurVT.getSizeInBits())) { Flags.setReturned(false); } + } - Register NewReg = MRI.createGenericVirtualRegister(NewLLT); - - Args[i].Regs.push_back(NewReg); - Args[i].Flags.push_back(Flags); - if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, - Args[i], Args[i].Flags[PartIdx], CCInfo)) - return false; + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[Part], CCInfo)) { + // Still couldn't assign this smaller part type for some reason. + return false; } } } - for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) { + for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) { assert(j < ArgLocs.size() && "Skipped too many arg locs"); - CCValAssign &VA = ArgLocs[j]; assert(VA.getValNo() == i && "Location doesn't correspond to current arg"); @@ -607,14 +560,33 @@ continue; } - EVT VAVT = VA.getValVT(); + const EVT VAVT = VA.getValVT(); + const LLT NewLLT(VAVT.getSimpleVT()); const LLT OrigTy = getLLTForType(*Args[i].Ty, DL); - const LLT VATy(VAVT.getSimpleVT()); // Expected to be multiple regs for a single incoming arg. // There should be Regs.size() ArgLocs per argument. - unsigned NumArgRegs = Args[i].Regs.size(); - assert((j + (NumArgRegs - 1)) < ArgLocs.size() && + // This should be the same as getNumRegistersForCallingConv + const unsigned NumParts = Args[i].Flags.size(); + + // Now split the registers into the assigned types. + Args[i].OrigRegs.assign(Args[i].Regs.begin(), Args[i].Regs.end()); + + if (NumParts != 1 || NewLLT != OrigTy) { + // If we can't directly assign the register, we need one or more + // intermediate values. + Args[i].Regs.resize(NumParts); + + // For each split register, create and assign a vreg that will store + // the incoming component of the larger value. These will later be + // merged to form the final vreg. 
+ for (unsigned Part = 0; Part < NumParts; ++Part) + Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT); + } + + const LLT VATy(VAVT.getSimpleVT()); + + assert((j + (NumParts - 1)) < ArgLocs.size() && "Too many regs for number of args"); // Coerce into outgoing value types before register assignment. @@ -624,7 +596,7 @@ VATy, extendOpFromFlags(Args[i].Flags[0])); } - for (unsigned Part = 0; Part < NumArgRegs; ++Part) { + for (unsigned Part = 0; Part < NumParts; ++Part) { Register ArgReg = Args[i].Regs[Part]; // There should be Regs.size() ArgLocs per argument. VA = ArgLocs[j + Part]; @@ -710,7 +682,7 @@ VATy); } - j += NumArgRegs - 1; + j += NumParts - 1; } return true;