Index: lib/Target/AMDGPU/AMDGPUCallingConv.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -25,14 +25,10 @@
     SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
   ]>>>,
-  CCIfInReg>>,
+  // We have no way of referring to the generated register tuples
+  // here, so use a custom function.
+  CCIfInReg>>,
+  CCIfByVal>>,
   // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
   CCIfNotInReg>>,
-
-  CCIfByVal>>
-
+  ]>>>
 ]>;
 def RetCC_SI : CallingConv<[
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -44,6 +44,57 @@
   return true;
 }
 
+/// Assign the current argument to the first unallocated register among the
+/// first \p NumRegs registers of \p RC.
+///
+/// \returns true after recording the register location in \p State, or false
+/// (recording nothing) when State.AllocateReg() returns register number 0.
+///
+/// \p ArgFlags is not used by this helper.
+static bool allocateCCRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
+                           CCValAssign::LocInfo LocInfo,
+                           ISD::ArgFlagsTy ArgFlags, CCState &State,
+                           const TargetRegisterClass *RC,
+                           unsigned NumRegs) {
+  // The candidate list is a prefix of RC's register array, so it must not
+  // extend past the end of the class.
+  assert(NumRegs <= RC->getNumRegs() && "NumRegs exceeds register class size");
+
+  ArrayRef<MCPhysReg> RegList = makeArrayRef(RC->begin(), NumRegs);
+  unsigned RegResult = State.AllocateReg(RegList);
+  if (!RegResult)
+    return false;
+
+  State.addLoc(CCValAssign::getReg(ValNo, ValVT, RegResult, LocVT, LocInfo));
+  return true;
+}
+
+/// Allocate a register from AMDGPU::SGPR_64RegClass for an argument whose
+/// location type is i64, f64, v2i32 or v2f32.
+///
+/// The AMDGPUCallingConv.td part of this change notes that the generated
+/// register tuples cannot be referenced from TableGen, which is why this is a
+/// hand-written function.
+///
+/// \returns the result of allocateCCRegs() for the types above, and false for
+/// every other location type.
+static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
+                              CCValAssign::LocInfo LocInfo,
+                              ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  switch (LocVT.SimpleTy) {
+  case MVT::i64:
+  case MVT::f64:
+  case MVT::v2i32:
+  case MVT::v2f32: {
+    // Consider only the first 20 SGPR_64 registers: up to SGPR0-SGPR39.
+    return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
+                          &AMDGPU::SGPR_64RegClass, 20);
+  }
+  default:
+    return false;
+  }
+}
+
 #include "AMDGPUGenCallingConv.inc"
 
 // Find a larger type to do a load / store of a vector with.
Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1087,17 +1087,6 @@ assert(VA.isRegLoc() && "Parameter must be in a register!"); unsigned Reg = VA.getLocReg(); - - if (VT == MVT::i64) { - // For now assume it is a pointer - Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, - &AMDGPU::SGPR_64RegClass); - Reg = MF.addLiveIn(Reg, &AMDGPU::SGPR_64RegClass); - SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT); - InVals.push_back(Copy); - continue; - } - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); Reg = MF.addLiveIn(Reg, RC);