Index: lib/Target/AMDGPU/AMDGPUCallingConv.td =================================================================== --- lib/Target/AMDGPU/AMDGPUCallingConv.td +++ lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -25,14 +25,10 @@ SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39 ]>>>, - CCIfInReg>>, + // We have no way of referring to the generated register tuples + // here, so use a custom function. + CCIfInReg>>, + CCIfByVal>>, // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs. CCIfNotInReg>>, - - CCIfByVal>> - + ]>>> ]>; + def RetCC_SI : CallingConv<[ CCIfType<[i32] , CCAssignToReg<[ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -44,6 +44,28 @@ return true; } +static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + switch (LocVT.SimpleTy) { + case MVT::i64: + case MVT::f64: + case MVT::v2i32: + case MVT::v2f32: { + // Up to SGPR0-SGPR39 + ArrayRef SReg_64_CC = makeArrayRef(AMDGPU::SGPR_64RegClass.begin(), 20); + unsigned RegResult = State.AllocateReg(SReg_64_CC); + if (!RegResult) + return false; + + State.addLoc(CCValAssign::getReg(ValNo, ValVT, RegResult, LocVT, LocInfo)); + return true; + } + default: + return false; + } +} + #include "AMDGPUGenCallingConv.inc" // Find a larger type to do a load / store of a vector with. Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1087,17 +1087,6 @@ assert(VA.isRegLoc() && "Parameter must be in a register!"); unsigned Reg = VA.getLocReg(); - - if (VT == MVT::i64) { - // For now assume it is a pointer - Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, - &AMDGPU::SGPR_64RegClass); - Reg = MF.addLiveIn(Reg, &AMDGPU::SGPR_64RegClass); - SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT); - InVals.push_back(Copy); - continue; - } - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); Reg = MF.addLiveIn(Reg, RC);