diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -405,26 +405,30 @@ CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false); - if (ValEVT.isVector()) { - EVT ElemVT = ValEVT.getVectorElementType(); - if (!ValEVT.isSimple()) - return false; - MVT ValVT = ElemVT.getSimpleVT(); - bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, - OrigArg.Flags, CCInfo); - if (!Res) - return false; - } else { - MVT ValVT = ValEVT.getSimpleVT(); - if (!ValEVT.isSimple()) - return false; - bool Res = - AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo); - - // Fail if we don't know how to handle this type. - if (Res) - return false; + if (!ValEVT.isSimple()) { + if (!ValEVT.isVector() || + ValEVT.getVectorNumElements() !=3 || + ValEVT.getVectorElementType() != MVT::i32 || + OrigArg.Flags.isInReg()) + return false; + + // Special case for v3i32 into vgprs + // Up to VGPR0-VGPR135 + const TargetRegisterClass *RC = &AMDGPU::VReg_96RegClass; + unsigned NumRegs = 45; + ArrayRef RegList = makeArrayRef(RC->begin(), NumRegs); + unsigned RegResult = CCInfo.AllocateReg(RegList); + CCInfo.addLoc(CCValAssign::getReg(i, MVT::Other, RegResult, MVT::Other, + CCValAssign::Full)); + continue; } + MVT ValVT = ValEVT.getSimpleVT(); + bool Res = + AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo); + + // Fail if we don't know how to handle this type. + if (Res) + return false; } Function::const_arg_iterator Arg = F.arg_begin(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -59,7 +59,9 @@ VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119, VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127, VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135 - ]>>> + ]>>>, + + CCIfNotInReg>> ]>; def RetCC_SI_Shader : CallingConv<[