# Review note: this patch was recovered from a whitespace-collapsed copy. Line
# breaks were restored from the diff markers and the hunk line counts;
# indentation is reconstructed and should be confirmed against the tree (or
# the patch applied with whitespace ignored, e.g. `patch -l`).
#
# Visible changes:
#  - SIInstrInfo.cpp: return DescSize whenever it is non-zero (previously only
#    when it was 4 or 8) and drop the assert that it is otherwise zero.
#  - SIInstructions.td: give both VGPRSpill pseudos in the hunk (the storing
#    one and _RESTORE) a Size of 8 + 8 * (vgpr_class.Size >> 5).
#  - SIRegisterInfo.td: set Size on VGPR_32, VReg_64, VReg_128, VReg_256 and
#    VReg_512.
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3146,11 +3146,9 @@
 
   // If we have a definitive size, we can use it. Otherwise we need to inspect
   // the operands to know the size.
-  if (DescSize == 8 || DescSize == 4)
+  if (DescSize != 0)
     return DescSize;
 
-  assert(DescSize == 0);
-
   // 4-byte instructions may have a 32-bit literal encoded after them. Check
   // operands that coud ever be literals.
   if (isVALU(MI) || isSALU(MI)) {
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1948,6 +1948,8 @@
            SReg_32:$scratch_offset, i32imm:$offset)> {
       let mayStore = 1;
       let mayLoad = 0;
+      // (2 * 4) + (8 * num_subregs) bytes maximum
+      let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
     }
 
     def _RESTORE : PseudoInstSI <
@@ -1956,6 +1958,9 @@
            i32imm:$offset)> {
       let mayStore = 0;
       let mayLoad = 1;
+
+      // (2 * 4) + (8 * num_subregs) bytes maximum
+      let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
     }
   } // End UseNamedOperandTable = 1, VGPRSpill = 1
 }
Index: lib/Target/AMDGPU/SIRegisterInfo.td
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.td
+++ lib/Target/AMDGPU/SIRegisterInfo.td
@@ -193,6 +193,7 @@
 def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
                             (add (sequence "VGPR%u", 0, 255))> {
   let AllocationPriority = 1;
+  let Size = 32;
 }
 
 // VGPR 64-bit registers
@@ -306,6 +307,8 @@
 
 // Register class for all vector registers (VGPRs + Interploation Registers)
 def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 32, (add VGPR_64)> {
+  let Size = 64;
+
   // Requires 2 v_mov_b32 to copy
   let CopyCost = 2;
   let AllocationPriority = 2;
@@ -320,17 +323,21 @@
 }
 
 def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
+  let Size = 128;
+
   // Requires 4 v_mov_b32 to copy
   let CopyCost = 4;
   let AllocationPriority = 4;
 }
 
 def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
+  let Size = 256;
   let CopyCost = 8;
   let AllocationPriority = 5;
 }
 
 def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
+  let Size = 512;
   let CopyCost = 16;
   let AllocationPriority = 6;
 }