Index: lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -532,22 +532,29 @@
   const DebugLoc &DL = MI->getDebugLoc();
   bool IsStore = Desc.mayStore();
 
-  bool RanOutOfSGPRs = false;
   bool Scavenged = false;
   unsigned SOffset = ScratchOffsetReg;
+  const unsigned EltSize = 4;
 
   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
-  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
-  unsigned Size = NumSubRegs * 4;
+  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
+  unsigned Size = NumSubRegs * EltSize;
   int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
-  const int64_t OriginalImmOffset = Offset;
+  int64_t ScratchOffsetRegDelta = 0;
 
   unsigned Align = MFI.getObjectAlignment(Index);
   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
 
-  if (!isUInt<12>(Offset + Size)) {
+  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
+
+  if (!isUInt<12>(Offset + Size - EltSize)) {
     SOffset = AMDGPU::NoRegister;
 
+    // We currently only support spilling VGPRs to EltSize boundaries, meaning
+    // we can simplify the adjustment of Offset here to just scale with
+    // WavefrontSize.
+    Offset *= ST.getWavefrontSize();
+
     // We don't have access to the register scavenger if this function is called
     // during PEI::scavengeFrameVirtualRegs().
     if (RS)
@@ -561,8 +568,8 @@
       // add the offset directly to the ScratchOffset register, and then
       // subtract the offset after the spill to return ScratchOffset to it's
       // original value.
-      RanOutOfSGPRs = true;
       SOffset = ScratchOffsetReg;
+      ScratchOffsetRegDelta = Offset;
     } else {
       Scavenged = true;
     }
@@ -574,8 +581,6 @@
     Offset = 0;
   }
 
-  const unsigned EltSize = 4;
-
   for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
     unsigned SubReg = NumSubRegs == 1 ?
       ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
@@ -607,11 +612,11 @@
       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
   }
 
-  if (RanOutOfSGPRs) {
+  if (ScratchOffsetRegDelta != 0) {
     // Subtract the offset we added to the ScratchOffset register.
     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
-      .addReg(ScratchOffsetReg)
-      .addImm(OriginalImmOffset);
+        .addReg(ScratchOffsetReg)
+        .addImm(ScratchOffsetRegDelta);
   }
 }
 
Index: test/CodeGen/AMDGPU/spill-offset-calculation.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/spill-offset-calculation.ll
@@ -0,0 +1,153 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 < %s | FileCheck %s
+
+; Test that the VGPR spiller correctly switches to SGPR offsets when the
+; instruction offset field would overflow, and that it accounts for memory
+; swizzling.
+
+; CHECK-LABEL: test_inst_offset
+define amdgpu_kernel void @test_inst_offset() {
+entry:
+  ; Occupy 4092 bytes of scratch, so the offset of the spill of %a just fits in
+  ; the instruction offset field.
+  %alloca = alloca i8, i32 4088, align 4, addrspace(5)
+  %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
+
+  %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
+  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
+  %a = load volatile i32, i32 addrspace(5)* %aptr
+
+  ; Force %a to spill.
+  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}" ()
+  call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}" ()
+  call void asm sideeffect "", "~{v10},~{v11},~{v12},~{v13},~{v14}" ()
+  call void asm sideeffect "", "~{v15},~{v16},~{v17},~{v18},~{v19}" ()
+  call void asm sideeffect "", "~{v20},~{v21},~{v22},~{v23},~{v24}" ()
+  call void asm sideeffect "", "~{v25},~{v26},~{v27},~{v28},~{v29}" ()
+  call void asm sideeffect "", "~{v30},~{v31},~{v32},~{v33},~{v34}" ()
+  call void asm sideeffect "", "~{v35},~{v36},~{v37},~{v38},~{v39}" ()
+  call void asm sideeffect "", "~{v40},~{v41},~{v42},~{v43},~{v44}" ()
+  call void asm sideeffect "", "~{v45},~{v46},~{v47},~{v48},~{v49}" ()
+  call void asm sideeffect "", "~{v50},~{v51},~{v52},~{v53},~{v54}" ()
+  call void asm sideeffect "", "~{v55},~{v56},~{v57},~{v58},~{v59}" ()
+  call void asm sideeffect "", "~{v60},~{v61},~{v62},~{v63},~{v64}" ()
+  call void asm sideeffect "", "~{v65},~{v66},~{v67},~{v68},~{v69}" ()
+  call void asm sideeffect "", "~{v70},~{v71},~{v72},~{v73},~{v74}" ()
+  call void asm sideeffect "", "~{v75},~{v76},~{v77},~{v78},~{v79}" ()
+  call void asm sideeffect "", "~{v80},~{v81},~{v82},~{v83},~{v84}" ()
+  call void asm sideeffect "", "~{v85},~{v86},~{v87},~{v88},~{v89}" ()
+  call void asm sideeffect "", "~{v90},~{v91},~{v92},~{v93},~{v94}" ()
+  call void asm sideeffect "", "~{v95},~{v96},~{v97},~{v98},~{v99}" ()
+  call void asm sideeffect "", "~{v100},~{v101},~{v102},~{v103},~{v104}" ()
+  call void asm sideeffect "", "~{v105},~{v106},~{v107},~{v108},~{v109}" ()
+  call void asm sideeffect "", "~{v110},~{v111},~{v112},~{v113},~{v114}" ()
+  call void asm sideeffect "", "~{v115},~{v116},~{v117},~{v118},~{v119}" ()
+  call void asm sideeffect "", "~{v120},~{v121},~{v122},~{v123},~{v124}" ()
+  call void asm sideeffect "", "~{v125},~{v126},~{v127},~{v128},~{v129}" ()
+  call void asm sideeffect "", "~{v130},~{v131},~{v132},~{v133},~{v134}" ()
+  call void asm sideeffect "", "~{v135},~{v136},~{v137},~{v138},~{v139}" ()
+  call void asm sideeffect "", "~{v140},~{v141},~{v142},~{v143},~{v144}" ()
+  call void asm sideeffect "", "~{v145},~{v146},~{v147},~{v148},~{v149}" ()
+  call void asm sideeffect "", "~{v150},~{v151},~{v152},~{v153},~{v154}" ()
+  call void asm sideeffect "", "~{v155},~{v156},~{v157},~{v158},~{v159}" ()
+  call void asm sideeffect "", "~{v160},~{v161},~{v162},~{v163},~{v164}" ()
+  call void asm sideeffect "", "~{v165},~{v166},~{v167},~{v168},~{v169}" ()
+  call void asm sideeffect "", "~{v170},~{v171},~{v172},~{v173},~{v174}" ()
+  call void asm sideeffect "", "~{v175},~{v176},~{v177},~{v178},~{v179}" ()
+  call void asm sideeffect "", "~{v180},~{v181},~{v182},~{v183},~{v184}" ()
+  call void asm sideeffect "", "~{v185},~{v186},~{v187},~{v188},~{v189}" ()
+  call void asm sideeffect "", "~{v190},~{v191},~{v192},~{v193},~{v194}" ()
+  call void asm sideeffect "", "~{v195},~{v196},~{v197},~{v198},~{v199}" ()
+  call void asm sideeffect "", "~{v200},~{v201},~{v202},~{v203},~{v204}" ()
+  call void asm sideeffect "", "~{v205},~{v206},~{v207},~{v208},~{v209}" ()
+  call void asm sideeffect "", "~{v210},~{v211},~{v212},~{v213},~{v214}" ()
+  call void asm sideeffect "", "~{v215},~{v216},~{v217},~{v218},~{v219}" ()
+  call void asm sideeffect "", "~{v220},~{v221},~{v222},~{v223},~{v224}" ()
+  call void asm sideeffect "", "~{v225},~{v226},~{v227},~{v228},~{v229}" ()
+  call void asm sideeffect "", "~{v230},~{v231},~{v232},~{v233},~{v234}" ()
+  call void asm sideeffect "", "~{v235},~{v236},~{v237},~{v238},~{v239}" ()
+  call void asm sideeffect "", "~{v240},~{v241},~{v242},~{v243},~{v244}" ()
+  call void asm sideeffect "", "~{v245},~{v246},~{v247},~{v248},~{v249}" ()
+  call void asm sideeffect "", "~{v250},~{v251},~{v252},~{v253},~{v254}" ()
+  call void asm sideeffect "", "~{v255}" ()
+
+  %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
+  store volatile i32 %a, i32 addrspace(5)* %outptr
+
+  ret void
+}
+
+; CHECK-LABEL: test_sgpr_offset
+define amdgpu_kernel void @test_sgpr_offset() {
+entry:
+  ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
+  ; fit in the instruction, and has to live in the SGPR offset.
+  %alloca = alloca i8, i32 4092, align 4, addrspace(5)
+  %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
+
+  %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
+  ; 0x40000 / 64 = 4096 (for wave64)
+  ; CHECK: s_add_u32 [[SGPR:s[0-9]+]], s{{[0-9]+}}, 0x40000
+  ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], [[SGPR]] ; 4-byte Folded Spill
+  %a = load volatile i32, i32 addrspace(5)* %aptr
+
+  ; Force %a to spill
+  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}" ()
+  call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}" ()
+  call void asm sideeffect "", "~{v10},~{v11},~{v12},~{v13},~{v14}" ()
+  call void asm sideeffect "", "~{v15},~{v16},~{v17},~{v18},~{v19}" ()
+  call void asm sideeffect "", "~{v20},~{v21},~{v22},~{v23},~{v24}" ()
+  call void asm sideeffect "", "~{v25},~{v26},~{v27},~{v28},~{v29}" ()
+  call void asm sideeffect "", "~{v30},~{v31},~{v32},~{v33},~{v34}" ()
+  call void asm sideeffect "", "~{v35},~{v36},~{v37},~{v38},~{v39}" ()
+  call void asm sideeffect "", "~{v40},~{v41},~{v42},~{v43},~{v44}" ()
+  call void asm sideeffect "", "~{v45},~{v46},~{v47},~{v48},~{v49}" ()
+  call void asm sideeffect "", "~{v50},~{v51},~{v52},~{v53},~{v54}" ()
+  call void asm sideeffect "", "~{v55},~{v56},~{v57},~{v58},~{v59}" ()
+  call void asm sideeffect "", "~{v60},~{v61},~{v62},~{v63},~{v64}" ()
+  call void asm sideeffect "", "~{v65},~{v66},~{v67},~{v68},~{v69}" ()
+  call void asm sideeffect "", "~{v70},~{v71},~{v72},~{v73},~{v74}" ()
+  call void asm sideeffect "", "~{v75},~{v76},~{v77},~{v78},~{v79}" ()
+  call void asm sideeffect "", "~{v80},~{v81},~{v82},~{v83},~{v84}" ()
+  call void asm sideeffect "", "~{v85},~{v86},~{v87},~{v88},~{v89}" ()
+  call void asm sideeffect "", "~{v90},~{v91},~{v92},~{v93},~{v94}" ()
+  call void asm sideeffect "", "~{v95},~{v96},~{v97},~{v98},~{v99}" ()
+  call void asm sideeffect "", "~{v100},~{v101},~{v102},~{v103},~{v104}" ()
+  call void asm sideeffect "", "~{v105},~{v106},~{v107},~{v108},~{v109}" ()
+  call void asm sideeffect "", "~{v110},~{v111},~{v112},~{v113},~{v114}" ()
+  call void asm sideeffect "", "~{v115},~{v116},~{v117},~{v118},~{v119}" ()
+  call void asm sideeffect "", "~{v120},~{v121},~{v122},~{v123},~{v124}" ()
+  call void asm sideeffect "", "~{v125},~{v126},~{v127},~{v128},~{v129}" ()
+  call void asm sideeffect "", "~{v130},~{v131},~{v132},~{v133},~{v134}" ()
+  call void asm sideeffect "", "~{v135},~{v136},~{v137},~{v138},~{v139}" ()
+  call void asm sideeffect "", "~{v140},~{v141},~{v142},~{v143},~{v144}" ()
+  call void asm sideeffect "", "~{v145},~{v146},~{v147},~{v148},~{v149}" ()
+  call void asm sideeffect "", "~{v150},~{v151},~{v152},~{v153},~{v154}" ()
+  call void asm sideeffect "", "~{v155},~{v156},~{v157},~{v158},~{v159}" ()
+  call void asm sideeffect "", "~{v160},~{v161},~{v162},~{v163},~{v164}" ()
+  call void asm sideeffect "", "~{v165},~{v166},~{v167},~{v168},~{v169}" ()
+  call void asm sideeffect "", "~{v170},~{v171},~{v172},~{v173},~{v174}" ()
+  call void asm sideeffect "", "~{v175},~{v176},~{v177},~{v178},~{v179}" ()
+  call void asm sideeffect "", "~{v180},~{v181},~{v182},~{v183},~{v184}" ()
+  call void asm sideeffect "", "~{v185},~{v186},~{v187},~{v188},~{v189}" ()
+  call void asm sideeffect "", "~{v190},~{v191},~{v192},~{v193},~{v194}" ()
+  call void asm sideeffect "", "~{v195},~{v196},~{v197},~{v198},~{v199}" ()
+  call void asm sideeffect "", "~{v200},~{v201},~{v202},~{v203},~{v204}" ()
+  call void asm sideeffect "", "~{v205},~{v206},~{v207},~{v208},~{v209}" ()
+  call void asm sideeffect "", "~{v210},~{v211},~{v212},~{v213},~{v214}" ()
+  call void asm sideeffect "", "~{v215},~{v216},~{v217},~{v218},~{v219}" ()
+  call void asm sideeffect "", "~{v220},~{v221},~{v222},~{v223},~{v224}" ()
+  call void asm sideeffect "", "~{v225},~{v226},~{v227},~{v228},~{v229}" ()
+  call void asm sideeffect "", "~{v230},~{v231},~{v232},~{v233},~{v234}" ()
+  call void asm sideeffect "", "~{v235},~{v236},~{v237},~{v238},~{v239}" ()
+  call void asm sideeffect "", "~{v240},~{v241},~{v242},~{v243},~{v244}" ()
+  call void asm sideeffect "", "~{v245},~{v246},~{v247},~{v248},~{v249}" ()
+  call void asm sideeffect "", "~{v250},~{v251},~{v252},~{v253},~{v254}" ()
+  call void asm sideeffect "", "~{v255}" ()
+
+  %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
+  store volatile i32 %a, i32 addrspace(5)* %outptr
+
+  ret void
+}
+
+attributes #1 = { nounwind readnone }
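
Note (illustration, not part of the patch): the core of the change is that once the spill offset no longer fits in the unsigned 12-bit MUBUF immediate, the offset is carried in the ScratchOffset SGPR instead, and because scratch memory is swizzled per lane that SGPR value must be the byte offset scaled by the wavefront size (4096 * 64 = 0x40000 in test_sgpr_offset above). Below is a minimal standalone C++ sketch of that selection logic with hypothetical names (computeSpillOffset, SpillOffset), assuming a 12-bit unsigned immediate and a wave64 subtarget; it is a simplification of the patched function, not the in-tree implementation.

// Illustration only: mirrors the offset selection performed by the patch.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct SpillOffset {
  bool UseSGPROffset;  // true -> offset is carried in the SGPR, immediate is 0
  int64_t SGPRDelta;   // value added to (and later subtracted from) the SGPR
  int64_t ImmOffset;   // immediate offset encoded in the instruction
};

// Hypothetical helper: Offset is the unswizzled frame offset in bytes,
// NumSubRegs the number of 32-bit subregisters being spilled.
static SpillOffset computeSpillOffset(int64_t Offset, unsigned NumSubRegs,
                                      unsigned WavefrontSize) {
  const unsigned EltSize = 4;
  const unsigned Size = NumSubRegs * EltSize;
  assert(Offset % EltSize == 0 && "unexpected VGPR spill offset");

  // The offset of the last dword written must still fit the 12-bit field.
  if (static_cast<uint64_t>(Offset + Size - EltSize) < (1u << 12))
    return {false, 0, Offset};

  // Scratch is swizzled per lane, so a byte offset moved into the SGPR must
  // be scaled by the wavefront size.
  return {true, Offset * WavefrontSize, 0};
}

int main() {
  // Mirrors test_sgpr_offset: frame offset 4096, one dword, wave64.
  SpillOffset S = computeSpillOffset(4096, 1, 64);
  // Prints: sgpr=1 delta=0x40000 imm=0
  std::printf("sgpr=%d delta=0x%llx imm=%lld\n", S.UseSGPROffset,
              (unsigned long long)S.SGPRDelta, (long long)S.ImmOffset);
  return 0;
}

The delta returned on the SGPR path corresponds to what the patch records in ScratchOffsetRegDelta: it is added to ScratchOffsetReg before the spill and subtracted again with S_SUB_U32 afterwards.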