Index: lib/CodeGen/RegisterScavenging.cpp
===================================================================
--- lib/CodeGen/RegisterScavenging.cpp
+++ lib/CodeGen/RegisterScavenging.cpp
@@ -392,11 +392,31 @@
     return SReg;
   }
 
-  // Find an available scavenging slot.
-  unsigned SI;
-  for (SI = 0; SI < Scavenged.size(); ++SI)
-    if (Scavenged[SI].Reg == 0)
-      break;
+  // Find an available scavenging slot with size and alignment matching
+  // the requirements of the class RC.
+  MachineFunction &MF = *I->getParent()->getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  unsigned NeedSize = RC->getSize();
+  unsigned NeedAlign = RC->getAlignment();
+
+  unsigned SI = Scavenged.size(), Diff = UINT_MAX;
+  for (unsigned I = 0; I < Scavenged.size(); ++I) {
+    if (Scavenged[I].Reg != 0)
+      continue;
+    // Verify that this slot is valid for this register.
+    int FI = Scavenged[I].FrameIndex;
+    unsigned S = MFI.getObjectSize(FI);
+    unsigned A = MFI.getObjectAlignment(FI);
+    if (NeedSize > S || NeedAlign > A)
+      continue;
+    // Avoid wasting slots with large size and/or large alignment. Pick one
+    // that is the best fit for this register class (in street metric).
+    unsigned D = (S-NeedSize) + (A-NeedAlign);
+    if (D < Diff) {
+      SI = I;
+      Diff = D;
+    }
+  }
 
   if (SI == Scavenged.size()) {
     // We need to scavenge a register but have no spill slot, the target
@@ -411,8 +431,13 @@
   // otherwise, use the emergency stack spill slot.
   if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) {
     // Spill the scavenged register before I.
-    assert(Scavenged[SI].FrameIndex >= 0 &&
+    if (Scavenged[SI].FrameIndex < 0) {
+#ifndef NDEBUG
+      dbgs() << "Trying to spill " << PrintReg(SReg, TRI) << "\n";
+#endif
+      llvm_unreachable(
           "Cannot scavenge register without an emergency spill slot!");
+    }
     TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
                              RC, TRI);
     MachineBasicBlock::iterator II = std::prev(I);
Index: test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll
@@ -0,0 +1,124 @@
+; RUN: llc -march=hexagon < %s
+; REQUIRES: asserts
+
+target triple = "hexagon"
+
+define i32 @foo(i32 %n, i32* readonly %pi, i32* nocapture %po, <16 x i32>* readonly %vi, <16 x i32>* nocapture %vo) #0 {
+entry:
+  br i1 undef, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %incdec.ptr63 = getelementptr inbounds <16 x i32>, <16 x i32>* %vi, i32 32
+  %incdec.ptr61 = getelementptr inbounds <16 x i32>, <16 x i32>* %vi, i32 30
+  %incdec.ptr60 = getelementptr inbounds <16 x i32>, <16 x i32>* %vi, i32 29
+  %0 = load <16 x i32>, <16 x i32>* %incdec.ptr60, align 64
+  %incdec.ptr59 = getelementptr inbounds <16 x i32>, <16 x i32>* %vi, i32 28
+  %1 = load <16 x i32>, <16 x i32>* %incdec.ptr59, align 64
+  %incdec.ptr58 = getelementptr inbounds <16 x i32>, <16 x i32>* %vi, i32 27
+  %2 = load <16 x i32>, <16 x i32>* %incdec.ptr58, align 64
+  %incdec.ptr57 = getelementptr inbounds <16 x i32>, <16 x i32>* %vi, i32 26
+  %3 = load <16 x i32>, <16 x i32>* %incdec.ptr57, align 64
+  %incdec.ptr56 = getelementptr inbounds <16 x i32>, <16 x i32>* %vi, i32 25
+  %4 = load <16 x i32>, <16 x i32>* %incdec.ptr56, align 64
+  %5 = load <16 x i32>, <16 x i32>* undef, align 64
+  %incdec.ptr40 = getelementptr inbounds <16 x i32>, <16 x i32>* %vi, i32 9
+  %6 = load <16 x i32>, <16 x i32>* %incdec.ptr40, align 64
+  br i1 undef, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new
+
+for.body.preheader.new:                           ; preds = %for.body.preheader
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader.new
+  %po.addr.0289 = phi i32* [ %po, %for.body.preheader.new ], [ %incdec.ptr64.7, %for.body ]
+  %vi.addr.0288 = phi <16 x i32>* [ %incdec.ptr63, %for.body.preheader.new ], [ %incdec.ptr67.7, %for.body ]
+  %vo.addr.0287 = phi <16 x i32>* [ %vo, %for.body.preheader.new ], [ %incdec.ptr66.7, %for.body ]
+  %v31.0.in286 = phi <16 x i32>* [ undef, %for.body.preheader.new ], [ %incdec.ptr67.6, %for.body ]
+  %v30.0285 = phi <16 x i32> [ zeroinitializer, %for.body.preheader.new ], [ %v31.0.7, %for.body ]
+  %v29.0284 = phi <16 x i32> [ %0, %for.body.preheader.new ], [ %v31.0.6, %for.body ]
+  %v28.0283 = phi <16 x i32> [ %1, %for.body.preheader.new ], [ undef, %for.body ]
+  %v27.0282 = phi <16 x i32> [ %2, %for.body.preheader.new ], [ %v31.0.4, %for.body ]
+  %v26.0281 = phi <16 x i32> [ %3, %for.body.preheader.new ], [ %v31.0.3, %for.body ]
+  %v25.0280 = phi <16 x i32> [ %4, %for.body.preheader.new ], [ undef, %for.body ]
+  %v23.0278 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v31.0, %for.body ]
+  %v22.0277 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v30.0285, %for.body ]
+  %v21.0276 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v29.0284, %for.body ]
+  %v20.0275 = phi <16 x i32> [ %5, %for.body.preheader.new ], [ %v28.0283, %for.body ]
+  %v19.0274 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v27.0282, %for.body ]
+  %v18.0273 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v26.0281, %for.body ]
+  %v17.0272 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v25.0280, %for.body ]
+  %v15.0270 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v23.0278, %for.body ]
+  %v14.0269 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v22.0277, %for.body ]
+  %v13.0268 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v21.0276, %for.body ]
+  %v12.0267 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v20.0275, %for.body ]
+  %v11.0266 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v19.0274, %for.body ]
+  %v10.0265 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v18.0273, %for.body ]
+  %v9.0264 = phi <16 x i32> [ %6, %for.body.preheader.new ], [ %v17.0272, %for.body ]
+  %v7.0262 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v15.0270, %for.body ]
+  %v6.0261 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v14.0269, %for.body ]
+  %v5.0260 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v13.0268, %for.body ]
+  %v4.0259 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v12.0267, %for.body ]
+  %v3.0258 = phi <16 x i32> [ undef, %for.body.preheader.new ], [ %v11.0266, %for.body ]
+  %niter = phi i32 [ undef, %for.body.preheader.new ], [ %niter.nsub.7, %for.body ]
+  %v31.0 = load <16 x i32>, <16 x i32>* %v31.0.in286, align 64
+  %incdec.ptr66.1 = getelementptr inbounds <16 x i32>, <16 x i32>* %vo.addr.0287, i32 2
+  store <16 x i32> undef, <16 x i32>* %incdec.ptr66.1, align 64
+  %incdec.ptr67.2 = getelementptr inbounds <16 x i32>, <16 x i32>* %vi.addr.0288, i32 3
+  %v31.0.3 = load <16 x i32>, <16 x i32>* undef, align 64
+  %incdec.ptr64.3 = getelementptr inbounds i32, i32* %po.addr.0289, i32 4
+  %7 = tail call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> undef, <16 x i32> %v31.0)
+  %8 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1> %7, <16 x i32> %v15.0270, <16 x i32> %v31.0)
+  %9 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1> %8, <16 x i32> %v23.0278, <16 x i32> undef)
+  %10 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1> %9, <16 x i32> %v4.0259, <16 x i32> %v3.0258)
+  %11 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %7, <16 x i32> %v3.0258, <16 x i32> %v5.0260)
+  %12 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %8, <16 x i32> %11, <16 x i32> %v6.0261)
+  %13 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %9, <16 x i32> %12, <16 x i32> %v7.0262)
+  %14 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %10, <16 x i32> %13, <16 x i32> undef)
+  %incdec.ptr66.3 = getelementptr inbounds <16 x i32>, <16 x i32>* %vo.addr.0287, i32 4
+  store <16 x i32> %14, <16 x i32>* undef, align 64
+  %incdec.ptr67.3 = getelementptr inbounds <16 x i32>, <16 x i32>* %vi.addr.0288, i32 4
+  %v31.0.4 = load <16 x i32>, <16 x i32>* %incdec.ptr67.2, align 64
+  %15 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> undef, <16 x i32> undef, <16 x i32> undef)
+  %16 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> undef, <16 x i32> %15, <16 x i32> %v9.0264)
+  %17 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> undef, <16 x i32> %16, <16 x i32> %v10.0265)
+  %incdec.ptr66.5 = getelementptr inbounds <16 x i32>, <16 x i32>* %vo.addr.0287, i32 6
+  store <16 x i32> %17, <16 x i32>* undef, align 64
+  %incdec.ptr67.5 = getelementptr inbounds <16 x i32>, <16 x i32>* %vi.addr.0288, i32 6
+  %v31.0.6 = load <16 x i32>, <16 x i32>* undef, align 64
+  %incdec.ptr64.6 = getelementptr inbounds i32, i32* %po.addr.0289, i32 7
+  %18 = tail call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> undef, <16 x i32> %v31.0.3)
+  %19 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1> undef, <16 x i32> %v26.0281, <16 x i32> %v27.0282)
+  %20 = tail call <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1> %19, <16 x i32> %v7.0262, <16 x i32> %v6.0261)
+  %21 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %18, <16 x i32> %v6.0261, <16 x i32> undef)
+  %22 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> undef, <16 x i32> %21, <16 x i32> %v9.0264)
+  %23 = tail call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %19, <16 x i32> %22, <16 x i32> %v10.0265)
+  %24 = tail call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %20, <16 x i32> %23, <16 x i32> %v11.0266)
+  %incdec.ptr66.6 = getelementptr inbounds <16 x i32>, <16 x i32>* %vo.addr.0287, i32 7
+  store <16 x i32> %24, <16 x i32>* %incdec.ptr66.5, align 64
+  %incdec.ptr67.6 = getelementptr inbounds <16 x i32>, <16 x i32>* %vi.addr.0288, i32 7
+  %v31.0.7 = load <16 x i32>, <16 x i32>* %incdec.ptr67.5, align 64
+  %incdec.ptr64.7 = getelementptr inbounds i32, i32* %po.addr.0289, i32 8
+  store i32 undef, i32* %incdec.ptr64.6, align 4
+  %incdec.ptr66.7 = getelementptr inbounds <16 x i32>, <16 x i32>* %vo.addr.0287, i32 8
+  store <16 x i32> undef, <16 x i32>* %incdec.ptr66.6, align 64
+  %incdec.ptr67.7 = getelementptr inbounds <16 x i32>, <16 x i32>* %vi.addr.0288, i32 8
+  %niter.nsub.7 = add i32 %niter, -8
+  %niter.ncmp.7 = icmp eq i32 %niter.nsub.7, 0
+  br i1 %niter.ncmp.7, label %for.end.loopexit.unr-lcssa, label %for.body
+
+for.end.loopexit.unr-lcssa:                       ; preds = %for.body, %for.body.preheader
+  br i1 undef, label %for.end, label %for.body.epil
+
+for.body.epil:                                    ; preds = %for.body.epil, %for.end.loopexit.unr-lcssa
+  br i1 undef, label %for.end, label %for.body.epil
+
+for.end:                                          ; preds = %for.body.epil, %for.end.loopexit.unr-lcssa, %entry
+  ret i32 0
+}
+
+declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
+declare <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" }
+attributes #1 = { nounwind readnone }
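Note (illustration only, not part of the patch): the loop added in the first hunk picks, among the free scavenging slots that are large enough and sufficiently aligned for RC, the one with the smallest combined size/alignment slack, i.e. the best fit in the street (Manhattan) metric. Below is a minimal standalone C++ sketch of that selection policy; the Slot struct and pickSlot() helper are hypothetical stand-ins for the scavenger's slot records and the frame-info queries, not LLVM APIs.

#include <climits>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for one scavenging slot; Reg == 0 means the slot is free.
struct Slot {
  unsigned Reg;
  unsigned Size;
  unsigned Align;
};

// Return the index of the free slot that fits (NeedSize, NeedAlign) with the
// least wasted size plus alignment, or Slots.size() if no slot qualifies --
// the same policy as the loop added in the first hunk above.
static size_t pickSlot(const std::vector<Slot> &Slots, unsigned NeedSize,
                       unsigned NeedAlign) {
  size_t SI = Slots.size();
  unsigned Diff = UINT_MAX;
  for (size_t I = 0; I < Slots.size(); ++I) {
    if (Slots[I].Reg != 0)                        // Slot already in use.
      continue;
    if (NeedSize > Slots[I].Size || NeedAlign > Slots[I].Align)
      continue;                                   // Too small or under-aligned.
    unsigned D = (Slots[I].Size - NeedSize) + (Slots[I].Align - NeedAlign);
    if (D < Diff) {                               // Best fit so far (street metric).
      SI = I;
      Diff = D;
    }
  }
  return SI;
}

For example, a 4-byte, 4-aligned scalar request against slots of (size, align) = (64, 64) and (8, 8) selects the second slot, so a 64-byte HVX vector slot is not burned on a scalar spill -- presumably the kind of register mix the Hexagon test above is meant to exercise.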