diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -374,6 +374,13 @@ } } + // All those frame indices which are dead by now should be removed from the + // function frame. Otherwise, there is a side effect such as re-mapping of + // free frame index ids by the later pass(es) like "stack slot coloring" + // which in turn could mess up the bookkeeping of "frame index to VGPR + // lane". + FuncInfo->removeDeadFrameIndices(MFI); + MadeChange = true; } else if (FuncInfo->VGPRReservedForSGPRSpill) { FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -443,11 +443,22 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) { // The FP & BP spills haven't been inserted yet, so keep them around. + // + // Apart from removing dead frame indices from function frame, also make sure + // to remove them from `SGPRToVGPRSpills` data structure. Otherwise, it could + // result in an unexpected side effect and bug, in case of any re-mapping of + // freed frame indices by later pass(es) like "stack slot coloring". + std::set<int> DeadFrameIndices; for (auto &R : SGPRToVGPRSpills) { - if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) + if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) { MFI.RemoveStackObject(R.first); + DeadFrameIndices.insert(R.first); + } } + for (auto DFI : DeadFrameIndices) + SGPRToVGPRSpills.erase(DFI); + // All other SPGRs must be allocated on the default stack, so reset the stack // ID. 
for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1350,6 +1350,10 @@ SB.SuperReg != SB.MFI.getFrameOffsetReg())); if (SpillToVGPR) { + + assert(SB.NumSubRegs == VGPRSpills.size() && + "Num of VGPR lanes should be equal to num of SGPRs spilled"); + for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) { Register SubReg = SB.NumSubRegs == 1 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll @@ -0,0 +1,723 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s + +; This tests for a bug that caused a crash in SIRegisterInfo::spillSGPR() +; which was due to incorrect book-keeping of removed dead frame indices. 
+ +@.str.0 = external dso_local addrspace(4) constant { [7 x i8], [25 x i8] }, align 32 + +; CHECK-LABEL: {{^}}kernel0: +define protected amdgpu_kernel void @kernel0() local_unnamed_addr { +entry: + %0 = load i64, i64 addrspace(4)* undef, align 8 + %1 = inttoptr i64 %0 to i8* + br i1 icmp eq (i8* addrspacecast (i8 addrspace(4)* getelementptr inbounds ({ [7 x i8], [25 x i8] }, { [7 x i8], [25 x i8] } addrspace(4)* @.str.0, i32 0, i32 0, i64 0) to i8*), i8* null), label %2, label %4 + +2: + %3 = tail call fastcc <2 x i64> @goo(i8* %1, i32 2, i64 undef, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0) + br label %kern.exit + +4: + %5 = phi i64 [ 0, %entry ], [ %238, %outer.loop.exit ] + %6 = select i1 undef, i64 %5, i64 56 + br i1 undef, label %8, label %7 + +7: + br i1 undef, label %loop.exit.6, label %within.loop.6 + +8: + br i1 undef, label %9, label %11 + +9: + br i1 undef, label %10, label %11 + +10: + unreachable + +11: + br i1 undef, label %12, label %14 + +12: + br i1 undef, label %13, label %14 + +13: + unreachable + +14: + br i1 undef, label %15, label %17 + +15: + br i1 undef, label %16, label %17 + +16: + unreachable + +17: + br i1 undef, label %18, label %20 + +18: + br i1 undef, label %19, label %20 + +19: + unreachable + +20: + br i1 undef, label %21, label %23 + +21: + br i1 undef, label %22, label %23 + +22: + unreachable + +23: + br i1 undef, label %24, label %26 + +24: + br i1 undef, label %25, label %26 + +25: + unreachable + +26: + %27 = load i8, i8* undef, align 1 + %28 = icmp ne i8 %27, 0 + br i1 %28, label %29, label %31 + +29: + br i1 undef, label %30, label %31 + +30: + unreachable + +31: + br i1 undef, label %32, label %34 + +32: + br i1 undef, label %33, label %34 + +33: + unreachable + +34: + %35 = add nsw i32 0, -8 + br label %loop.exit.6 + +within.loop.6: + br i1 undef, label %36, label %38 + +36: + br i1 undef, label %37, label %38 + +37: + unreachable + +38: + br i1 undef, label %loop.exit.6, label %within.loop.6 + +loop.exit.6: + %39 
= phi i32 [ %35, %34 ], [ 0, %7 ], [ 0, %38 ] + br i1 undef, label %42, label %40 + +40: + %41 = icmp eq i32 %39, 0 + br i1 %41, label %loop.exit.5, label %within.loop.5 + +42: + br i1 undef, label %43, label %45 + +43: + br i1 undef, label %44, label %45 + +44: + unreachable + +45: + br i1 undef, label %46, label %48 + +46: + br i1 undef, label %47, label %48 + +47: + unreachable + +48: + br i1 undef, label %49, label %51 + +49: + br i1 undef, label %50, label %51 + +50: + unreachable + +51: + br i1 undef, label %52, label %54 + +52: + br i1 undef, label %53, label %54 + +53: + unreachable + +54: + br i1 undef, label %55, label %57 + +55: + br i1 undef, label %56, label %57 + +56: + unreachable + +57: + br i1 undef, label %58, label %60 + +58: + br i1 undef, label %59, label %60 + +59: + unreachable + +60: + br i1 undef, label %61, label %63 + +61: + br i1 undef, label %62, label %63 + +62: + unreachable + +63: + br i1 undef, label %64, label %66 + +64: + br i1 undef, label %65, label %66 + +65: + unreachable + +66: + %67 = add nsw i32 %39, -8 + br label %loop.exit.5 + +within.loop.5: + br i1 undef, label %68, label %70 + +68: + br i1 undef, label %69, label %70 + +69: + unreachable + +70: + br i1 undef, label %loop.exit.5, label %within.loop.5 + +loop.exit.5: + %71 = phi i32 [ %67, %66 ], [ 0, %40 ], [ 0, %70 ] + %72 = icmp ugt i32 %71, 7 + br i1 %72, label %74, label %73 + +73: + br i1 undef, label %loop.exit.4, label %within.loop.4 + +74: + br i1 undef, label %75, label %77 + +75: + br i1 undef, label %76, label %77 + +76: + unreachable + +77: + br i1 undef, label %78, label %80 + +78: + br i1 undef, label %79, label %80 + +79: + unreachable + +80: + br i1 undef, label %81, label %83 + +81: + br i1 undef, label %82, label %83 + +82: + unreachable + +83: + br i1 undef, label %84, label %86 + +84: + br i1 undef, label %85, label %86 + +85: + unreachable + +86: + br i1 undef, label %87, label %89 + +87: + br i1 undef, label %88, label %89 + +88: + unreachable + 
+89: + br i1 undef, label %90, label %92 + +90: + br i1 undef, label %91, label %92 + +91: + unreachable + +92: + br i1 undef, label %93, label %95 + +93: + br i1 undef, label %94, label %95 + +94: + unreachable + +95: + br i1 undef, label %96, label %98 + +96: + br i1 undef, label %97, label %98 + +97: + unreachable + +98: + %99 = add nsw i32 %71, -8 + br label %loop.exit.4 + +within.loop.4: + br i1 undef, label %100, label %102 + +100: + br i1 undef, label %101, label %102 + +101: + unreachable + +102: + br i1 undef, label %loop.exit.4, label %within.loop.4 + +loop.exit.4: + %103 = phi i32 [ %99, %98 ], [ 0, %73 ], [ 0, %102 ] + %104 = icmp ugt i32 %103, 7 + br i1 %104, label %107, label %105 + +105: + %106 = icmp eq i32 %103, 0 + br i1 %106, label %loop.exit.3, label %within.loop.3 + +107: + br i1 undef, label %108, label %110 + +108: + br i1 undef, label %109, label %110 + +109: + unreachable + +110: + br i1 undef, label %111, label %113 + +111: + br i1 undef, label %112, label %113 + +112: + unreachable + +113: + br i1 undef, label %114, label %116 + +114: + br i1 undef, label %115, label %116 + +115: + unreachable + +116: + br i1 undef, label %117, label %119 + +117: + br i1 undef, label %118, label %119 + +118: + unreachable + +119: + br i1 undef, label %120, label %122 + +120: + br i1 undef, label %121, label %122 + +121: + unreachable + +122: + br i1 undef, label %123, label %125 + +123: + br i1 undef, label %124, label %125 + +124: + unreachable + +125: + br i1 undef, label %126, label %128 + +126: + br i1 undef, label %127, label %128 + +127: + unreachable + +128: + br i1 undef, label %129, label %131 + +129: + br i1 undef, label %130, label %131 + +130: + unreachable + +131: + %132 = add nsw i32 %103, -8 + br label %loop.exit.3 + +within.loop.3: + br i1 undef, label %133, label %135 + +133: + br i1 undef, label %134, label %135 + +134: + unreachable + +135: + br i1 undef, label %loop.exit.3, label %within.loop.3 + +loop.exit.3: + %136 = phi i32 [ %132, 
%131 ], [ 0, %105 ], [ 0, %135 ] + br i1 undef, label %138, label %137 + +137: + br i1 undef, label %loop.exit.2, label %within.loop.2 + +138: + br i1 undef, label %139, label %141 + +139: + br i1 undef, label %140, label %141 + +140: + unreachable + +141: + br i1 undef, label %142, label %144 + +142: + br i1 undef, label %143, label %144 + +143: + unreachable + +144: + br i1 undef, label %145, label %147 + +145: + br i1 undef, label %146, label %147 + +146: + unreachable + +147: + br i1 undef, label %148, label %150 + +148: + br i1 undef, label %149, label %150 + +149: + unreachable + +150: + br i1 undef, label %151, label %153 + +151: + br i1 undef, label %152, label %153 + +152: + unreachable + +153: + br i1 undef, label %154, label %156 + +154: + br i1 undef, label %155, label %156 + +155: + unreachable + +156: + %157 = load i8, i8* undef, align 1 + %158 = icmp ne i8 %157, 0 + br i1 %158, label %159, label %161 + +159: + br i1 undef, label %160, label %161 + +160: + unreachable + +161: + br i1 undef, label %162, label %164 + +162: + br i1 undef, label %163, label %164 + +163: + unreachable + +164: + %165 = add nsw i32 %136, -8 + br label %loop.exit.2 + +within.loop.2: + br i1 undef, label %166, label %168 + +166: + br i1 undef, label %167, label %168 + +167: + unreachable + +168: + br i1 undef, label %loop.exit.2, label %within.loop.2 + +loop.exit.2: + %169 = phi i32 [ %165, %164 ], [ 0, %137 ], [ 0, %168 ] + br i1 undef, label %172, label %170 + +170: + %171 = icmp eq i32 %169, 0 + br i1 %171, label %loop.exit.1, label %within.loop.1 + +172: + br i1 undef, label %173, label %175 + +173: + br i1 undef, label %174, label %175 + +174: + unreachable + +175: + br i1 undef, label %176, label %178 + +176: + br i1 undef, label %177, label %178 + +177: + unreachable + +178: + br i1 undef, label %179, label %181 + +179: + br i1 undef, label %180, label %181 + +180: + unreachable + +181: + br i1 undef, label %182, label %184 + +182: + br i1 undef, label %183, label %184 
+ +183: + unreachable + +184: + br i1 undef, label %185, label %187 + +185: + br i1 undef, label %186, label %187 + +186: + unreachable + +187: + br i1 undef, label %188, label %190 + +188: + br i1 undef, label %189, label %190 + +189: + unreachable + +190: + br i1 undef, label %191, label %193 + +191: + br i1 undef, label %192, label %193 + +192: + unreachable + +193: + br i1 undef, label %194, label %196 + +194: + br i1 undef, label %195, label %196 + +195: + unreachable + +196: + %197 = add nsw i32 %169, -8 + br label %loop.exit.1 + +within.loop.1: + br i1 undef, label %198, label %200 + +198: + br i1 undef, label %199, label %200 + +199: + unreachable + +200: + br i1 undef, label %loop.exit.1, label %within.loop.1 + +loop.exit.1: + %201 = phi i32 [ %197, %196 ], [ 0, %170 ], [ 0, %200 ] + %202 = icmp ugt i32 %201, 7 + br i1 %202, label %205, label %203 + +203: + %204 = icmp eq i32 %201, 0 + br i1 %204, label %outer.loop.exit, label %within.loop.0 + +205: + %206 = load i8, i8* undef, align 1 + br i1 undef, label %207, label %210 + +207: + %208 = icmp sge i8 0, %206 + br i1 %208, label %209, label %210 + +209: + unreachable + +210: + br i1 undef, label %211, label %213 + +211: + br i1 undef, label %212, label %213 + +212: + call void @foo(i64 undef) + unreachable + +213: + br i1 undef, label %214, label %216 + +214: + br i1 undef, label %215, label %216 + +215: + unreachable + +216: + %217 = load i8, i8* undef, align 1 + %218 = icmp ne i8 %217, 0 + br i1 %218, label %219, label %221 + +219: + br i1 undef, label %220, label %221 + +220: + unreachable + +221: + br i1 undef, label %222, label %224 + +222: + br i1 undef, label %223, label %224 + +223: + unreachable + +224: + br i1 undef, label %225, label %227 + +225: + br i1 undef, label %226, label %227 + +226: + unreachable + +227: + br i1 undef, label %228, label %230 + +228: + br i1 undef, label %229, label %230 + +229: + unreachable + +230: + br i1 undef, label %231, label %233 + +231: + br i1 undef, label 
%232, label %233 + +232: + unreachable + +233: + br label %outer.loop.exit + +within.loop.0: + br i1 undef, label %234, label %236 + +234: + br i1 undef, label %235, label %236 + +235: + unreachable + +236: + br i1 undef, label %outer.loop.exit, label %within.loop.0 + +outer.loop.exit: + %237 = tail call fastcc <2 x i64> @goo(i8* %1, i32 2, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef) + %238 = sub i64 %5, %6 + %239 = icmp eq i64 %238, 0 + br i1 %239, label %kern.exit, label %4 + +kern.exit: + ret void +} + +define linkonce_odr protected void @foo(i64 %0) local_unnamed_addr { + %2 = call i32 @llvm.amdgcn.workgroup.id.x() + %3 = zext i32 %2 to i64 + %4 = call i32 @llvm.amdgcn.workgroup.id.z() + %5 = zext i32 %4 to i64 + %6 = call fastcc <2 x i64> @goo(i8* undef, i32 4, i64 undef, i64 undef, i64 %3, i64 undef, i64 %5, i64 undef, i64 undef, i64 1) + ret void +} + +declare i32 @llvm.amdgcn.workgroup.id.x() + +declare i32 @llvm.amdgcn.workgroup.id.z() + +declare dso_local fastcc <2 x i64> @goo(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) unnamed_addr