diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -209,6 +209,14 @@
        I != E;) {
     MachineInstr *mi = &*(I++);
 
+    // INLINEASM_BR is a terminator, and spill code inserted for a register
+    // it uses would land after the terminator, which the MachineVerifier
+    // rejects. Mark such intervals as not spillable instead.
+    if (mi->getOpcode() == TargetOpcode::INLINEASM_BR) {
+      li.markNotSpillable();
+      return -1.0;
+    }
+
     // For local split artifacts, we are interested only in instructions between
     // the expected start and end of the range.
     SlotIndex si = LIS.getInstructionIndex(*mi);
diff --git a/llvm/test/CodeGen/X86/callbr-asm-regalloc.ll b/llvm/test/CodeGen/X86/callbr-asm-regalloc.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/callbr-asm-regalloc.ll
@@ -0,0 +1,96 @@
+; Test that the greedy register allocator emits MachineInstrs that pass
+; verification. Previously, the callbr was lowered to an INLINEASM_BR
+; followed by a non-terminator register spill, which violated the
+; MachineVerifier invariant that every MachineInstr after the first
+; terminator must also be a terminator. The unexpected spill looked like:
+; MOV32mr %stack.0, 1, $noreg, 0, $noreg, %44:gr32 :: (store 4 into %stack.0)
+
+; RUN: llc -regalloc=greedy -stop-after=greedy -verify-regalloc < %s
+
+%struct.d = type { i8* }
+%struct.b = type {}
+
+@j = dso_local local_unnamed_addr global i32 0, align 4
+@i = dso_local local_unnamed_addr global i32 0, align 4
+@h = dso_local local_unnamed_addr global i32 0, align 4
+@g = dso_local local_unnamed_addr global i32 0, align 4
+
+define dso_local i32 @cmsghdr_from_user_compat_to_kern(%struct.d* %0) #0 {
+  br label %2
+
+2:                                                ; preds = %1, %9
+  %3 = phi i32 [ %14, %9 ], [ undef, %1 ]
+  %4 = load i32, i32* @j, align 4
+  %5 = load i32, i32* @i, align 4
+  %6 = icmp eq i32 %5, 0
+  br i1 %6, label %7, label %44
+
+7:                                                ; preds = %2
+  %8 = icmp eq i32 %4, 0
+  br i1 %8, label %44, label %9
+
+9:                                                ; preds = %7
+  %10 = zext i32 %4 to i64
+  %11 = add nuw nsw i64 %10, 1
+  %12 = and i64 %11, 8589934590
+  %13 = trunc i64 %12 to i32
+  store i32 %13, i32* @h, align 4
+  %14 = tail call i32 @f(%struct.d* %0, i32 %3, i32 %4) #3
+  %15 = icmp eq i32 %14, 0
+  br i1 %15, label %16, label %2
+
+16:                                               ; preds = %9
+  %17 = icmp eq i64 %12, 0
+  br i1 %17, label %18, label %44
+
+18:                                               ; preds = %16
+  %19 = icmp eq %struct.d* %0, null
+  br i1 %19, label %44, label %20
+
+20:                                               ; preds = %18
+  %21 = bitcast %struct.d* %0 to i64*
+  %22 = load i64, i64* %21, align 8
+  %23 = trunc i64 %22 to i32
+  %24 = icmp eq i32 %23, 0
+  br i1 %24, label %44, label %25, !prof !0
+
+25:                                               ; preds = %20, %38
+  %26 = phi i32 [ %42, %38 ], [ %23, %20 ]
+  %27 = phi i64 [ %41, %38 ], [ 0, %20 ]
+  %28 = callbr i32 asm "1:\09mov $1,$0\0A .pushsection \22__ex_table\22,\22a\22\0A .long 1b - .\0A .long ${2:l} - .\0A .long 0 - .\0A .popsection\0A", "=r,*m,X,~{dirflag},~{fpsr},~{flags}"(%struct.b* null, i8* blockaddress(@cmsghdr_from_user_compat_to_kern, %29)) #4
+          to label %31 [label %29]
+
+29:                                               ; preds = %25
+  %30 = tail call i32 @a() #3
+  br label %44
+
+31:                                               ; preds = %25
+  %32 = zext i32 %28 to i64
+  %33 = shl i64 %27, 32
+  %34 = ashr exact i64 %33, 32
+  %35 = inttoptr i64 %34 to i32*
+  %36 = tail call i32 @c(i32* %35, i64 %32) #3
+  %37 = icmp eq i32 %36, 0
+  br i1 %37, label %38, label %44
+
+38:                                               ; preds = %31
+  %39 = add nuw nsw i64 %32, 8
+  %40 = and i64 %39, 8589934590
+  %41 = add nsw i64 %40, %34
+  %42 = tail call i32 @f(%struct.d* nonnull %0, i32 %26, i32 %28) #3
+  %43 = icmp eq i32 %42, 0
+  br i1 %43, label %44, label %25, !prof !0
+
+44:                                               ; preds = %7, %2, %31, %38, %18, %20, %29, %16
+  %45 = phi i32 [ -22, %16 ], [ undef, %29 ], [ undef, %20 ], [ undef, %18 ], [ undef, %38 ], [ undef, %31 ], [ -22, %7 ], [ 4, %2 ]
+  ret i32 %45
+}
+
+declare dso_local i32 @f(%struct.d*, i32, i32) local_unnamed_addr #1
+declare dso_local i32 @a() local_unnamed_addr #1
+declare dso_local i32 @c(i32*, i64) local_unnamed_addr #1
+define dso_local i32 @put_cmsg_compat() local_unnamed_addr #2 {
+  ret i32 undef
+}
+
+!0 = !{!"branch_weights", i32 2146410443, i32 1073205}
diff --git a/llvm/test/CodeGen/X86/callbr-asm-regalloc.mir b/llvm/test/CodeGen/X86/callbr-asm-regalloc.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/callbr-asm-regalloc.mir
@@ -0,0 +1,179 @@
+# RUN: llc -regalloc=greedy -stop-after=greedy -verify-regalloc < %s
+#
+# Dumped via: llc -stop-after=finalize-isel -print-after=finalize-isel \
+#   -simplify-mir llvm/test/CodeGen/X86/callbr-asm-regalloc.ll \
+#   2> callbr-asm-regalloc.mir
+#
+# *** IR Dump After Finalize ISel and expand pseudo-instructions ***:
+# Machine code for function cmsghdr_from_user_compat_to_kern: IsSSA, TracksLiveness
+Function Live Ins: $rdi in %14
+
+bb.0 (%ir-block.1):
+  successors: %bb.1(0x80000000); %bb.1(100.00%)
+  liveins: $rdi
+  %14:gr64 = COPY $rdi
+  %15:gr32 = IMPLICIT_DEF
+  %0:gr64 = MOV64ri 8589934590
+
+bb.1 (%ir-block.2):
+; predecessors: %bb.0, %bb.3
+  successors: %bb.2(0x7c000000), %bb.12(0x04000000); %bb.2(96.88%), %bb.12(3.12%)
+
+  %1:gr32 = PHI %15:gr32, %bb.0, %4:gr32, %bb.3
+  %17:gr32 = MOV32rm $rip, 1, $noreg, @j, $noreg :: (dereferenceable load 4 from @j)
+  %2:gr64 = SUBREG_TO_REG 0, killed %17:gr32, %subreg.sub_32bit
+  %16:gr32 = MOV32ri 4
+  CMP32mi8 $rip, 1, $noreg, @i, $noreg, 0, implicit-def $eflags :: (dereferenceable load 4 from @i)
+  JCC_1 %bb.12, 5, implicit $eflags
+  JMP_1 %bb.2
+
+bb.2 (%ir-block.8):
+; predecessors: %bb.1
+  successors: %bb.12(0x04000000), %bb.3(0x7c000000); %bb.12(3.12%), %bb.3(96.88%)
+
+  %19:gr32 = COPY %2.sub_32bit:gr64
+  %18:gr32 = MOV32ri -22
+  TEST32rr %19:gr32, %19:gr32, implicit-def $eflags
+  JCC_1 %bb.12, 4, implicit $eflags
+  JMP_1 %bb.3
+
+bb.3 (%ir-block.11):
+; predecessors: %bb.2
+  successors: %bb.4(0x04000000), %bb.1(0x7c000000); %bb.4(3.12%), %bb.1(96.88%)
+
+  %20:gr32 = COPY %2.sub_32bit:gr64
+  %21:gr64 = nuw nsw INC64r %2:gr64(tied-def 0), implicit-def dead $eflags
+  %3:gr64 = AND64rr %21:gr64(tied-def 0), %0:gr64, implicit-def dead $eflags
+  %22:gr32 = COPY %3.sub_32bit:gr64
+  MOV32mr $rip, 1, $noreg, @h, $noreg, killed %22:gr32 :: (store 4 into @h)
+  ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  $rdi = COPY %14:gr64
+  $esi = COPY %1:gr32
+  $edx = COPY %20:gr32
+  CALL64pcrel32 @f, , implicit $rsp, implicit $ssp, implicit $rdi, implicit $esi, implicit $edx, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+  ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  %23:gr32 = COPY $eax
+  %4:gr32 = COPY %23:gr32
+  TEST32rr %23:gr32, %23:gr32, implicit-def $eflags
+  JCC_1 %bb.1, 5, implicit $eflags
+  JMP_1 %bb.4
+
+bb.4 (%ir-block.18):
+; predecessors: %bb.3
+  successors: %bb.5(0x30000000), %bb.12(0x50000000); %bb.5(37.50%), %bb.12(62.50%)
+
+  %24:gr32 = MOV32ri -22
+  TEST64rr %3:gr64, %3:gr64, implicit-def $eflags
+  JCC_1 %bb.12, 5, implicit $eflags
+  JMP_1 %bb.5
+
+bb.5 (%ir-block.20):
+; predecessors: %bb.4
+  successors: %bb.12(0x30000000), %bb.6(0x50000000); %bb.12(37.50%), %bb.6(62.50%)
+
+  %25:gr32 = IMPLICIT_DEF
+  TEST64rr %14:gr64, %14:gr64, implicit-def $eflags
+  JCC_1 %bb.12, 4, implicit $eflags
+  JMP_1 %bb.6
+
+bb.6 (%ir-block.22):
+; predecessors: %bb.5
+  successors: %bb.12(0x7fef9fcb), %bb.7(0x00106035); %bb.12(99.95%), %bb.7(0.05%)
+
+  %5:gr32 = MOV32rm %14:gr64, 1, $noreg, 0, $noreg :: (load 4 from %ir.23, align 8)
+  %26:gr32 = IMPLICIT_DEF
+  TEST32rr %5:gr32, %5:gr32, implicit-def $eflags
+  JCC_1 %bb.12, 4, implicit $eflags
+  JMP_1 %bb.7
+
+bb.7..preheader:
+; predecessors: %bb.6
+  successors: %bb.8(0x80000000); %bb.8(100.00%)
+
+  %28:gr32 = MOV32r0 implicit-def dead $eflags
+  %27:gr64 = SUBREG_TO_REG 0, killed %28:gr32, %subreg.sub_32bit
+
+bb.8 (%ir-block.27):
+; predecessors: %bb.7, %bb.11
+  successors: %bb.9(0x00000000), %bb.13(0x80000000); %bb.9(0.00%), %bb.13(100.00%)
+
+  %6:gr32 = PHI %5:gr32, %bb.7, %12:gr32, %bb.11
+  %7:gr64 = PHI %27:gr64, %bb.7, %11:gr64, %bb.11
+  INLINEASM_BR &"1:\09mov $1,$0\0A .pushsection \22__ex_table\22,\22a\22\0A .long 1b - .\0A .long ${2:l} - .\0A .long 0 - .\0A .popsection\0A" [mayload] [attdialect], $0:[regdef:GR32], def %29:gr32, $1:[mem:m], $noreg, 1, $noreg, 0, $noreg, $2:[imm], blockaddress(@cmsghdr_from_user_compat_to_kern, %ir-block.31), $3:[clobber], implicit-def early-clobber $df, $4:[clobber], implicit-def early-clobber $fpsw, $5:[clobber], implicit-def early-clobber $eflags
+
+bb.13 (%ir-block.27):
+; predecessors: %bb.8
+  successors: %bb.10(0x80000000); %bb.10(100.00%)
+
+  %8:gr32 = COPY %29:gr32
+  JMP_1 %bb.10
+
+bb.9 (%ir-block.31, address-taken):
+; predecessors: %bb.8
+  successors: %bb.12(0x80000000); %bb.12(100.00%)
+
+  ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  CALL64pcrel32 @a, , implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+  ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  %31:gr32 = COPY $eax
+  %30:gr32 = IMPLICIT_DEF
+  JMP_1 %bb.12
+
+bb.10 (%ir-block.33):
+; predecessors: %bb.13
+  successors: %bb.11(0x7c000000), %bb.12(0x04000000); %bb.11(96.88%), %bb.12(3.12%)
+
+  %33:gr32 = MOV32rr %8:gr32
+  %9:gr64 = SUBREG_TO_REG 0, killed %33:gr32, %subreg.sub_32bit
+  %34:gr32 = COPY %7.sub_32bit:gr64
+  %10:gr64 = MOVSX64rr32 killed %34:gr32
+  ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  $rdi = COPY %10:gr64
+  $rsi = COPY %9:gr64
+  CALL64pcrel32 @c, , implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+  ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  %35:gr32 = COPY $eax
+  %32:gr32 = IMPLICIT_DEF
+  TEST32rr %35:gr32, %35:gr32, implicit-def $eflags
+  JCC_1 %bb.12, 5, implicit $eflags
+  JMP_1 %bb.11
+
+bb.11 (%ir-block.40):
+; predecessors: %bb.10
+  successors: %bb.12(0x7fef9fcb), %bb.8(0x00106035); %bb.12(99.95%), %bb.8(0.05%)
+
+  %37:gr64 = nuw nsw ADD64ri8 %9:gr64(tied-def 0), 8, implicit-def dead $eflags
+  %38:gr64 = AND64rr %37:gr64(tied-def 0), %0:gr64, implicit-def dead $eflags
+  %11:gr64 = nsw ADD64rr %38:gr64(tied-def 0), %10:gr64, implicit-def dead $eflags
+  ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  $rdi = COPY %14:gr64
+  $esi = COPY %6:gr32
+  $edx = COPY %8:gr32
+  CALL64pcrel32 @f, , implicit $rsp, implicit $ssp, implicit $rdi, implicit $esi, implicit $edx, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+  ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  %39:gr32 = COPY $eax
+  %12:gr32 = COPY %39:gr32
+  %36:gr32 = IMPLICIT_DEF
+  TEST32rr %39:gr32, %39:gr32, implicit-def $eflags
+  JCC_1 %bb.8, 5, implicit $eflags
+  JMP_1 %bb.12
+
+bb.12 (%ir-block.46):
+; predecessors: %bb.1, %bb.2, %bb.4, %bb.5, %bb.6, %bb.9, %bb.10, %bb.11
+
+  %13:gr32 = PHI %16:gr32, %bb.1, %18:gr32, %bb.2, %24:gr32, %bb.4, %25:gr32, %bb.5, %26:gr32, %bb.6, %30:gr32, %bb.9, %32:gr32, %bb.10, %36:gr32, %bb.11
+  $eax = COPY %13:gr32
+  RET 0, $eax
+
+# End machine code for function cmsghdr_from_user_compat_to_kern.
+
+# *** IR Dump After Finalize ISel and expand pseudo-instructions ***:
+# Machine code for function put_cmsg_compat: IsSSA, TracksLiveness
+
+bb.0 (%ir-block.0):
+  %0:gr32 = IMPLICIT_DEF
+  $eax = COPY %0:gr32
+  RET 0, $eax
+
+# End machine code for function put_cmsg_compat.
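
Note (not part of the patch): below is a minimal, self-contained C++ sketch of the contract the CalcSpillWeights change relies on, assuming LLVM's convention that spillability is encoded in the spill weight (LiveInterval::markNotSpillable() stores llvm::huge_valf and isSpillable() tests against it). The types and the trySpill() helper are illustrative stand-ins, not LLVM's actual classes.

    #include <cassert>
    #include <limits>

    // Illustrative stand-in for the spill-weight side of llvm::LiveInterval.
    struct LiveIntervalSketch {
      // LLVM uses huge_valf as the "unspillable" sentinel; float infinity
      // plays that role in this sketch.
      static constexpr float Unspillable =
          std::numeric_limits<float>::infinity();
      float Weight = 0.0f;
      void markNotSpillable() { Weight = Unspillable; }
      bool isSpillable() const { return Weight != Unspillable; }
    };

    // Hypothetical allocator spill path: it consults isSpillable() before
    // inserting any spill/reload code, so marking the interval keeps spill
    // instructions from ever being placed after a terminator such as
    // INLINEASM_BR.
    bool trySpill(const LiveIntervalSketch &LI) {
      if (!LI.isSpillable())
        return false; // must evict or pick another register instead
      // ... spill/reload insertion (e.g. the MOV32mr seen in the bug) ...
      return true;
    }

    int main() {
      LiveIntervalSketch LI;
      LI.markNotSpillable(); // what the patch does for INLINEASM_BR users
      assert(!trySpill(LI)); // no spill code can follow the terminator
      return 0;
    }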