diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -627,6 +627,14 @@
     if (PreRegAlloc && MI.isCall())
       return false;
 
+    // Copying a block with an INLINEASM_BR instruction may result in a PHI node
+    // on the indirect path. We assume that all values used on the indirect path
+    // dominate all paths into the indirect block, i.e. don't have PHI nodes.
+    // FIXME: This may be too restrictive. Perhaps we should restrict this only
+    // if a value in the current block isn't used on the indirect path.
+    if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
+      return false;
+
     if (MI.isBundle())
       InstrCount += MI.getBundleSize();
     else if (!MI.isPHI() && !MI.isMetaInstruction())
diff --git a/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple=x86_64-linux -stop-after=early-tailduplication < %s | FileCheck %s
+
+; Ensure that we don't duplicate a block with an "INLINEASM_BR" instruction
+; during code gen.
+
+declare void @foo()
+
+define i8* @test1() {
+  ; CHECK-LABEL: name: test1
+  ; CHECK: bb.0.bb:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   [[DEF:%[0-9]+]]:gr64 = IMPLICIT_DEF
+  ; CHECK:   [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm killed [[DEF]], 1, $noreg, 0, $noreg :: (load 8 from `i8** undef`)
+  ; CHECK:   [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags
+  ; CHECK:   [[COPY:%[0-9]+]]:gr8 = COPY [[MOV32r0_]].sub_8bit
+  ; CHECK:   TEST8rr [[COPY]], [[COPY]], implicit-def $eflags
+  ; CHECK:   JCC_1 %bb.2, 5, implicit $eflags
+  ; CHECK:   JMP_1 %bb.1
+  ; CHECK: bb.1.bb100:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   [[DEF1:%[0-9]+]]:gr64 = IMPLICIT_DEF
+  ; CHECK:   MOV64mi32 killed [[DEF1]], 1, $noreg, 0, $noreg, 0 :: (store 8 into `i8** undef`)
+  ; CHECK:   JMP_1 %bb.3
+  ; CHECK: bb.2.bb106:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  ; CHECK:   CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
+  ; CHECK:   ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  ; CHECK:   [[MOV32r0_1:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags
+  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[MOV32r0_1]], %subreg.sub_32bit
+  ; CHECK: bb.3.bb110:
+  ; CHECK:   successors: %bb.5(0x80000000), %bb.4(0x00000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:gr64 = PHI [[SUBREG_TO_REG]], %bb.2, [[MOV64rm]], %bb.1
+  ; CHECK:   INLINEASM_BR &"", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42, 13 /* imm */, 0, 13 /* imm */, blockaddress(@test1, %ir-block.bb17.i.i.i), 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags
+  ; CHECK:   JMP_1 %bb.5
+  ; CHECK: bb.4.bb17.i.i.i (address-taken):
+  ; CHECK:   successors: %bb.5(0x80000000)
+  ; CHECK: bb.5.kmem_cache_has_cpu_partial.exit:
+  ; CHECK:   $rax = COPY [[PHI]]
+  ; CHECK:   RET 0, $rax
+bb:
+  %i28.i = load i8*, i8** undef, align 8
+  br i1 undef, label %bb100, label %bb106
+
+bb100:                                            ; preds = %bb
+  store i8* null, i8** undef, align 8
+  br label %bb110
+
+bb106:                                            ; preds = %bb
+  call void @foo()
+  br label %bb110
+
+bb110:                                            ; preds = %bb106, %bb100
+  %i10.1 = phi i8* [ null, %bb106 ], [ %i28.i, %bb100 ]
+  callbr void asm sideeffect "", "i,i,X,~{dirflag},~{fpsr},~{flags}"(i32 42, i1 false, i8* blockaddress(@test1, %bb17.i.i.i))
+          to label %kmem_cache_has_cpu_partial.exit [label %bb17.i.i.i]
+
+bb17.i.i.i:                                       ; preds = %bb110
+  br label %kmem_cache_has_cpu_partial.exit
+
+kmem_cache_has_cpu_partial.exit:                  ; preds = %bb110
+  ret i8* %i10.1
+}
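
For context only, not part of the patch: a minimal hand-written IR sketch of the CFG shape the new check avoids creating, using the same callbr style as the test above. The function @sketch and every value and label name in it are hypothetical. If a block ending in a callbr were tail-duplicated into both of its predecessors, the indirect target would gain one predecessor per copy, and any value defined in the duplicated block that is used on the indirect path would have to become a PHI node there; the comment added in TailDuplicator.cpp assumes exactly that such PHI nodes do not appear.

; Hypothetical sketch of the CFG after duplicating a callbr block into its
; two predecessors (%dup1 and %dup2); all names are illustrative only.
define i32 @sketch(i1 %c) {
entry:
  br i1 %c, label %dup1, label %dup2

dup1:                                   ; duplicated copy #1 of the callbr block
  %x1 = add i32 0, 1
  callbr void asm sideeffect "", "X"(i8* blockaddress(@sketch, %indirect))
          to label %exit [label %indirect]

dup2:                                   ; duplicated copy #2 of the callbr block
  %x2 = add i32 0, 2
  callbr void asm sideeffect "", "X"(i8* blockaddress(@sketch, %indirect))
          to label %exit [label %indirect]

indirect:                               ; the indirect path now needs a PHI node
  %x = phi i32 [ %x1, %dup1 ], [ %x2, %dup2 ]
  br label %exit

exit:
  %r = phi i32 [ 0, %dup1 ], [ 0, %dup2 ], [ %x, %indirect ]
  ret i32 %r
}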