Index: llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp
+++ llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp
@@ -1582,6 +1582,14 @@
         return false;
       }
     }
+
+    // We must also check for overlaps with regmask clobbers.
+    BitVector RegMaskUsable;
+    if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
+        !RegMaskUsable.test(DstReg)) {
+      DEBUG(dbgs() << "\t\tRegMask interference\n");
+      return false;
+    }
   }
 
   // Skip any value computations, we are not adding new values to the
@@ -1616,14 +1624,6 @@
           DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
           return false;
         }
-
-        // We must also check for clobbers caused by regmasks.
-        for (const auto &MO : MI->operands()) {
-          if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) {
-            DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI);
-            return false;
-          }
-        }
       }
     }
 
Index: llvm/trunk/test/CodeGen/AArch64/regcoal-physreg.mir
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/regcoal-physreg.mir
+++ llvm/trunk/test/CodeGen/AArch64/regcoal-physreg.mir
@@ -1,5 +1,7 @@
 # RUN: llc -mtriple=aarch64-apple-ios -run-pass=simple-register-coalescing %s -o - | FileCheck %s
 --- |
+  declare void @f2()
+
   define void @func() { ret void }
 ...
 ---
@@ -14,6 +16,7 @@
   - { id: 4, class: gpr64 }
   - { id: 5, class: gpr32 }
   - { id: 6, class: xseqpairsclass }
+  - { id: 7, class: gpr64 }
 body: |
   bb.0:
     ; We usually should not coalesce copies from allocatable physregs.
@@ -64,4 +67,16 @@
     ; CHECK: HINT 0, implicit %6
     %6 = COPY %xzr_x0
     HINT 0, implicit %6
+
+    ; It is not fine to coalesce copies from reserved physregs when they are
+    ; clobbered by the regmask on a call.
+    ; CHECK: %7 = COPY %x18
+    ; CHECK: BL @f2, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit-def %sp
+    ; CHECK: STRXui %7, %x1, 0
+
+    ; Need a def of x18 so that it's not deduced as "constant".
+    %x18 = COPY %xzr
+    %7 = COPY %x18
+    BL @f2, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit-def %sp
+    STRXui %7, %x1, 0
 ...
Index: llvm/trunk/test/CodeGen/SPARC/register-clobber.ll
===================================================================
--- llvm/trunk/test/CodeGen/SPARC/register-clobber.ll
+++ llvm/trunk/test/CodeGen/SPARC/register-clobber.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=sparc < %s | FileCheck %s
+
+;; Verify that g1 (the output of first asm) is properly understood to
+;; be clobbered by the call instruction, and moved out of the way
+;; before it. (NOTE: remember delay slot; mov executes before call)
+
+; CHECK-LABEL: test1:
+; CHECK: ta 9
+; CHECK: call dosomething
+; CHECK: mov %g1, %i0
+
+define i32 @test1() nounwind {
+entry:
+  %0 = tail call i32 asm sideeffect "ta $1", "={r1},i"(i32 9) nounwind
+  tail call void @dosomething() nounwind
+  ret i32 %0
+}
+
+;; Also check using the value.
+; CHECK-LABEL: test2:
+; CHECK: ta 9
+; CHECK: call dosomething
+; CHECK: mov %g1, %i0
+; CHECK: mov %i0, %g1
+; CHECK: ta 10
+
+define void @test2() local_unnamed_addr nounwind {
+entry:
+  %0 = tail call i32 asm sideeffect "ta $1", "={r1},i"(i32 9) nounwind
+  tail call void @dosomething() nounwind
+  tail call void asm sideeffect "ta $0", "i,{r1}"(i32 10, i32 %0) nounwind
+  ret void
+}
+
+declare void @dosomething() local_unnamed_addr nounwind