Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
@@ -2282,6 +2282,13 @@
 
   assert(Subtarget->supportsTailCall());
 
+  // Tail calls to function pointers cannot be optimized for Thumb1 if the args
+  // to the call take up r0-r3. The reason is that there are no legal registers
+  // left to hold the pointer to the function to be called.
+  if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
+      !isa<GlobalAddressSDNode>(Callee.getNode()))
+    return false;
+
   // Look for obvious safe cases to perform tail call optimization that do not
   // require ABI changes. This is what gcc calls sibcall.
 
Index: llvm/trunk/test/CodeGen/ARM/v8m-tail-call.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/v8m-tail-call.ll
+++ llvm/trunk/test/CodeGen/ARM/v8m-tail-call.ll
@@ -45,3 +45,61 @@
 ; CHECK-NEXT: add sp, #4
 ; CHECK-NEXT: b h2
 }
+
+; Make sure that tail calls to function pointers that require r0-r3 for argument
+; passing do not break the compiler.
+@fnptr = global i32 (i32, i32, i32, i32)* null
+define i32 @test3() {
+; CHECK-LABEL: test3:
+; CHECK: blx {{r[0-9]+}}
+  %1 = load i32 (i32, i32, i32, i32)*, i32 (i32, i32, i32, i32)** @fnptr
+  %2 = tail call i32 %1(i32 1, i32 2, i32 3, i32 4)
+  ret i32 %2
+}
+
+@fnptr2 = global i32 (i32, i32, i64)* null
+define i32 @test4() {
+; CHECK-LABEL: test4:
+; CHECK: blx {{r[0-9]+}}
+  %1 = load i32 (i32, i32, i64)*, i32 (i32, i32, i64)** @fnptr2
+  %2 = tail call i32 %1(i32 1, i32 2, i64 3)
+  ret i32 %2
+}
+
+; Check that tail calls to function pointers where not all of r0-r3 are used for
+; parameter passing are tail-call optimized.
+; test5: params in r0, r1. r2 & r3 are free.
+@fnptr3 = global i32 (i32, i32)* null
+define i32 @test5() {
+; CHECK-LABEL: test5:
+; CHECK: ldr [[REG:r[0-9]+]]
+; CHECK: bx [[REG]]
+; CHECK-NOT: blx [[REG]]
+  %1 = load i32 (i32, i32)*, i32 (i32, i32)** @fnptr3
+  %2 = tail call i32 %1(i32 1, i32 2)
+  ret i32 %2
+}
+
+; test6: params in r0 and r2-r3. r1 is free.
+@fnptr4 = global i32 (i32, i64)* null
+define i32 @test6() {
+; CHECK-LABEL: test6:
+; CHECK: ldr [[REG:r[0-9]+]]
+; CHECK: bx [[REG]]
+; CHECK-NOT: blx [[REG]]
+  %1 = load i32 (i32, i64)*, i32 (i32, i64)** @fnptr4
+  %2 = tail call i32 %1(i32 1, i64 2)
+  ret i32 %2
+}
+
+; Check that tail calls to functions other than function pointers are
+; tail-call optimized.
+define i32 @test7() {
+; CHECK-LABEL: test7:
+; CHECK: b bar
+; CHECK-NOT: bl bar
+  %tail = tail call i32 @bar(i32 1, i32 2, i32 3, i32 4)
+  ret i32 %tail
+}
+
+declare i32 @bar(i32, i32, i32, i32)