Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp @@ -1988,32 +1988,10 @@ // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; - // Tail call byval lowering might overwrite argument registers so in case of - // tail call optimization the copies to registers are lowered later. - if (!isTailCall) - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // For tail calls lower the arguments to the 'real' stack slot. - if (isTailCall) { - // Force all the incoming stack arguments to be loaded from the stack - // before any new outgoing arguments are stored to the stack, because the - // outgoing stack slots may alias the incoming argument stack slots, and - // the alias isn't otherwise explicit. This is slightly more conservative - // than necessary, because it means that each store effectively depends - // on every argument instead of just those arguments it would clobber. - - // Do not flag preceding copytoreg stuff together with the following stuff. - InFlag = SDValue(); - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - InFlag = SDValue(); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every Index: llvm/trunk/test/CodeGen/ARM/tail-call-scheduling.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/tail-call-scheduling.ll +++ llvm/trunk/test/CodeGen/ARM/tail-call-scheduling.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s | FileCheck %s +target triple = "armv6kz-unknown-unknown-gnueabihf" + +; Make sure this doesn't crash, and we actually emit a tail call. +; Unfortunately, this test is sort of fragile... the original issue only +; shows up if scheduling happens in a very specific order. But including +; it anyway just to demonstrate the issue. +; CHECK: pop {r4, lr} + +@e = external local_unnamed_addr constant [0 x i32 (i32, i32)*], align 4 + +; Function Attrs: nounwind sspstrong +define i32 @AVI_ChunkRead_p_chk(i32 %g) nounwind sspstrong "target-cpu"="arm1176jzf-s" { +entry: + %b = alloca i8, align 1 + %tobool = icmp eq i32 %g, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %add = add nsw i32 %g, 1 + %arrayidx = getelementptr inbounds [0 x i32 (i32, i32)*], [0 x i32 (i32, i32)*]* @e, i32 0, i32 %add + %0 = load i32 (i32, i32)*, i32 (i32, i32)** %arrayidx, align 4 + %call = tail call i32 %0(i32 0, i32 0) #3 + br label %return + +if.end: ; preds = %entry + call void @c(i8* nonnull %b) + br label %return + +return: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ 0, %if.end ] + ret i32 %retval.0 +} + +declare void @c(i8*)