Index: include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- include/llvm/CodeGen/TargetInstrInfo.h
+++ include/llvm/CodeGen/TargetInstrInfo.h
@@ -953,6 +953,10 @@
   /// Return true when a target supports MachineCombiner.
   virtual bool useMachineCombiner() const { return false; }
 
+  /// Return true if the given SDNode can be copied during scheduling
+  /// even if it has glue.
+  virtual bool canCopyGluedNodeDuringSchedule(SDNode *N) const { return false; }
+
 protected:
   /// Target-dependent implementation for foldMemoryOperand.
   /// Target-independent code in foldMemoryOperand will
Index: lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1117,22 +1117,33 @@
   if (!N)
     return nullptr;
 
-  if (SU->getNode()->getGluedNode())
+  DEBUG(dbgs() << "Considering duplicating the SU\n");
+  DEBUG(SU->dump(this));
+
+  if (N->getGluedNode() && !TII->canCopyGluedNodeDuringSchedule(N)) {
+    DEBUG(dbgs()
+          << "Giving up because it has incoming glue and the target does not "
+             "want to copy it\n");
     return nullptr;
+  }
 
   SUnit *NewSU;
   bool TryUnfold = false;
   for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
     MVT VT = N->getSimpleValueType(i);
-    if (VT == MVT::Glue)
+    if (VT == MVT::Glue) {
+      DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
       return nullptr;
-    else if (VT == MVT::Other)
+    } else if (VT == MVT::Other)
       TryUnfold = true;
   }
   for (const SDValue &Op : N->op_values()) {
     MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
-    if (VT == MVT::Glue)
+    if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
+      DEBUG(dbgs() << "Giving up because it one of the operands is glue and "
+                      "the target does not want to copy it\n");
       return nullptr;
+    }
   }
 
   // If possible unfold instruction.
Index: lib/Target/ARM/Thumb1InstrInfo.h
===================================================================
--- lib/Target/ARM/Thumb1InstrInfo.h
+++ lib/Target/ARM/Thumb1InstrInfo.h
@@ -53,6 +53,7 @@
                             const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const override;
 
+  bool canCopyGluedNodeDuringSchedule(SDNode *N) const override;
 private:
   void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
 };
Index: lib/Target/ARM/Thumb1InstrInfo.cpp
===================================================================
--- lib/Target/ARM/Thumb1InstrInfo.cpp
+++ lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -141,3 +141,19 @@
   else
     expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi);
 }
+
+bool Thumb1InstrInfo::canCopyGluedNodeDuringSchedule(SDNode *N) const {
+  // In Thumb1 the scheduler may need to schedule a cross-copy between GPRs and
+  // CPSR, but this is not always possible there, so allow the scheduler to
+  // clone tADCS and tSBCS even if they have glue.
+  // FIXME: Actually implement the cross-copy where it is possible (post v6)
+  // because these copies entail more spilling.
+
+  // getMachineOpcode() is only valid on selected machine nodes; any other
+  // glued node the scheduler asks about is never one we want to copy.
+  if (!N->isMachineOpcode())
+    return false;
+
+  unsigned Opcode = N->getMachineOpcode();
+  return Opcode == ARM::tADCS || Opcode == ARM::tSBCS;
+}
Index: test/CodeGen/Thumb/pr35836.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Thumb/pr35836.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv5e-none-linux-gnueabi"
+
+; Function Attrs: norecurse nounwind optsize
+define void @f(i32,i32,i32,i32,i32* %x4p, i32* %x5p, i32* %x6p) {
+if.end:
+  br label %while.body
+
+while.body:
+  %ll.0100 = phi i64 [ 0, %if.end ], [ %shr32, %while.body ]
+  %add = add nuw nsw i64 %ll.0100, 0
+  %add3 = add nuw nsw i64 %add, 0
+  %shr = lshr i64 %add3, 32
+  %conv7 = zext i32 %0 to i64
+  %conv9 = zext i32 %1 to i64
+  %add10 = add nuw nsw i64 %conv9, %conv7
+  %add11 = add nuw nsw i64 %add10, %shr
+  %shr14 = lshr i64 %add11, 32
+  %conv16 = zext i32 %2 to i64
+  %conv18 = zext i32 %3 to i64
+  %add19 = add nuw nsw i64 %conv18, %conv16
+  %add20 = add nuw nsw i64 %add19, %shr14
+  %conv21 = trunc i64 %add20 to i32
+  store i32 %conv21, i32* %x6p, align 4
+  %shr23 = lshr i64 %add20, 32
+  %x4 = load i32, i32* %x4p, align 4
+  %conv25 = zext i32 %x4 to i64
+  %x5 = load i32, i32* %x5p, align 4
+  %conv27 = zext i32 %x5 to i64
+  %add28 = add nuw nsw i64 %conv27, %conv25
+  %add29 = add nuw nsw i64 %add28, %shr23
+  %shr32 = lshr i64 %add29, 32
+  br label %while.body
+}
+; CHECK: adds r3, r0, r1
+; CHECK: push {r5}
+; CHECK: pop {r1}
+; CHECK: adcs r1, r1
+; CHECK: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK: ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK: adds r2, r0, r2
+; CHECK: push {r5}
+; CHECK: pop {r4}
+; CHECK: adcs r4, r4
+; CHECK: adds r0, r2, r5
+; CHECK: push {r3}
+; CHECK: pop {r0}
+; CHECK: adcs r0, r4
+; CHECK: ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK: str r0, [r6]
+; CHECK: ldr r0, [r7]
+; CHECK: ldr r6, [sp] @ 4-byte Reload
+; CHECK: ldr r6, [r6]
+; CHECK: adds r0, r6, r0
Index: test/CodeGen/Thumb/pr35836_2.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Thumb/pr35836_2.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i128:64-v128:64:128-a:0:64-n64-S64"
+target triple = "thumbv6---gnueabi"
+
+; Function Attrs: norecurse nounwind readonly
+define i128 @a(i64* nocapture readonly %z) local_unnamed_addr #0 {
+entry:
+  %0 = load i64, i64* %z, align 4
+  %conv.i = zext i64 %0 to i128
+  %arrayidx1 = getelementptr inbounds i64, i64* %z, i64 2
+  %1 = load i64, i64* %arrayidx1, align 4
+  %conv.i38 = zext i64 %1 to i128
+  %shl.i39 = shl nuw i128 %conv.i38, 64
+  %or = or i128 %shl.i39, %conv.i
+  %arrayidx3 = getelementptr inbounds i64, i64* %z, i64 1
+  %2 = load i64, i64* %arrayidx3, align 4
+  %conv.i37 = zext i64 %2 to i128
+  %arrayidx5 = getelementptr inbounds i64, i64* %z, i64 3
+  %3 = load i64, i64* %arrayidx5, align 4
+  %conv.i35 = zext i64 %3 to i128
+  %shl.i36 = shl nuw i128 %conv.i35, 64
+  %or7 = or i128 %shl.i36, %conv.i37
+  %arrayidx10 = getelementptr inbounds i64, i64* %z, i64 4
+  %4 = load i64, i64* %arrayidx10, align 4
+  %conv.i64 = zext i64 %4 to i128
+  %shl.i33 = shl nuw i128 %conv.i64, 64
+  %or12 = or i128 %shl.i33, %conv.i
+  %arrayidx15 = getelementptr inbounds i64, i64* %z, i64 5
+  %5 = load i64, i64* %arrayidx15, align 4
+  %conv.i30 = zext i64 %5 to i128
+  %shl.i = shl nuw i128 %conv.i30, 64
+  %or17 = or i128 %shl.i, %conv.i37
+  %add = add i128 %or7, %or
+  %add18 = add i128 %or17, %or12
+  %mul = mul i128 %add18, %add
+  ret i128 %mul
+}
+; CHECK: adds r4, r2, r7
+; CHECK: mov r4, r1
+; CHECK: adcs r4, r6
+; CHECK: ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK: adcs r5, r4
+; CHECK: ldr r4, [sp, #24] @ 4-byte Reload
+; CHECK: adcs r3, r4
+; CHECK: adds r4, r2, r7
+; CHECK: adcs r1, r6
+; CHECK: mov r2, sp
+; CHECK: str r4, [r2]
+; CHECK: str r1, [r2, #4]
+; CHECK: ldr r6, [r0, #16]
+; CHECK: ldr r7, [r0, #24]
+; CHECK: adcs r7, r6
+; CHECK: str r7, [r2, #8]
+; CHECK: ldr r6, [r0, #20]
+; CHECK: ldr r0, [r0, #28]
+; CHECK: adcs r0, r6