Index: lib/Target/AArch64/AArch64ISelDAGToDAG.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -2764,18 +2764,24 @@ return; } case ISD::Constant: { - // Materialize zero constants as copies from WZR/XZR. This allows - // the coalescer to propagate these into other instructions. + // If all uses of zero constants are copies to virtual regs, replace the + // constants with WZR/XZR. Otherwise, materialize zero constants as copies + // from WZR/XZR and allow the coalescer to propagate these into other + // instructions. ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); if (ConstNode->isNullValue()) { - if (VT == MVT::i32) { - SDValue New = CurDAG->getCopyFromReg( - CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); - ReplaceNode(Node, New.getNode()); - return; - } else if (VT == MVT::i64) { - SDValue New = CurDAG->getCopyFromReg( - CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); + if (VT == MVT::i32 || VT == MVT::i64) { + unsigned ZReg = (VT == MVT::i32) ? 
AArch64::WZR : AArch64::XZR; + SDValue New; + if (llvm::all_of(Node->uses(), [](SDNode *User) { + return (User->getOpcode() == ISD::CopyToReg && + TargetRegisterInfo::isVirtualRegister( + cast<RegisterSDNode>(User->getOperand(1))->getReg())); + })) + New = CurDAG->getRegister(ZReg, VT); + else + New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node), + ZReg, VT); ReplaceNode(Node, New.getNode()); return; } Index: test/CodeGen/AArch64/arm64-addr-type-promotion.ll =================================================================== --- test/CodeGen/AArch64/arm64-addr-type-promotion.ll +++ test/CodeGen/AArch64/arm64-addr-type-promotion.ll @@ -28,6 +28,7 @@ ; Next BB ; CHECK: ldrb [[LOADEDVAL3:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #2] ; CHECK-NEXT: ldrb [[LOADEDVAL4:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #2] +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]] entry: %idxprom = sext i32 %i1 to i64 Index: test/CodeGen/AArch64/arm64-cse.ll =================================================================== --- test/CodeGen/AArch64/arm64-cse.ll +++ test/CodeGen/AArch64/arm64-cse.ll @@ -10,7 +10,7 @@ ; CHECK: subs ; CHECK-NOT: cmp ; CHECK-NOT: sub -; CHECK: b.ge +; CHECK: b.lt ; CHECK: sub ; CHECK: sub ; CHECK-NOT: sub Index: test/CodeGen/AArch64/copy-zero-reg.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/copy-zero-reg.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s + +; Verify no tiny block has only one mov wzr instruction +define void @unroll_by_2(i32 %trip_count, i32* %p) { +; CHECK-LABEL: unroll_by_2 +; CHECK: // %for.body.lr.ph +; CHECK: mov w{{[0-9]+}}, wzr +; CHECK: // %for.body.lr.ph.new +; CHECK: // %for.body +; CHECK: // %sw.epilog.loopexit +; CHECK: // %for.body.epil +; CHECK: // %exit +; CHECK-NEXT: ret +for.body.lr.ph: + %xtraiter = and i32 %trip_count, 1 + %cmp = icmp eq i32 %trip_count, 1 + br i1 %cmp, label %sw.epilog.loopexit, 
label %for.body.lr.ph.new + +for.body.lr.ph.new: + %unroll_iter = sub nsw i32 %trip_count, %xtraiter + br label %for.body + +for.body: + %indvars = phi i32 [ 0, %for.body.lr.ph.new ], [ %indvars.next, %for.body ] + %niter = phi i32 [ %unroll_iter, %for.body.lr.ph.new ], [ %niter.nsub, %for.body ] + %array = getelementptr inbounds i32, i32 * %p, i32 %indvars + store i32 %niter, i32* %array + %indvars.next = add i32 %indvars, 2 + %niter.nsub = add i32 %niter, -2 + %niter.ncmp = icmp eq i32 %niter.nsub, 0 + br i1 %niter.ncmp, label %sw.epilog.loopexit, label %for.body + +sw.epilog.loopexit: + %indvars.unr = phi i32 [ 0, %for.body.lr.ph ], [ %indvars.next, %for.body ] + %lcmp.mod = icmp eq i32 %xtraiter, 0 + br i1 %lcmp.mod, label %exit, label %for.body.epil + +for.body.epil: + %array.epil = getelementptr inbounds i32, i32* %p, i32 %indvars.unr + store i32 %indvars.unr, i32* %array.epil + br label %exit + +exit: + ret void +} Index: test/CodeGen/AArch64/i128-fast-isel-fallback.ll =================================================================== --- test/CodeGen/AArch64/i128-fast-isel-fallback.ll +++ test/CodeGen/AArch64/i128-fast-isel-fallback.ll @@ -10,7 +10,7 @@ ; registers that make up the i128 pair ; CHECK: mov x0, xzr -; CHECK: mov x1, x0 +; CHECK: mov x1, xzr ; CHECK: bl _test2 }