Index: lib/Target/AArch64/AArch64ISelDAGToDAG.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -2782,7 +2782,32 @@ } break; } - + case ISD::CopyToReg: { + // Special case for copy of zero to avoid a double copy. + SDNode *CopyVal = Node->getOperand(2).getNode(); + if (ConstantSDNode *CopyValConst = dyn_cast<ConstantSDNode>(CopyVal)) { + if (CopyValConst->isNullValue()) { + unsigned ZeroReg; + EVT ZeroVT = CopyValConst->getValueType(0); + if (ZeroVT == MVT::i32) + ZeroReg = AArch64::WZR; + else if (ZeroVT == MVT::i64) + ZeroReg = AArch64::XZR; + else + break; + const SDValue &Dest = Node->getOperand(1); + if (!TargetRegisterInfo::isVirtualRegister( + cast<RegisterSDNode>(Dest)->getReg())) + break; + SDValue ZeroRegVal = CurDAG->getRegister(ZeroReg, ZeroVT); + SDValue New = CurDAG->getNode(ISD::CopyToReg, SDLoc(Node), MVT::Other, + Node->getOperand(0), Dest, ZeroRegVal); + ReplaceNode(Node, New.getNode()); + return; + } + } + break; + } case ISD::FrameIndex: { // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 
int FI = cast<FrameIndexSDNode>(Node)->getIndex(); Index: test/CodeGen/AArch64/arm64-addr-type-promotion.ll =================================================================== --- test/CodeGen/AArch64/arm64-addr-type-promotion.ll +++ test/CodeGen/AArch64/arm64-addr-type-promotion.ll @@ -28,6 +28,7 @@ ; Next BB ; CHECK: ldrb [[LOADEDVAL3:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #2] ; CHECK-NEXT: ldrb [[LOADEDVAL4:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #2] +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]] entry: %idxprom = sext i32 %i1 to i64 Index: test/CodeGen/AArch64/arm64-cse.ll =================================================================== --- test/CodeGen/AArch64/arm64-cse.ll +++ test/CodeGen/AArch64/arm64-cse.ll @@ -10,7 +10,7 @@ ; CHECK: subs ; CHECK-NOT: cmp ; CHECK-NOT: sub -; CHECK: b.ge +; CHECK: b.lt ; CHECK: sub ; CHECK: sub ; CHECK-NOT: sub Index: test/CodeGen/AArch64/copy-zero-reg.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/copy-zero-reg.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s + +; Verify no tiny block has only one mov wzr instruction +define void @unroll_by_2(i32 %trip_count, i32* %p) { +; CHECK-LABEL: unroll_by_2 +; CHECK: // %for.body.lr.ph +; CHECK: mov w{{[0-9]+}}, wzr +; CHECK: // %for.body.lr.ph.new +; CHECK: // %for.body +; CHECK: // %sw.epilog.loopexit +; CHECK: // %for.body.epil +; CHECK: // %exit +; CHECK-NEXT: ret +for.body.lr.ph: + %xtraiter = and i32 %trip_count, 1 + %cmp = icmp eq i32 %trip_count, 1 + br i1 %cmp, label %sw.epilog.loopexit, label %for.body.lr.ph.new + +for.body.lr.ph.new: + %unroll_iter = sub nsw i32 %trip_count, %xtraiter + br label %for.body + +for.body: + %indvars = phi i32 [ 0, %for.body.lr.ph.new ], [ %indvars.next, %for.body ] + %niter = phi i32 [ %unroll_iter, %for.body.lr.ph.new ], [ %niter.nsub, %for.body ] + %array = getelementptr inbounds i32, i32 * %p, i32 %indvars + store i32 
%niter, i32* %array + %indvars.next = add i32 %indvars, 2 + %niter.nsub = add i32 %niter, -2 + %niter.ncmp = icmp eq i32 %niter.nsub, 0 + br i1 %niter.ncmp, label %sw.epilog.loopexit, label %for.body + +sw.epilog.loopexit: + %indvars.unr = phi i32 [ 0, %for.body.lr.ph ], [ %indvars.next, %for.body ] + %lcmp.mod = icmp eq i32 %xtraiter, 0 + br i1 %lcmp.mod, label %exit, label %for.body.epil + +for.body.epil: + %array.epil = getelementptr inbounds i32, i32* %p, i32 %indvars.unr + store i32 %indvars.unr, i32* %array.epil + br label %exit + +exit: + ret void +} Index: test/CodeGen/AArch64/i128-fast-isel-fallback.ll =================================================================== --- test/CodeGen/AArch64/i128-fast-isel-fallback.ll +++ test/CodeGen/AArch64/i128-fast-isel-fallback.ll @@ -10,7 +10,7 @@ ; registers that make up the i128 pair ; CHECK: mov x0, xzr -; CHECK: mov x1, x0 +; CHECK: mov x1, xzr ; CHECK: bl _test2 }