Index: llvm/lib/CodeGen/BreakFalseDeps.cpp
===================================================================
--- llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -177,6 +177,7 @@
   assert(!MI->isDebugInstr() && "Won't process debug values");

   // Break dependence on undef uses. Do this before updating LiveRegs below.
+  // This can remove a false dependence with no additional instructions.
   unsigned OpNum;
   unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
   if (Pref) {
@@ -188,6 +189,11 @@
       UndefReads.push_back(std::make_pair(MI, OpNum));
   }

+  // The code below allows the target to create a new instruction to break the
+  // dependence. That opposes the goal of minimizing size, so bail out now.
+  if (MF->getFunction().hasMinSize())
+    return;
+
   const MCInstrDesc &MCID = MI->getDesc();
   for (unsigned i = 0,
                 e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
@@ -208,6 +214,11 @@
   if (UndefReads.empty())
     return;

+  // The code below allows the target to create a new instruction to break the
+  // dependence. That opposes the goal of minimizing size, so bail out now.
+  if (MF->getFunction().hasMinSize())
+    return;
+
   // Collect this block's live out register units.
   LiveRegSet.init(*TRI);
   // We do not need to care about pristine registers as they are just preserved
Index: llvm/test/CodeGen/ARM/a15-partial-update.ll
===================================================================
--- llvm/test/CodeGen/ARM/a15-partial-update.ll
+++ llvm/test/CodeGen/ARM/a15-partial-update.ll
@@ -56,6 +56,8 @@
   ret void
 }

+; If minimizing size, that overrides perf, so no extra vmov.f64 here.
+
 define void @t2_minsize(<4 x i8> *%in, <4 x i8> *%out, i32 %n) minsize {
 ; CHECK-LABEL: t2_minsize:
 ; CHECK:       @ %bb.0: @ %entry
@@ -63,7 +65,6 @@
 ; CHECK-NEXT:    add r1, r1, #4
 ; CHECK-NEXT:  .LBB2_1: @ %loop
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vmov.f64 d16, #5.000000e-01
 ; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
 ; CHECK-NEXT:    vmovl.u8 q8, d16
 ; CHECK-NEXT:    vuzp.8 d16, d18
Index: llvm/test/CodeGen/X86/sqrt-partial.ll
===================================================================
--- llvm/test/CodeGen/X86/sqrt-partial.ll
+++ llvm/test/CodeGen/X86/sqrt-partial.ll
@@ -67,7 +67,6 @@
 ; SSE-NEXT:    mulsd %xmm0, %xmm0
 ; SSE-NEXT:    mulsd %xmm1, %xmm1
 ; SSE-NEXT:    addsd %xmm0, %xmm1
-; SSE-NEXT:    xorps %xmm0, %xmm0
 ; SSE-NEXT:    sqrtsd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
Index: llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
===================================================================
--- llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ llvm/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -2876,7 +2876,6 @@
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    vroundsd $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
 ; CHECK-NEXT:    retq
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
Index: llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll
===================================================================
--- llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll
+++ llvm/test/CodeGen/X86/stack-folding-fp-sse42.ll
@@ -583,7 +583,6 @@
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    cvtsd2ss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
 ; CHECK-NEXT:    retq
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
@@ -975,7 +974,6 @@
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    cvtss2sd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
 ; CHECK-NEXT:    retq
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
@@ -2012,7 +2010,6 @@
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    roundss $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
 ; CHECK-NEXT:    retq
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
@@ -2181,7 +2178,6 @@
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:    sqrtss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
 ; CHECK-NEXT:    retq
   %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()