Index: lib/Target/X86/X86OptimizeLEAs.cpp =================================================================== --- lib/Target/X86/X86OptimizeLEAs.cpp +++ lib/Target/X86/X86OptimizeLEAs.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file defines the pass that performs some optimizations with LEA -// instructions in order to improve code size. +// instructions in order to improve performance and code size. // Currently, it does two things: // 1) If there are two LEA instructions calculating addresses which only differ // by displacement inside a basic block, one of them is removed. @@ -614,9 +614,7 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - // Perform this optimization only if we care about code size. - if (DisableX86LEAOpt || skipFunction(*MF.getFunction()) || - !MF.getFunction()->optForSize()) + if (DisableX86LEAOpt || skipFunction(*MF.getFunction())) return false; MRI = &MF.getRegInfo(); @@ -635,13 +633,13 @@ if (LEAs.empty()) continue; - // Remove redundant LEA instructions. The optimization may have a negative - // effect on performance, so do it only for -Oz. - if (MF.getFunction()->optForMinSize()) - Changed |= removeRedundantLEAs(LEAs); + // Remove redundant LEA instructions. + Changed |= removeRedundantLEAs(LEAs); - // Remove redundant address calculations. - Changed |= removeRedundantAddrCalc(LEAs); + // Remove redundant address calculations. Do it only for -Os/-Oz since only + // a code size gain is expected from this part of the pass. 
+ if (MF.getFunction()->optForSize()) + Changed |= removeRedundantAddrCalc(LEAs); } return Changed; Index: test/CodeGen/X86/lea-opt.ll =================================================================== --- test/CodeGen/X86/lea-opt.ll +++ test/CodeGen/X86/lea-opt.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK -check-prefix=ENABLED +; RUN: llc --disable-x86-lea-opt < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK -check-prefix=DISABLED %struct.anon1 = type { i32, i32, i32 } %struct.anon2 = type { i32, [32 x i32], i32 } @@ -38,12 +39,14 @@ ; CHECK: movl arr1([[REG1]],[[REG1]],2), {{.*}} ; CHECK: leaq arr1+4([[REG1]],[[REG1]],2), [[REG2:%[a-z]+]] ; CHECK: subl arr1+4([[REG1]],[[REG1]],2), {{.*}} -; CHECK: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]] +; DISABLED: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]] ; CHECK: addl arr1+8([[REG1]],[[REG1]],2), {{.*}} ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) -; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; ENABLED: movl ${{[1-4]+}}, 4([[REG2]]) +; DISABLED: movl ${{[1-4]+}}, ([[REG3]]) ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) -; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; ENABLED: movl ${{[1-4]+}}, 4([[REG2]]) +; DISABLED: movl ${{[1-4]+}}, ([[REG3]]) } define void @test2(i64 %x) nounwind optsize { @@ -75,15 +78,20 @@ ret void ; CHECK-LABEL: test2: ; CHECK: shlq $2, [[REG1:%[a-z]+]] +; DISABLED: movl arr1([[REG1]],[[REG1]],2), {{.*}} ; CHECK: leaq arr1+4([[REG1]],[[REG1]],2), [[REG2:%[a-z]+]] -; CHECK: movl -4([[REG2]]), {{.*}} -; CHECK: subl ([[REG2]]), {{.*}} -; CHECK: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]] -; CHECK: addl ([[REG3]]), {{.*}} +; ENABLED: movl -4([[REG2]]), {{.*}} +; ENABLED: subl ([[REG2]]), {{.*}} +; ENABLED: addl 4([[REG2]]), {{.*}} +; DISABLED: subl arr1+4([[REG1]],[[REG1]],2), {{.*}} +; DISABLED: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]] +; DISABLED: addl arr1+8([[REG1]],[[REG1]],2), 
{{.*}} ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) -; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; ENABLED: movl ${{[1-4]+}}, 4([[REG2]]) +; DISABLED: movl ${{[1-4]+}}, ([[REG3]]) ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) -; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; ENABLED: movl ${{[1-4]+}}, 4([[REG2]]) +; DISABLED: movl ${{[1-4]+}}, ([[REG3]]) } ; Check that LEA optimization pass takes into account a resultant address @@ -109,7 +117,9 @@ sw.bb.2: ; preds = %entry store i32 333, i32* %a, align 4 - store i32 444, i32* %b, align 4 + ; Make sure the LEA that defines REG3 won't be removed as redundant. + %cvt = ptrtoint i32* %b to i32 + store i32 %cvt, i32* %b, align 4 br label %sw.epilog sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry @@ -122,12 +132,14 @@ ; REG3's definition is closer to movl than REG2's, but the pass still chooses ; REG2 because it provides the resultant address displacement fitting 1 byte. -; CHECK: movl ([[REG2]]), {{.*}} -; CHECK: addl ([[REG3]]), {{.*}} +; ENABLED: movl ([[REG2]]), {{.*}} +; ENABLED: addl ([[REG3]]), {{.*}} +; DISABLED: movl arr2+132([[REG1]]), {{.*}} +; DISABLED: addl arr2([[REG1]]), {{.*}} ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) ; CHECK: movl ${{[1-4]+}}, ([[REG3]]) ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) -; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; CHECK: movl {{.*}}, ([[REG3]]) } define void @test4(i64 %x) nounwind minsize { @@ -158,12 +170,19 @@ sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry ret void ; CHECK-LABEL: test4: -; CHECK: leaq arr1+4({{.*}}), [[REG2:%[a-z]+]] -; CHECK: movl -4([[REG2]]), {{.*}} -; CHECK: subl ([[REG2]]), {{.*}} -; CHECK: addl 4([[REG2]]), {{.*}} +; CHECK: imulq {{.*}}, [[REG1:%[a-z]+]] +; DISABLED: movl arr1([[REG1]]), {{.*}} +; CHECK: leaq arr1+4([[REG1]]), [[REG2:%[a-z]+]] +; ENABLED: movl -4([[REG2]]), {{.*}} +; ENABLED: subl ([[REG2]]), {{.*}} +; ENABLED: addl 4([[REG2]]), {{.*}} +; DISABLED: subl arr1+4([[REG1]]), {{.*}} +; DISABLED: leaq arr1+8([[REG1]]), [[REG3:%[a-z]+]] +; DISABLED: addl arr1+8([[REG1]]), {{.*}} 
; CHECK: movl ${{[1-4]+}}, ([[REG2]]) -; CHECK: movl ${{[1-4]+}}, 4([[REG2]]) +; ENABLED: movl ${{[1-4]+}}, 4([[REG2]]) +; DISABLED: movl ${{[1-4]+}}, ([[REG3]]) ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) -; CHECK: movl ${{[1-4]+}}, 4([[REG2]]) +; ENABLED: movl ${{[1-4]+}}, 4([[REG2]]) +; DISABLED: movl ${{[1-4]+}}, ([[REG3]]) }