Index: lib/Target/X86/X86OptimizeLEAs.cpp =================================================================== --- lib/Target/X86/X86OptimizeLEAs.cpp +++ lib/Target/X86/X86OptimizeLEAs.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file defines the pass that performs some optimizations with LEA -// instructions in order to improve code size. +// instructions in order to improve performance and code size. // Currently, it does two things: // 1) If there are two LEA instructions calculating addresses which only differ // by displacement inside a basic block, one of them is removed. @@ -614,9 +614,7 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - // Perform this optimization only if we care about code size. - if (DisableX86LEAOpt || skipFunction(*MF.getFunction()) || - !MF.getFunction()->optForSize()) + if (DisableX86LEAOpt || skipFunction(*MF.getFunction())) return false; MRI = &MF.getRegInfo(); @@ -635,13 +633,13 @@ if (LEAs.empty()) continue; - // Remove redundant LEA instructions. The optimization may have a negative - // effect on performance, so do it only for -Oz. - if (MF.getFunction()->optForMinSize()) - Changed |= removeRedundantLEAs(LEAs); + // Remove redundant LEA instructions. + Changed |= removeRedundantLEAs(LEAs); - // Remove redundant address calculations. - Changed |= removeRedundantAddrCalc(LEAs); + // Remove redundant address calculations. Do it only for -Os/-Oz since only + // a code size gain is expected from this part of the pass. 
+ if (MF.getFunction()->optForSize()) + Changed |= removeRedundantAddrCalc(LEAs); } return Changed; Index: test/CodeGen/X86/lea-opt-disable.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/lea-opt-disable.ll @@ -0,0 +1,47 @@ +; RUN: llc --disable-x86-lea-opt < %s -mtriple=x86_64-linux | FileCheck %s + +%struct.anon1 = type { i32, i32, i32 } +%struct.anon2 = type { i32, [32 x i32], i32 } + +@arr1 = external global [65 x %struct.anon1], align 16 +@arr2 = external global [65 x %struct.anon2], align 16 + +define void @test1(i64 %x) nounwind { +entry: + %a = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 0 + %tmp = load i32, i32* %a, align 4 + %b = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 1 + %tmp1 = load i32, i32* %b, align 4 + %sub = sub i32 %tmp, %tmp1 + %c = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 2 + %tmp2 = load i32, i32* %c, align 4 + %add = add nsw i32 %sub, %tmp2 + switch i32 %add, label %sw.epilog [ + i32 1, label %sw.bb.1 + i32 2, label %sw.bb.2 + ] + +sw.bb.1: ; preds = %entry + store i32 111, i32* %b, align 4 + store i32 222, i32* %c, align 4 + br label %sw.epilog + +sw.bb.2: ; preds = %entry + store i32 333, i32* %b, align 4 + store i32 444, i32* %c, align 4 + br label %sw.epilog + +sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry + ret void +; CHECK-LABEL: test1: +; CHECK: shlq $2, [[REG1:%[a-z]+]] +; CHECK: movl arr1([[REG1]],[[REG1]],2), {{.*}} +; CHECK: leaq arr1+4([[REG1]],[[REG1]],2), [[REG2:%[a-z]+]] +; CHECK: subl arr1+4([[REG1]],[[REG1]],2), {{.*}} +; CHECK: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]] +; CHECK: addl arr1+8([[REG1]],[[REG1]],2), {{.*}} +; CHECK: movl ${{[1-4]+}}, ([[REG2]]) +; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; CHECK: movl ${{[1-4]+}}, ([[REG2]]) +; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +} Index: 
test/CodeGen/X86/lea-opt.ll =================================================================== --- test/CodeGen/X86/lea-opt.ll +++ test/CodeGen/X86/lea-opt.ll @@ -38,12 +38,11 @@ ; CHECK: movl arr1([[REG1]],[[REG1]],2), {{.*}} ; CHECK: leaq arr1+4([[REG1]],[[REG1]],2), [[REG2:%[a-z]+]] ; CHECK: subl arr1+4([[REG1]],[[REG1]],2), {{.*}} -; CHECK: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]] ; CHECK: addl arr1+8([[REG1]],[[REG1]],2), {{.*}} ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) -; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; CHECK: movl ${{[1-4]+}}, 4([[REG2]]) ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) -; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; CHECK: movl ${{[1-4]+}}, 4([[REG2]]) } define void @test2(i64 %x) nounwind optsize { @@ -78,12 +77,11 @@ ; CHECK: leaq arr1+4([[REG1]],[[REG1]],2), [[REG2:%[a-z]+]] ; CHECK: movl -4([[REG2]]), {{.*}} ; CHECK: subl ([[REG2]]), {{.*}} -; CHECK: leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]] -; CHECK: addl ([[REG3]]), {{.*}} +; CHECK: addl 4([[REG2]]), {{.*}} ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) -; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; CHECK: movl ${{[1-4]+}}, 4([[REG2]]) ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) -; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; CHECK: movl ${{[1-4]+}}, 4([[REG2]]) } ; Check that LEA optimization pass takes into account a resultant address @@ -109,7 +107,9 @@ sw.bb.2: ; preds = %entry store i32 333, i32* %a, align 4 - store i32 444, i32* %b, align 4 + ; Make sure the LEA defining REG3 won't be removed as redundant. + %cvt = ptrtoint i32* %b to i32 + store i32 %cvt, i32* %b, align 4 br label %sw.epilog sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry @@ -127,7 +127,7 @@ ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) ; CHECK: movl ${{[1-4]+}}, ([[REG3]]) ; CHECK: movl ${{[1-4]+}}, ([[REG2]]) -; CHECK: movl ${{[1-4]+}}, ([[REG3]]) +; CHECK: movl {{.*}}, ([[REG3]]) } define void @test4(i64 %x) nounwind minsize {