Index: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp =================================================================== --- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp +++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1207,12 +1207,24 @@ } } + // If the instruction is convertible to 3 Addr, instead + // of returning try 3 Addr transformation aggressively and + // use this variable to check later. Because it might be better. + // For example, we can just use `leal (%rsi,%rdi), %eax` and `ret` + // instead of the following code. + // addl %esi, %edi + // movl %edi, %eax + // ret + bool commuted = false; + // If it's profitable to commute, try to do so. if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) { + commuted = true; ++NumCommuted; if (AggressiveCommute) ++NumAggrCommuted; - return false; + if (!MI.isConvertibleTo3Addr()) + return false; } if (shouldOnlyCommute) @@ -1220,7 +1232,7 @@ // If there is one more use of regB later in the same MBB, consider // re-schedule this MI below it. - if (EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) { + if (!commuted && EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) { ++NumReSchedDowns; return true; } @@ -1237,6 +1249,10 @@ } } + // Return if it is commuted but 3 addr conversion failed. + if (commuted) + return false; + // If there is one more use of regB later in the same MBB, consider // re-schedule it before this MI if it's legal. 
if (EnableRescheduling && rescheduleKillAboveMI(mi, nmi, regB)) { Index: llvm/trunk/test/CodeGen/X86/commute-two-addr.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/commute-two-addr.ll +++ llvm/trunk/test/CodeGen/X86/commute-two-addr.ll @@ -39,7 +39,7 @@ entry: ; DARWIN-LABEL: t3: ; DARWIN: shlq $32, %rcx -; DARWIN-NEXT: orq %rcx, %rax +; DARWIN-NEXT: leaq (%rax,%rcx), %rax ; DARWIN-NEXT: shll $8 ; DARWIN-NOT: leaq %tmp21 = zext i32 %lb to i64 Index: llvm/trunk/test/CodeGen/X86/twoaddr-lea.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/twoaddr-lea.ll +++ llvm/trunk/test/CodeGen/X86/twoaddr-lea.ll @@ -25,8 +25,7 @@ entry: ; CHECK-LABEL: test2: ; CHECK: leal -; CHECK-NOT: leal -; CHECK-NOT: mov +; CHECK-NEXT: addl ; CHECK-NEXT: addl ; CHECK-NEXT: ret %add = add i32 %b, %a Index: llvm/trunk/test/CodeGen/X86/win64_params.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/win64_params.ll +++ llvm/trunk/test/CodeGen/X86/win64_params.ll @@ -7,8 +7,7 @@ entry: ; CHECK: movl 48(%rsp), %eax ; CHECK: addl 40(%rsp), %eax -; LINUX: addl %r9d, %r8d -; LINUX: movl %r8d, %eax +; LINUX: leal (%r8,%r9), %eax %add = add nsw i32 %p6, %p5 ret i32 %add } @@ -27,10 +26,8 @@ ; on other platforms here (note the x86_64_sysvcc calling convention). 
define x86_64_sysvcc i32 @f8(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize { entry: -; CHECK: addl %r9d, %r8d -; CHECK: movl %r8d, %eax -; LINUX: addl %r9d, %r8d -; LINUX: movl %r8d, %eax +; CHECK: leal (%r8,%r9), %eax +; LINUX: leal (%r8,%r9), %eax %add = add nsw i32 %p6, %p5 ret i32 %add } Index: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll =================================================================== --- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll +++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll @@ -23,7 +23,7 @@ ; X32: add ; X32: add ; X32: add -; X32: add +; X32: leal ; X32: %for.body.3 define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp { entry: