Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1428,7 +1428,7 @@ RegisterSDNode *RN = dyn_cast(Base); if (RN && RN->getReg() == 0) Base = CurDAG->getRegister(0, MVT::i64); - else if (Base.getValueType() == MVT::i32 && !dyn_cast(N)) { + else if (Base.getValueType() == MVT::i32 && !dyn_cast(Base)) { // Base could already be %rip, particularly in the x32 ABI. Base = SDValue(CurDAG->getMachineNode( TargetOpcode::SUBREG_TO_REG, DL, MVT::i64, Index: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp @@ -489,6 +489,12 @@ else BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr); + // For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit + // register as source operand, semantic is the same and destination is + // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided. + if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr)) + BasePtr = getX86SubSuperRegister(BasePtr, MVT::i64, false); + // This must be part of a four operand memory reference. Replace the // FrameIndex with base register with EBP. Add an offset to the offset. 
MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); Index: llvm/trunk/test/CodeGen/X86/lea-2.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/lea-2.ll +++ llvm/trunk/test/CodeGen/X86/lea-2.ll @@ -1,4 +1,7 @@ -; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s +; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux -x86-asm-syntax=intel | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -x86-asm-syntax=intel | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-nacl -x86-asm-syntax=intel | FileCheck %s define i32 @test1(i32 %A, i32 %B) { %tmp1 = shl i32 %A, 2 Index: llvm/trunk/test/CodeGen/X86/lea-3.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/lea-3.ll +++ llvm/trunk/test/CodeGen/X86/lea-3.ll @@ -1,4 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s ; CHECK: leaq (,[[A0:%rdi|%rcx]],4), %rax Index: llvm/trunk/test/CodeGen/X86/lea-4.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/lea-4.ll +++ llvm/trunk/test/CodeGen/X86/lea-4.ll @@ -1,4 +1,7 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s + define zeroext i16 @t1(i32 %on_off) nounwind { entry: Index: llvm/trunk/test/CodeGen/X86/lea-5.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/lea-5.ll +++ llvm/trunk/test/CodeGen/X86/lea-5.ll @@ -0,0 +1,59 @@ +; test for more complicated forms of lea operands which can be generated +; in loop optimized cases. 
+; See also http://llvm.org/bugs/show_bug.cgi?id=20016 + +; RUN: llc < %s -mtriple=x86_64-linux -O2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -O2 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-nacl -O2 | FileCheck %s -check-prefix=X32 + +; Function Attrs: nounwind readnone uwtable +define void @foo(i32 %x, i32 %d) #0 { +entry: + %a = alloca [8 x i32], align 16 + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %d.addr.0 = phi i32 [ %d, %entry ], [ %inc, %while.cond ] + %arrayidx = getelementptr inbounds [8 x i32]* %a, i32 0, i32 %d.addr.0 + +; CHECK: leaq -40(%rsp,%r{{[^,]*}},4), %rax +; X32: leal -40(%rsp,%r{{[^,]*}},4), %eax + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp eq i32 %0, 0 + %inc = add nsw i32 %d.addr.0, 1 + +; CHECK: leaq 4(%r{{[^,]*}}), %r{{[^,]*}} +; X32: leal 4(%r{{[^,]*}}), %e{{[^,]*}} + br i1 %cmp1, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + ret void +} + +; The same test as above but with enforced stack realignment (%a aligned by 64) +; to check one more case of correct lea generation. 
+ +; Function Attrs: nounwind readnone uwtable +define void @bar(i32 %x, i32 %d) #0 { +entry: + %a = alloca [8 x i32], align 64 + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %d.addr.0 = phi i32 [ %d, %entry ], [ %inc, %while.cond ] + %arrayidx = getelementptr inbounds [8 x i32]* %a, i32 0, i32 %d.addr.0 + +; CHECK: leaq (%rsp,%r{{[^,]*}},4), %rax +; X32: leal (%rsp,%r{{[^,]*}},4), %eax + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp eq i32 %0, 0 + %inc = add nsw i32 %d.addr.0, 1 + +; CHECK: leaq 4(%r{{[^,]*}}), %r{{[^,]*}} +; X32: leal 4(%r{{[^,]*}}), %e{{[^,]*}} + br i1 %cmp1, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + ret void +} + Index: llvm/trunk/test/CodeGen/X86/lea.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/lea.ll +++ llvm/trunk/test/CodeGen/X86/lea.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s define i32 @test1(i32 %x) nounwind { %tmp1 = shl i32 %x, 3