Index: lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- lib/Target/X86/X86ISelDAGToDAG.cpp +++ lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1429,7 +1429,7 @@ RegisterSDNode *RN = dyn_cast(Base); if (RN && RN->getReg() == 0) Base = CurDAG->getRegister(0, MVT::i64); - else if (Base.getValueType() == MVT::i32 && !dyn_cast(N)) { + else if (Base.getValueType() == MVT::i32 && !dyn_cast(Base)) { // Base could already be %rip, particularly in the x32 ABI. Base = SDValue(CurDAG->getMachineNode( TargetOpcode::SUBREG_TO_REG, DL, MVT::i64, Index: lib/Target/X86/X86RegisterInfo.cpp =================================================================== --- lib/Target/X86/X86RegisterInfo.cpp +++ lib/Target/X86/X86RegisterInfo.cpp @@ -489,6 +489,12 @@ else BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr); + // For LEA64_32r when BasePtr is 32-bit (X32), we can use the full-size 64-bit + // register as the source operand; semantics are the same and the destination + // is 32 bits. This saves one byte per lea since the 0x67 prefix is avoided. + if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr)) + BasePtr = getX86SubSuperRegister(BasePtr, MVT::i64, false); + // This must be part of a four operand memory reference. Replace the // FrameIndex with base register with EBP. Add an offset to the offset. 
MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false); Index: test/CodeGen/X86/lea-2.ll =================================================================== --- test/CodeGen/X86/lea-2.ll +++ test/CodeGen/X86/lea-2.ll @@ -1,4 +1,7 @@ -; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s +; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux -x86-asm-syntax=intel | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -x86-asm-syntax=intel | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-nacl -x86-asm-syntax=intel | FileCheck %s define i32 @test1(i32 %A, i32 %B) { %tmp1 = shl i32 %A, 2 Index: test/CodeGen/X86/lea-3.ll =================================================================== --- test/CodeGen/X86/lea-3.ll +++ test/CodeGen/X86/lea-3.ll @@ -1,4 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s ; CHECK: leaq (,[[A0:%rdi|%rcx]],4), %rax Index: test/CodeGen/X86/lea-4.ll =================================================================== --- test/CodeGen/X86/lea-4.ll +++ test/CodeGen/X86/lea-4.ll @@ -1,4 +1,7 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s + define zeroext i16 @t1(i32 %on_off) nounwind { entry: Index: test/CodeGen/X86/lea-5.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/lea-5.ll @@ -0,0 +1,49 @@ +; RUN: llc < %s -mtriple=x86_64-linux -O2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -O2 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-nacl -O2 | FileCheck %s -check-prefix=X32 + +define void @bar(i32 %x, i32 %d) nounwind uwtable { +entry: + %a = 
alloca [8 x i32], align 16 + %cmp = icmp slt i32 %d, 8 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %arraydecay = getelementptr inbounds [8 x i32]* %a, i32 0, i32 0 + +; CHECK: leaq (%rsp), %rdi +; X32: leal (%rsp), %edi + call void @foo(i32* %arraydecay) nounwind + br label %while.cond + +while.cond: ; preds = %while.cond, %if.then + %d.addr.0 = phi i32 [ %d, %if.then ], [ %inc, %while.cond ] + %arrayidx = getelementptr inbounds [8 x i32]* %a, i32 0, i32 %d.addr.0 + +; CHECK: leaq (%rsp,%r{{[^,]*}},4), %rax +; X32: leal (%rsp,%r{{[^,]*}},4), %eax + %0 = load i32* %arrayidx, align 4, !tbaa !0 + %cmp1 = icmp eq i32 %0, 0 + %inc = add nsw i32 %d.addr.0, 1 + +; CHECK: leaq 4(%r{{[^,]*}}), %r{{[^,]*}} +; X32: leal 4(%r{{[^,]*}}), %e{{[^,]*}} + br i1 %cmp1, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + store i32 %x, i32* %arrayidx, align 4, !tbaa !0 + +; CHECK: leaq (%rsp), %rdi +; X32: leal (%rsp), %edi + call void @foo(i32* %arraydecay) nounwind + br label %if.end + +if.end: ; preds = %while.end, %entry + ret void +} + +declare void @foo(i32*) + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} Index: test/CodeGen/X86/lea.ll =================================================================== --- test/CodeGen/X86/lea.ll +++ test/CodeGen/X86/lea.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s define i32 @test1(i32 %x) nounwind { %tmp1 = shl i32 %x, 3