Index: llvm/lib/Target/X86/X86.td =================================================================== --- llvm/lib/Target/X86/X86.td +++ llvm/lib/Target/X86/X86.td @@ -1072,10 +1072,20 @@ FeatureCMOV, FeatureInsertVZEROUPPER]>; foreach P = ["pentium4", "pentium4m"] in { +// def : ProcessorModel; + + // Since 'pentium4' is the default 32-bit CPU on Linux and Windows, + // give it more modern tunings. + // FIXME: This wouldn't be needed if we supported mtune. def : ProcessorModel; + FeatureCMOV, FeatureInsertVZEROUPPER, + FeatureSlow3OpsLEA, FeatureSlowDivide64, + FeatureSlowIncDec, FeatureMacroFusion]>; } // Intel Quark. Index: llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll =================================================================== --- llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll +++ llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll @@ -16,19 +16,19 @@ ; X32-LABEL: func_cf_vector_x86 ; X32: movl 12(%ebp), %eax ; X32: movl 8(%ebp), %ecx - ; X32: movsd 24(%eax), %xmm4 # xmm4 = mem[0],zero - ; X32: movsd %xmm4, 24(%esp) - ; X32: movsd 16(%eax), %xmm5 # xmm5 = mem[0],zero - ; X32: movsd %xmm5, 16(%esp) - ; X32: movsd (%eax), %xmm6 # xmm6 = mem[0],zero - ; X32: movsd 8(%eax), %xmm7 # xmm7 = mem[0],zero - ; X32: movsd %xmm7, 8(%esp) - ; X32: movsd %xmm6, (%esp) + ; X32: movups (%eax), %xmm0 + ; X32: movups 16(%eax), %xmm1 + ; X32: movaps %xmm0, (%esp) + ; X32: movaps %xmm1, 16(%esp) + ; X32: movsd (%esp), %xmm4 + ; X32: movsd 8(%esp), %xmm5 + ; X32: movsd 16(%esp), %xmm6 + ; X32: movsd 24(%esp), %xmm7 ; X32: calll *___guard_check_icall_fptr - ; X32: movaps %xmm6, %xmm0 - ; X32: movaps %xmm7, %xmm1 - ; X32: movaps %xmm5, %xmm2 - ; X32: movaps %xmm4, %xmm3 + ; X32: movaps %xmm4, %xmm0 + ; X32: movaps %xmm5, %xmm1 + ; X32: movaps %xmm6, %xmm2 + ; X32: movaps %xmm7, %xmm3 ; X32: calll *%ecx } attributes #0 = { "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } Index: llvm/test/CodeGen/X86/slow-unaligned-mem.ll 
=================================================================== --- llvm/test/CodeGen/X86/slow-unaligned-mem.ll +++ llvm/test/CodeGen/X86/slow-unaligned-mem.ll @@ -3,8 +3,6 @@ ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefix=SLOW ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefix=SLOW ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefix=SLOW -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=SLOW -; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=SLOW ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=SLOW ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=SLOW ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=SLOW @@ -14,6 +12,10 @@ ; Intel chips with fast unaligned memory accesses +; Marked fast because this is the default 32-bit mode CPU in clang. 
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=FAST +; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=FAST + ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefix=FAST ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefix=FAST Index: llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll =================================================================== --- llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll +++ llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll @@ -40,7 +40,7 @@ ; OBJ: SubSectionType: FrameData (0xF5) ; OBJ: FrameData { ; OBJ: RvaStart: 0x0 -; OBJ: CodeSize: 0x34 +; OBJ: CodeSize: 0x36 ; OBJ: PrologSize: 0x9 ; OBJ: FrameFunc [ ; OBJ-NEXT: $T0 .raSearch = @@ -50,7 +50,7 @@ ; OBJ: } ; OBJ: FrameData { ; OBJ: RvaStart: 0x7 -; OBJ: CodeSize: 0x2D +; OBJ: CodeSize: 0x2F ; OBJ: PrologSize: 0x2 ; OBJ: FrameFunc [ ; OBJ-NEXT: $T0 .raSearch = @@ -61,7 +61,7 @@ ; OBJ: } ; OBJ: FrameData { ; OBJ: RvaStart: 0x8 -; OBJ: CodeSize: 0x2C +; OBJ: CodeSize: 0x2E ; OBJ: PrologSize: 0x1 ; OBJ: FrameFunc [ ; OBJ-NEXT: $T0 .raSearch = @@ -73,7 +73,7 @@ ; OBJ: } ; OBJ: FrameData { ; OBJ: RvaStart: 0x9 -; OBJ: CodeSize: 0x2B +; OBJ: CodeSize: 0x2D ; OBJ: PrologSize: 0x0 ; OBJ: FrameFunc [ ; OBJ-NEXT: $T0 .raSearch = Index: llvm/test/DebugInfo/COFF/types-array.ll =================================================================== --- llvm/test/DebugInfo/COFF/types-array.ll +++ llvm/test/DebugInfo/COFF/types-array.ll @@ -51,7 +51,7 @@ ; CHECK: PtrParent: 0x0 ; CHECK: PtrEnd: 0x0 ; CHECK: PtrNext: 0x0 -; CHECK: CodeSize: 0x39 +; CHECK: CodeSize: 0x2A ; CHECK: DbgStart: 0x0 ; CHECK: DbgEnd: 0x0 ; CHECK: FunctionType: f (0x1002) @@ -73,7 +73,7 @@ ; CHECK: LocalVariableAddrRange { ; CHECK: OffsetStart: .text+0x6 ; CHECK: 
ISectStart: 0x0 -; CHECK: Range: 0x33 +; CHECK: Range: 0x24 ; CHECK: } ; CHECK: } ; CHECK: ProcEnd {