Index: lib/CodeGen/MachineInstr.cpp =================================================================== --- lib/CodeGen/MachineInstr.cpp +++ lib/CodeGen/MachineInstr.cpp @@ -1663,6 +1663,7 @@ bool UseTBAA) { const MachineFunction *MF = getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); // If neither instruction stores to memory, they can't alias in any // meaningful way, even if they read from the same address. @@ -1673,9 +1674,6 @@ if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA)) return false; - if (!AA) - return true; - // FIXME: Need to handle multiple memory operands to support all targets. if (!hasOneMemOperand() || !Other.hasOneMemOperand()) return true; @@ -1683,9 +1681,6 @@ MachineMemOperand *MMOa = *memoperands_begin(); MachineMemOperand *MMOb = *Other.memoperands_begin(); - if (!MMOa->getValue() || !MMOb->getValue()) - return true; - // The following interface to AA is fashioned after DAGCombiner::isAlias // and operates with MachineMemOperand offset with some important // assumptions: @@ -1698,22 +1693,53 @@ // - There should never be any negative offsets here. // // FIXME: Modify API to hide this math from "user" - // FIXME: Even before we go to AA we can reason locally about some + // Even before we go to AA we can reason locally about some // memory objects. It can save compile time, and possibly catch some // corner cases not currently covered. - assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); - assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); + int64_t OffsetA = MMOa->getOffset(); + int64_t OffsetB = MMOb->getOffset(); + + int64_t MinOffset = std::min(OffsetA, OffsetB); + int64_t WidthA = MMOa->getSize(); + int64_t WidthB = MMOb->getSize(); + const Value *ValA = MMOa->getValue(); + const Value *ValB = MMOb->getValue(); + bool SameVal = (ValA && ValB && (ValA == ValB)); + if (!SameVal) { + const PseudoSourceValue *PSVa = MMOa->getPseudoValue(); + const PseudoSourceValue *PSVb = MMOb->getPseudoValue(); + if (PSVa && ValB && !PSVa->mayAlias(&MFI)) + return false; + if (PSVb && ValA && !PSVb->mayAlias(&MFI)) + return false; + if (PSVa && PSVb && (PSVa == PSVb)) + SameVal = true; + } + + if (SameVal) { + int64_t MaxOffset = std::max(OffsetA, OffsetB); + int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB; + return (MinOffset + LowWidth > MaxOffset); + } + + if (!AA) + return true; + + if (!ValA || !ValB) + return true; + + assert((OffsetA >= 0) && "Negative MachineMemOperand offset"); + assert((OffsetB >= 0) && "Negative MachineMemOperand offset"); - int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); - int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; - int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; + int64_t Overlapa = WidthA + OffsetA - MinOffset; + int64_t Overlapb = WidthB + OffsetB - MinOffset; - AliasResult AAResult = - AA->alias(MemoryLocation(MMOa->getValue(), Overlapa, - UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), - MemoryLocation(MMOb->getValue(), Overlapb, - UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); + AliasResult AAResult = AA->alias( + MemoryLocation(ValA, Overlapa, + UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), + MemoryLocation(ValB, Overlapb, + UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); return (AAResult != NoAlias); } Index: test/CodeGen/AArch64/func-calls.ll =================================================================== --- test/CodeGen/AArch64/func-calls.ll +++ test/CodeGen/AArch64/func-calls.ll @@ -130,11 +130,11 @@ i32 42, i128 %val) ; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 ; CHECK: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]] -; CHECK: stp [[I128LO]], [[I128HI]], [sp, #16] +; CHECK: stp [[I128HI]], {{x[0-9]+}}, [sp, #24] ; CHECK-NONEON: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 ; CHECK-NONEON: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]] -; CHECK-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16] +; CHECK-NONEON: stp [[I128HI]], {{x[0-9]+}}, [sp, #24] ; CHECK: bl check_i128_stackalign call void @check_i128_regalign(i32 0, i128 42) Index: test/CodeGen/AArch64/ldst-opt.ll =================================================================== --- test/CodeGen/AArch64/ldst-opt.ll +++ test/CodeGen/AArch64/ldst-opt.ll @@ -1531,7 +1531,7 @@ ; CHECK-LABEL: merge_zr64_unalign: ; CHECK: // %entry ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] -; STRICTALIGN: strb wzr, +; STRICTALIGN: strb ; STRICTALIGN: strb ; STRICTALIGN: strb ; STRICTALIGN: strb Index: test/CodeGen/AMDGPU/call-argument-types.ll =================================================================== --- test/CodeGen/AMDGPU/call-argument-types.ll +++ test/CodeGen/AMDGPU/call-argument-types.ll @@ -452,15 +452,15 @@ ; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], s33 offset:8 ; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], s33 offset:12 -; HSA: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:8 ; HSA: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]] offset:4 +; HSA: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:8 ; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], s33 offset:8 ; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], s33 offset:12 -; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8 ; MESA: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]] offset:4 +; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8 ; GCN-NEXT: s_swappc_b64 ; GCN-NEXT: s_sub_u32 [[SP]], [[SP]], 0x200 @@ -487,8 +487,8 @@ ; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12 ; GCN-DAG: s_add_u32 [[SP]], [[SP]], 0x200 -; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8 ; GCN: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4 +; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8 ; GCN-NEXT: s_swappc_b64 ; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:16 ; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:20 Index: test/CodeGen/AMDGPU/load-global-i16.ll =================================================================== --- test/CodeGen/AMDGPU/load-global-i16.ll +++ test/CodeGen/AMDGPU/load-global-i16.ll @@ -179,8 +179,8 @@ ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1 @@ -188,8 +188,6 @@ ; TODO: This should use DST, but for some there are redundant MOVs ; EGCM: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal ; EGCM: 16 -; EGCM: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal -; EGCM: AND_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], literal define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in @@ -202,8 +200,8 @@ ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9].[XYZW]}}, 0, #1 Index: test/CodeGen/AMDGPU/load-global-i8.ll =================================================================== --- test/CodeGen/AMDGPU/load-global-i8.ll +++ test/CodeGen/AMDGPU/load-global-i8.ll @@ -352,22 +352,22 @@ ; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1 ; TODO: These should use DST, but for some there are redundant MOVs -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal ; EG-DAG: 8 ; EG-DAG: 8 ; EG-DAG: 8 Index: test/CodeGen/AMDGPU/load-local-i16.ll =================================================================== --- test/CodeGen/AMDGPU/load-local-i16.ll +++ test/CodeGen/AMDGPU/load-local-i16.ll @@ -530,7 +530,6 @@ ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y ; EG-DAG: LDS_WRITE -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { %a = load i16, i16 addrspace(3)* %in %ext = zext i16 %a to i64 @@ -572,7 +571,6 @@ ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y ; EG-DAG: LDS_WRITE -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { %load = load <1 x i16>, <1 x i16> addrspace(3)* %in %ext = zext <1 x i16> %load to <1 x i64> Index: test/CodeGen/ARM/2009-10-27-double-align.ll =================================================================== --- test/CodeGen/ARM/2009-10-27-double-align.ll +++ test/CodeGen/ARM/2009-10-27-double-align.ll @@ -1,13 +1,15 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s --check-prefix=NOREGALLOC +; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s --check-prefix=REGALLOC @.str = private constant [1 x i8] zeroinitializer, align 1 define void @g() { entry: ;CHECK: [sp, #8] -;CHECK: [sp, #12] -;CHECK: [sp] +;NOREGALLOC: [sp, #12] +;NOREGALLOC: [sp] +;REGALLOC: [sp] +;REGALLOC: [sp, #12] tail call void (i8*, ...) @f(i8* getelementptr ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00) ret void } Index: test/CodeGen/ARM/illegal-bitfield-loadstore.ll =================================================================== --- test/CodeGen/ARM/illegal-bitfield-loadstore.ll +++ test/CodeGen/ARM/illegal-bitfield-loadstore.ll @@ -124,10 +124,10 @@ ; BE-LABEL: i56_and_or: ; BE: @ BB#0: ; BE-NEXT: mov r1, r0 -; BE-NEXT: mov r3, #128 +; BE-NEXT: ldr r12, [r0] ; BE-NEXT: ldrh r2, [r1, #4]! +; BE-NEXT: mov r3, #128 ; BE-NEXT: strb r3, [r1, #2] -; BE-NEXT: ldr r12, [r0] ; BE-NEXT: lsl r2, r2, #8 ; BE-NEXT: orr r2, r2, r12, lsl #24 ; BE-NEXT: orr r2, r2, #384 Index: test/CodeGen/X86/illegal-bitfield-loadstore.ll =================================================================== --- test/CodeGen/X86/illegal-bitfield-loadstore.ll +++ test/CodeGen/X86/illegal-bitfield-loadstore.ll @@ -118,17 +118,17 @@ ; X64: # BB#0: ; X64-NEXT: movzwl 4(%rdi), %eax ; X64-NEXT: movzbl 6(%rdi), %ecx -; X64-NEXT: movl (%rdi), %edx ; X64-NEXT: movb %cl, 6(%rdi) ; X64-NEXT: # kill: %ECX %ECX %RCX %RCX ; X64-NEXT: shll $16, %ecx ; X64-NEXT: orl %eax, %ecx ; X64-NEXT: shlq $32, %rcx -; X64-NEXT: orq %rcx, %rdx -; X64-NEXT: orq $384, %rdx # imm = 0x180 -; X64-NEXT: movl %edx, (%rdi) -; X64-NEXT: shrq $32, %rdx -; X64-NEXT: movw %dx, 4(%rdi) +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: orq %rcx, %rax +; X64-NEXT: orq $384, %rax # imm = 0x180 +; X64-NEXT: movl %eax, (%rdi) +; X64-NEXT: shrq $32, %rax +; X64-NEXT: movw %ax, 4(%rdi) ; X64-NEXT: retq %aa = load i56, i56* %a, align 1 %b = or i56 %aa, 384 @@ -150,19 +150,19 @@ ; X64: # BB#0: ; X64-NEXT: movzwl 4(%rdi), %eax ; X64-NEXT: movzbl 6(%rdi), %ecx -; X64-NEXT: movl (%rdi), %edx ; X64-NEXT: movb %cl, 6(%rdi) ; X64-NEXT: # kill: %ECX %ECX %RCX %RCX ; X64-NEXT: shll $16, %ecx ; X64-NEXT: orl %eax, %ecx ; X64-NEXT: shlq $32, %rcx -; X64-NEXT: orq %rcx, %rdx -; X64-NEXT: orq $384, %rdx # imm = 0x180 -; X64-NEXT: movabsq $72057594037927808, %rax # imm = 0xFFFFFFFFFFFF80 -; X64-NEXT: andq %rdx, %rax -; X64-NEXT: movl %eax, (%rdi) -; X64-NEXT: shrq $32, %rax -; X64-NEXT: movw %ax, 4(%rdi) +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: orq %rcx, %rax +; X64-NEXT: orq $384, %rax # imm = 0x180 +; X64-NEXT: movabsq $72057594037927808, %rcx # imm = 0xFFFFFFFFFFFF80 +; X64-NEXT: andq %rax, %rcx +; X64-NEXT: movl %ecx, (%rdi) +; X64-NEXT: shrq $32, %rcx +; X64-NEXT: movw %cx, 4(%rdi) ; X64-NEXT: retq %b = load i56, i56* %a, align 1 %c = and i56 %b, -128 @@ -188,20 +188,20 @@ ; X64-NEXT: movzbl %sil, %eax ; X64-NEXT: movzwl 4(%rdi), %ecx ; X64-NEXT: movzbl 6(%rdi), %edx -; X64-NEXT: movl (%rdi), %esi ; X64-NEXT: movb %dl, 6(%rdi) ; X64-NEXT: # kill: %EDX %EDX %RDX %RDX ; X64-NEXT: shll $16, %edx ; X64-NEXT: orl %ecx, %edx ; X64-NEXT: shlq $32, %rdx -; X64-NEXT: orq %rdx, %rsi +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: orq %rdx, %rcx ; X64-NEXT: shlq $13, %rax -; X64-NEXT: movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF -; X64-NEXT: andq %rsi, %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: movl %ecx, (%rdi) -; X64-NEXT: shrq $32, %rcx -; X64-NEXT: movw %cx, 4(%rdi) +; X64-NEXT: movabsq $72057594037919743, %rdx # imm = 0xFFFFFFFFFFDFFF +; X64-NEXT: andq %rcx, %rdx +; X64-NEXT: orq %rax, %rdx +; X64-NEXT: movl %edx, (%rdi) +; X64-NEXT: shrq $32, %rdx +; X64-NEXT: movw %dx, 4(%rdi) ; X64-NEXT: retq %extbit = zext i1 %bit to i56 %b = load i56, i56* %a, align 1 Index: test/CodeGen/X86/memcpy-2.ll =================================================================== --- test/CodeGen/X86/memcpy-2.ll +++ test/CodeGen/X86/memcpy-2.ll @@ -12,23 +12,23 @@ define void @t1(i32 %argc, i8** %argv) nounwind { entry: ; SSE2-Darwin-LABEL: t1: -; SSE2-Darwin: movsd _.str+16, %xmm0 -; SSE2-Darwin: movsd %xmm0, 16(%esp) ; SSE2-Darwin: movaps _.str, %xmm0 ; SSE2-Darwin: movaps %xmm0 +; SSE2-Darwin: movsd _.str+16, %xmm0 +; SSE2-Darwin: movsd %xmm0, 16(%esp) ; SSE2-Darwin: movb $0, 24(%esp) ; SSE2-Mingw32-LABEL: t1: -; SSE2-Mingw32: movsd _.str+16, %xmm0 -; SSE2-Mingw32: movsd %xmm0, 16(%esp) ; SSE2-Mingw32: movaps _.str, %xmm0 ; SSE2-Mingw32: movups %xmm0 +; SSE2-Mingw32: movsd _.str+16, %xmm0 +; SSE2-Mingw32: movsd %xmm0, 16(%esp) ; SSE2-Mingw32: movb $0, 24(%esp) ; SSE1-LABEL: t1: ; SSE1: movaps _.str, %xmm0 -; SSE1: movaps %xmm0 ; SSE1: movb $0, 24(%esp) +; SSE1: movaps %xmm0 ; SSE1: movl $0, 20(%esp) ; SSE1: movl $0, 16(%esp) Index: test/CodeGen/X86/pr34088.ll =================================================================== --- test/CodeGen/X86/pr34088.ll +++ test/CodeGen/X86/pr34088.ll @@ -25,8 +25,8 @@ ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: movaps %xmm0, (%esp) ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movaps %xmm1, (%esp) ; CHECK-NEXT: movl $-842150451, {{[0-9]+}}(%esp) # imm = 0xCDCDCDCD +; CHECK-NEXT: movaps %xmm1, (%esp) ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %ebp, %esp ; CHECK-NEXT: popl %ebp Index: test/CodeGen/X86/select.ll =================================================================== --- test/CodeGen/X86/select.ll +++ test/CodeGen/X86/select.ll @@ -349,8 +349,8 @@ ; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] ; ATOM-NEXT: paddd %xmm2, %xmm0 ; ATOM-NEXT: paddd %xmm2, %xmm1 -; ATOM-NEXT: movq %xmm1, 16(%rsi) ; ATOM-NEXT: movdqa %xmm0, (%rsi) +; ATOM-NEXT: movq %xmm1, 16(%rsi) ; ATOM-NEXT: retq ; ATOM-NEXT: ## -- End function ; Index: test/CodeGen/X86/widen_arith-3.ll =================================================================== --- test/CodeGen/X86/widen_arith-3.ll +++ test/CodeGen/X86/widen_arith-3.ll @@ -16,9 +16,9 @@ ; CHECK-NEXT: movl {{\.LCPI.*}}, %eax ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 +; CHECK-NEXT: movw $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) -; CHECK-NEXT: movw $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_2: # %forbody