Index: lib/CodeGen/MachineInstr.cpp =================================================================== --- lib/CodeGen/MachineInstr.cpp +++ lib/CodeGen/MachineInstr.cpp @@ -1616,6 +1616,7 @@ bool UseTBAA) { const MachineFunction *MF = getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); // If neither instruction stores to memory, they can't alias in any // meaningful way, even if they read from the same address. @@ -1626,9 +1627,6 @@ if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA)) return false; - if (!AA) - return true; - // FIXME: Need to handle multiple memory operands to support all targets. if (!hasOneMemOperand() || !Other.hasOneMemOperand()) return true; @@ -1636,9 +1634,6 @@ MachineMemOperand *MMOa = *memoperands_begin(); MachineMemOperand *MMOb = *Other.memoperands_begin(); - if (!MMOa->getValue() || !MMOb->getValue()) - return true; - // The following interface to AA is fashioned after DAGCombiner::isAlias // and operates with MachineMemOperand offset with some important // assumptions: @@ -1651,22 +1646,52 @@ // - There should never be any negative offsets here. // // FIXME: Modify API to hide this math from "user" - // FIXME: Even before we go to AA we can reason locally about some + // Even before we go to AA we can reason locally about some // memory objects. It can save compile time, and possibly catch some // corner cases not currently covered. - assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); - assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); + int64_t OffsetA = MMOa->getOffset(); + int64_t OffsetB = MMOb->getOffset(); + + assert((OffsetA >= 0) && "Negative MachineMemOperand offset"); + assert((OffsetB >= 0) && "Negative MachineMemOperand offset"); + + int64_t MinOffset = std::min(OffsetA, OffsetB); + int64_t WidthA = MMOa->getSize(); + int64_t WidthB = MMOb->getSize(); + const Value *ValA = MMOa->getValue(); + const Value *ValB = MMOb->getValue(); + bool SameVal = (ValA && ValB && (ValA == ValB)); + if (!SameVal) { + const PseudoSourceValue *PSVa = MMOa->getPseudoValue(); + const PseudoSourceValue *PSVb = MMOb->getPseudoValue(); + if (PSVa && PSVa->isConstant(&MFI)) + return false; + if (PSVb && PSVb->isConstant(&MFI)) + return false; + if (PSVa && PSVb && (PSVa == PSVb)) + SameVal = true; + } + + if (SameVal) { + int64_t MaxOffset = std::max(OffsetA, OffsetB); + int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB; + return (MinOffset + LowWidth > MaxOffset); + } + + if (!AA) + return true; + + if (!ValA || !ValB) + return true; - int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); - int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; - int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; + int64_t Overlapa = WidthA + OffsetA - MinOffset; + int64_t Overlapb = WidthB + OffsetB - MinOffset; - AliasResult AAResult = - AA->alias(MemoryLocation(MMOa->getValue(), Overlapa, - UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), - MemoryLocation(MMOb->getValue(), Overlapb, - UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); + AliasResult AAResult = AA->alias( + MemoryLocation(ValA, Overlapa, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), + MemoryLocation(ValB, Overlapb, + UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); return (AAResult != NoAlias); } Index: test/CodeGen/AArch64/ldst-opt.ll =================================================================== --- test/CodeGen/AArch64/ldst-opt.ll +++ test/CodeGen/AArch64/ldst-opt.ll @@ -1531,7 +1531,7 @@ ; CHECK-LABEL: merge_zr64_unalign: ; CHECK: // %entry ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] -; STRICTALIGN: strb wzr, +; STRICTALIGN: strb ; STRICTALIGN: strb ; STRICTALIGN: strb ; STRICTALIGN: strb Index: test/CodeGen/AMDGPU/load-global-i16.ll =================================================================== --- test/CodeGen/AMDGPU/load-global-i16.ll +++ test/CodeGen/AMDGPU/load-global-i16.ll @@ -179,8 +179,8 @@ ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1 @@ -188,8 +188,6 @@ ; TODO: This should use DST, but for some there are redundant MOVs ; EGCM: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal ; EGCM: 16 -; EGCM: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal -; EGCM: AND_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], literal define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in @@ -202,8 +200,8 @@ ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9].[XYZW]}}, 0, #1 Index: test/CodeGen/AMDGPU/load-global-i8.ll =================================================================== --- test/CodeGen/AMDGPU/load-global-i8.ll +++ test/CodeGen/AMDGPU/load-global-i8.ll @@ -352,22 +352,22 @@ ; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1 ; TODO: These should use DST, but for some there are redundant MOVs -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal ; EG-DAG: 8 ; EG-DAG: 8 ; EG-DAG: 8 Index: test/CodeGen/AMDGPU/load-local-i16.ll =================================================================== --- test/CodeGen/AMDGPU/load-local-i16.ll +++ test/CodeGen/AMDGPU/load-local-i16.ll @@ -530,7 +530,6 @@ ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y ; EG-DAG: LDS_WRITE -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { %a = load i16, i16 addrspace(3)* %in %ext = zext i16 %a to i64 @@ -572,7 +571,6 @@ ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y ; EG-DAG: LDS_WRITE -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { %load = load <1 x i16>, <1 x i16> addrspace(3)* %in %ext = zext <1 x i16> %load to <1 x i64> Index: test/CodeGen/ARM/2009-10-27-double-align.ll =================================================================== --- test/CodeGen/ARM/2009-10-27-double-align.ll +++ test/CodeGen/ARM/2009-10-27-double-align.ll @@ -1,13 +1,15 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s --check-prefix=NOREGALLOC +; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s --check-prefix=REGALLOC @.str = private constant [1 x i8] zeroinitializer, align 1 define void @g() { entry: ;CHECK: [sp, #8] -;CHECK: [sp, #12] -;CHECK: [sp] +;NOREGALLOC: [sp, #12] +;NOREGALLOC: [sp] +;REGALLOC: [sp] +;REGALLOC: [sp, #12] tail call void (i8*, ...) @f(i8* getelementptr ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00) ret void } Index: test/CodeGen/ARM/illegal-bitfield-loadstore.ll =================================================================== --- test/CodeGen/ARM/illegal-bitfield-loadstore.ll +++ test/CodeGen/ARM/illegal-bitfield-loadstore.ll @@ -124,11 +124,11 @@ ; BE-LABEL: i56_and_or: ; BE: @ BB#0: ; BE-NEXT: mov r1, r0 +; BE-NEXT: ldr r12, [r0] ; BE-NEXT: mov r3, #128 ; BE-NEXT: ldrh r2, [r1, #4]! -; BE-NEXT: strb r3, [r1, #2] ; BE-NEXT: lsl r2, r2, #8 -; BE-NEXT: ldr r12, [r0] +; BE-NEXT: strb r3, [r1, #2] ; BE-NEXT: orr r2, r2, r12, lsl #24 ; BE-NEXT: orr r2, r2, #384 ; BE-NEXT: lsr r3, r2, #8 Index: test/CodeGen/X86/illegal-bitfield-loadstore.ll =================================================================== --- test/CodeGen/X86/illegal-bitfield-loadstore.ll +++ test/CodeGen/X86/illegal-bitfield-loadstore.ll @@ -65,17 +65,17 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movzwl 4(%rdi), %eax ; CHECK-NEXT: movzbl 6(%rdi), %ecx -; CHECK-NEXT: movl (%rdi), %edx ; CHECK-NEXT: movb %cl, 6(%rdi) ; CHECK-NEXT: # kill: %ECX %ECX %RCX %RCX ; CHECK-NEXT: shll $16, %ecx ; CHECK-NEXT: orl %eax, %ecx ; CHECK-NEXT: shlq $32, %rcx -; CHECK-NEXT: orq %rcx, %rdx -; CHECK-NEXT: orq $384, %rdx # imm = 0x180 -; CHECK-NEXT: movl %edx, (%rdi) -; CHECK-NEXT: shrq $32, %rdx -; CHECK-NEXT: movw %dx, 4(%rdi) +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: orq $384, %rax # imm = 0x180 +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movw %ax, 4(%rdi) ; CHECK-NEXT: retq %aa = load i56, i56* %a, align 1 %b = or i56 %aa, 384 @@ -88,19 +88,19 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movzwl 4(%rdi), %eax ; CHECK-NEXT: movzbl 6(%rdi), %ecx -; CHECK-NEXT: movl (%rdi), %edx ; CHECK-NEXT: movb %cl, 6(%rdi) ; CHECK-NEXT: # kill: %ECX %ECX %RCX %RCX ; CHECK-NEXT: shll $16, %ecx ; CHECK-NEXT: orl %eax, %ecx ; CHECK-NEXT: shlq $32, %rcx -; CHECK-NEXT: orq %rcx, %rdx -; CHECK-NEXT: orq $384, %rdx # imm = 0x180 -; CHECK-NEXT: movabsq $72057594037927808, %rax # imm = 0xFFFFFFFFFFFF80 -; CHECK-NEXT: andq %rdx, %rax -; CHECK-NEXT: movl %eax, (%rdi) -; CHECK-NEXT: shrq $32, %rax -; CHECK-NEXT: movw %ax, 4(%rdi) +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: orq $384, %rax # imm = 0x180 +; CHECK-NEXT: movabsq $72057594037927808, %rcx # imm = 0xFFFFFFFFFFFF80 +; CHECK-NEXT: andq %rax, %rcx +; CHECK-NEXT: movl %ecx, (%rdi) +; CHECK-NEXT: shrq $32, %rcx +; CHECK-NEXT: movw %cx, 4(%rdi) ; CHECK-NEXT: retq %b = load i56, i56* %a, align 1 %c = and i56 %b, -128 @@ -115,20 +115,20 @@ ; CHECK-NEXT: movzbl %sil, %eax ; CHECK-NEXT: movzwl 4(%rdi), %ecx ; CHECK-NEXT: movzbl 6(%rdi), %edx -; CHECK-NEXT: movl (%rdi), %esi ; CHECK-NEXT: movb %dl, 6(%rdi) ; CHECK-NEXT: # kill: %EDX %EDX %RDX %RDX ; CHECK-NEXT: shll $16, %edx ; CHECK-NEXT: orl %ecx, %edx ; CHECK-NEXT: shlq $32, %rdx -; CHECK-NEXT: orq %rdx, %rsi +; CHECK-NEXT: movl (%rdi), %ecx +; CHECK-NEXT: orq %rdx, %rcx ; CHECK-NEXT: shlq $13, %rax -; CHECK-NEXT: movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF -; CHECK-NEXT: andq %rsi, %rcx -; CHECK-NEXT: orq %rax, %rcx -; CHECK-NEXT: movl %ecx, (%rdi) -; CHECK-NEXT: shrq $32, %rcx -; CHECK-NEXT: movw %cx, 4(%rdi) +; CHECK-NEXT: movabsq $72057594037919743, %rdx # imm = 0xFFFFFFFFFFDFFF +; CHECK-NEXT: andq %rcx, %rdx +; CHECK-NEXT: orq %rax, %rdx +; CHECK-NEXT: movl %edx, (%rdi) +; CHECK-NEXT: shrq $32, %rdx +; CHECK-NEXT: movw %dx, 4(%rdi) ; CHECK-NEXT: retq %extbit = zext i1 %bit to i56 %b = load i56, i56* %a, align 1