Index: lib/CodeGen/MachineInstr.cpp =================================================================== --- lib/CodeGen/MachineInstr.cpp +++ lib/CodeGen/MachineInstr.cpp @@ -1626,9 +1626,6 @@ if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA)) return false; - if (!AA) - return true; - // FIXME: Need to handle multiple memory operands to support all targets. if (!hasOneMemOperand() || !Other.hasOneMemOperand()) return true; @@ -1651,16 +1648,38 @@ // - There should never be any negative offsets here. // // FIXME: Modify API to hide this math from "user" - // FIXME: Even before we go to AA we can reason locally about some + // Even before we go to AA we can reason locally about some // memory objects. It can save compile time, and possibly catch some // corner cases not currently covered. - assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); - assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); + int64_t OffsetA = MMOa->getOffset(); + int64_t OffsetB = MMOb->getOffset(); + + assert((OffsetA >= 0) && "Negative MachineMemOperand offset"); + assert((OffsetB >= 0) && "Negative MachineMemOperand offset"); + + int64_t MinOffset = std::min(OffsetA, OffsetB); + int64_t WidthA = MMOa->getSize(); + int64_t WidthB = MMOb->getSize(); + bool SameVal = MMOa->getValue() == MMOb->getValue(); + if (!SameVal) { + const PseudoSourceValue *PSVa = MMOa->getPseudoValue(); + const PseudoSourceValue *PSVb = MMOb->getPseudoValue(); + if (PSVa && PSVb && (PSVa == PSVb)) + SameVal = true; + } + + if (SameVal) { + int64_t MaxOffset = std::max(OffsetA, OffsetB); + int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB; + return (MinOffset + LowWidth > MaxOffset); + } + + if (!AA) + return true; - int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); - int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; - int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; + int64_t Overlapa = WidthA + OffsetA - MinOffset; + int64_t Overlapb = WidthB + OffsetB - MinOffset; AliasResult AAResult = AA->alias(MemoryLocation(MMOa->getValue(), Overlapa, Index: test/CodeGen/AArch64/ldst-opt.ll =================================================================== --- test/CodeGen/AArch64/ldst-opt.ll +++ test/CodeGen/AArch64/ldst-opt.ll @@ -1531,7 +1531,7 @@ ; CHECK-LABEL: merge_zr64_unalign: ; CHECK: // %entry ; NOSTRICTALIGN-NEXT: stp xzr, xzr, [x{{[0-9]+}}] -; STRICTALIGN: strb wzr, +; STRICTALIGN: strb ; STRICTALIGN: strb ; STRICTALIGN: strb ; STRICTALIGN: strb Index: test/CodeGen/AMDGPU/load-global-i16.ll =================================================================== --- test/CodeGen/AMDGPU/load-global-i16.ll +++ test/CodeGen/AMDGPU/load-global-i16.ll @@ -179,8 +179,8 @@ ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1 @@ -188,8 +188,6 @@ ; TODO: This should use DST, but for some there are redundant MOVs ; EGCM: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal ; EGCM: 16 -; EGCM: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal -; EGCM: AND_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], literal define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in @@ -202,8 +200,8 @@ ; GCN-NOHSA: buffer_load_dwordx2 ; GCN-HSA: flat_load_dwordx2 -; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}} +; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, ; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9].[XYZW]}}, 0, #1 Index: test/CodeGen/AMDGPU/load-global-i8.ll =================================================================== --- test/CodeGen/AMDGPU/load-global-i8.ll +++ test/CodeGen/AMDGPU/load-global-i8.ll @@ -352,22 +352,22 @@ ; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1 ; TODO: These should use DST, but for some there are redundant MOVs -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal ; EG-DAG: 8 ; EG-DAG: 8 ; EG-DAG: 8 Index: test/CodeGen/AMDGPU/load-local-i16.ll =================================================================== --- test/CodeGen/AMDGPU/load-local-i16.ll +++ test/CodeGen/AMDGPU/load-local-i16.ll @@ -530,7 +530,6 @@ ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y ; EG-DAG: LDS_WRITE -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { %a = load i16, i16 addrspace(3)* %in %ext = zext i16 %a to i64 @@ -572,7 +571,6 @@ ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y ; EG-DAG: LDS_WRITE -; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { %load = load <1 x i16>, <1 x i16> addrspace(3)* %in %ext = zext <1 x i16> %load to <1 x i64> Index: test/CodeGen/ARM/illegal-bitfield-loadstore.ll =================================================================== --- test/CodeGen/ARM/illegal-bitfield-loadstore.ll +++ test/CodeGen/ARM/illegal-bitfield-loadstore.ll @@ -124,11 +124,11 @@ ; BE-LABEL: i56_and_or: ; BE: @ BB#0: ; BE-NEXT: mov r1, r0 +; BE-NEXT: ldr r12, [r0] ; BE-NEXT: mov r3, #128 ; BE-NEXT: ldrh r2, [r1, #4]! -; BE-NEXT: strb r3, [r1, #2] ; BE-NEXT: lsl r2, r2, #8 -; BE-NEXT: ldr r12, [r0] +; BE-NEXT: strb r3, [r1, #2] ; BE-NEXT: orr r2, r2, r12, lsl #24 ; BE-NEXT: orr r2, r2, #384 ; BE-NEXT: lsr r3, r2, #8 Index: test/CodeGen/X86/illegal-bitfield-loadstore.ll =================================================================== --- test/CodeGen/X86/illegal-bitfield-loadstore.ll +++ test/CodeGen/X86/illegal-bitfield-loadstore.ll @@ -65,17 +65,17 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movzwl 4(%rdi), %eax ; CHECK-NEXT: movzbl 6(%rdi), %ecx -; CHECK-NEXT: movl (%rdi), %edx ; CHECK-NEXT: movb %cl, 6(%rdi) ; CHECK-NEXT: # kill: %ECX %ECX %RCX %RCX ; CHECK-NEXT: shll $16, %ecx ; CHECK-NEXT: orl %eax, %ecx ; CHECK-NEXT: shlq $32, %rcx -; CHECK-NEXT: orq %rcx, %rdx -; CHECK-NEXT: orq $384, %rdx # imm = 0x180 -; CHECK-NEXT: movl %edx, (%rdi) -; CHECK-NEXT: shrq $32, %rdx -; CHECK-NEXT: movw %dx, 4(%rdi) +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: orq $384, %rax # imm = 0x180 +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movw %ax, 4(%rdi) ; CHECK-NEXT: retq %aa = load i56, i56* %a, align 1 %b = or i56 %aa, 384 @@ -88,19 +88,19 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movzwl 4(%rdi), %eax ; CHECK-NEXT: movzbl 6(%rdi), %ecx -; CHECK-NEXT: movl (%rdi), %edx ; CHECK-NEXT: movb %cl, 6(%rdi) ; CHECK-NEXT: # kill: %ECX %ECX %RCX %RCX ; CHECK-NEXT: shll $16, %ecx ; CHECK-NEXT: orl %eax, %ecx ; CHECK-NEXT: shlq $32, %rcx -; CHECK-NEXT: orq %rcx, %rdx -; CHECK-NEXT: orq $384, %rdx # imm = 0x180 -; CHECK-NEXT: movabsq $72057594037927808, %rax # imm = 0xFFFFFFFFFFFF80 -; CHECK-NEXT: andq %rdx, %rax -; CHECK-NEXT: movl %eax, (%rdi) -; CHECK-NEXT: shrq $32, %rax -; CHECK-NEXT: movw %ax, 4(%rdi) +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: orq $384, %rax # imm = 0x180 +; CHECK-NEXT: movabsq $72057594037927808, %rcx # imm = 0xFFFFFFFFFFFF80 +; CHECK-NEXT: andq %rax, %rcx +; CHECK-NEXT: movl %ecx, (%rdi) +; CHECK-NEXT: shrq $32, %rcx +; CHECK-NEXT: movw %cx, 4(%rdi) ; CHECK-NEXT: retq %b = load i56, i56* %a, align 1 %c = and i56 %b, -128 @@ -115,20 +115,20 @@ ; CHECK-NEXT: movzbl %sil, %eax ; CHECK-NEXT: movzwl 4(%rdi), %ecx ; CHECK-NEXT: movzbl 6(%rdi), %edx -; CHECK-NEXT: movl (%rdi), %esi ; CHECK-NEXT: movb %dl, 6(%rdi) ; CHECK-NEXT: # kill: %EDX %EDX %RDX %RDX ; CHECK-NEXT: shll $16, %edx ; CHECK-NEXT: orl %ecx, %edx ; CHECK-NEXT: shlq $32, %rdx -; CHECK-NEXT: orq %rdx, %rsi +; CHECK-NEXT: movl (%rdi), %ecx +; CHECK-NEXT: orq %rdx, %rcx ; CHECK-NEXT: shlq $13, %rax -; CHECK-NEXT: movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF -; CHECK-NEXT: andq %rsi, %rcx -; CHECK-NEXT: orq %rax, %rcx -; CHECK-NEXT: movl %ecx, (%rdi) -; CHECK-NEXT: shrq $32, %rcx -; CHECK-NEXT: movw %cx, 4(%rdi) +; CHECK-NEXT: movabsq $72057594037919743, %rdx # imm = 0xFFFFFFFFFFDFFF +; CHECK-NEXT: andq %rcx, %rdx +; CHECK-NEXT: orq %rax, %rdx +; CHECK-NEXT: movl %edx, (%rdi) +; CHECK-NEXT: shrq $32, %rdx +; CHECK-NEXT: movw %dx, 4(%rdi) ; CHECK-NEXT: retq %extbit = zext i1 %bit to i56 %b = load i56, i56* %a, align 1