diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -927,6 +927,7 @@ ID.AddInteger(MN->getRawSubclassData()); ID.AddInteger(MN->getPointerInfo().getAddrSpace()); ID.AddInteger(MN->getMemOperand()->getFlags()); + ID.AddInteger(MN->getMemOperand()->getSize()); } } @@ -8066,6 +8067,7 @@ Opcode, dl.getIROrder(), VTList, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); ID.AddInteger(MMO->getFlags()); + ID.AddInteger(MMO->getSize()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); diff --git a/llvm/test/CodeGen/NVPTX/dag-cse.ll b/llvm/test/CodeGen/NVPTX/dag-cse.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/dag-cse.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=nvptx64 | FileCheck %s + +%st = type { i8, i8, i16 } + +@a = internal addrspace(1) global %st zeroinitializer, align 8 +@b = internal addrspace(1) global i32 0, align 8 +@c = internal addrspace(1) global i32 0, align 8 + +; Verify that loads with different memory types are not subject to CSE +; once they are promoted to the same type. 
+; +; CHECK: ld.global.v2.u8 {%[[B1:rs[0-9]+]], %[[B2:rs[0-9]+]]}, [a]; +; CHECK: st.global.v2.u8 [b], {%[[B1]], %[[B2]]}; +; +; CHECK: ld.global.v2.u16 {%[[C1:rs[0-9]+]], %[[C2:rs[0-9]+]]}, [a]; +; CHECK: st.global.v2.u16 [c], {%[[C1]], %[[C2]]}; + +define void @test1() #0 { + %1 = load <2 x i8>, ptr addrspace(1) @a, align 8 + store <2 x i8> %1, ptr addrspace(1) @b, align 8 + %2 = load <2 x i16>, ptr addrspace(1) @a, align 8 + store <2 x i16> %2, ptr addrspace(1) @c, align 8 + ret void +} diff --git a/llvm/test/CodeGen/X86/dag-cse-target.ll b/llvm/test/CodeGen/X86/dag-cse-target.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/dag-cse-target.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=x86_64-unknown -debug 2>&1 | FileCheck %s + +; Verify that CSE works for identical target memory nodes: +; +; CHECK: Creating new node: t{{[0-9]+}}: v8f16,ch = X86ISD::VBROADCAST_LOAD<(load (s16) from `ptr undef` + 4)> t0, undef:i64 +; CHECK-NOT: Creating new node: t{{[0-9]+}}: v8f16,ch = X86ISD::VBROADCAST_LOAD<(load (s16) from `ptr undef` + 4)> t0, undef:i64 +; +define void @t() #1 { +entry: + %elem = load half, ptr undef, align 16 + %load1 = insertelement <16 x half> poison, half %elem, i64 15 + %load2 = load <16 x half>, ptr undef, align 2 + %shuffle = shufflevector <16 x half> %load1, <16 x half> %load2, <16 x i32> + + %cmp = fcmp oeq <16 x half> %load2, %shuffle + %zext = zext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %zext, ptr undef, align 16 + ret void +} + +attributes #1 = { nounwind uwtable "target-cpu"="skx" }