diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17604,11 +17604,9 @@ } } -// We need to check that merging these stores does not cause a loop in -// the DAG. Any store candidate may depend on another candidate -// indirectly through its operand (we already consider dependencies -// through the chain). Check in parallel by searching up from -// non-chain operands of candidates. +// We need to check that merging these stores does not cause a loop in the +// DAG. Any store candidate may depend on another candidate indirectly through +// its operands. Check in parallel by searching up from operands of candidates. bool DAGCombiner::checkMergeStoreCandidatesForDependencies( SmallVectorImpl &StoreNodes, unsigned NumStores, SDNode *RootNode) { @@ -17642,8 +17640,13 @@ SDNode *N = StoreNodes[i].MemNode; // Of the 4 Store Operands: // * Chain (Op 0) -> We have already considered these - // in candidate selection and can be - // safely ignored + // in candidate selection, but only by following the + // chain dependencies. We could still have a chain + // dependency to a load, that has a non-chain dep to + // another load, that depends on a store, etc. So it is + // possible to have dependencies that consist of a mix + // of chain and non-chain deps, and we need to include + // chain operands in the analysis here.. // * Value (Op 1) -> Cycles may happen (e.g. through load chains) // * Address (Op 2) -> Merged addresses may only vary by a fixed constant, // but aren't necessarily fromt the same base node, so @@ -17651,7 +17654,7 @@ // * (Op 3) -> Represents the pre or post-indexing offset (or undef for // non-indexed stores). Not constant on all targets (e.g. ARM) // and so can participate in a cycle. - for (unsigned j = 1; j < N->getNumOperands(); ++j) + for (unsigned j = 0; j < N->getNumOperands(); ++j) Worklist.push_back(N->getOperand(j).getNode()); } // Search through DAG. We can stop early if we find a store node. diff --git a/llvm/test/CodeGen/AArch64/aarch64-checkMergeStoreCandidatesForDependencies.ll b/llvm/test/CodeGen/AArch64/aarch64-checkMergeStoreCandidatesForDependencies.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-checkMergeStoreCandidatesForDependencies.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple aarch64-- -o - %s | FileCheck %s + +; This used to fail when legalizing types, due to not detecting that a cycle +; in the DAG was introduced when merging consecutive stores. +; +; When checking for cycles the DAG looks like this: +; +; SelectionDAG has 16 nodes: +; t0: ch = EntryToken +; t3: i64 = add nuw GlobalAddress:i64<%str0* @g0> 0, Constant:i64<8> +; t6: ch = store<(store (s64) into %ir.sp1, align 1, !tbaa !1)> t0, Constant:i64<0>, t3, undef:i64 +; t7: i64,ch = load<(load (s64) from `%str1** undef`, align 1)> t6, undef:i64, undef:i64 +; t8: i64 = add nuw t7, Constant:i64<8> +; t9: i64,ch = load<(load (s64) from %ir.lp0, align 1)> t6, t8, undef:i64 +; t21: ch = store<(store (s64) into %ir.sp01, align 1)> t19:1, Constant:i64<0>, GlobalAddress:i64<%str0* @g0> 0, undef:i64 +; t24: ch = TokenFactor t7:1, t9:1, t21 +; t14: ch,glue = CopyToReg t24, Register:i64 $x0, t19 +; t19: i64,ch = load<(load (s64) from %ir.lp12, align 1, !tbaa !7)> t0, t9, undef:i64 +; t15: ch = AArch64ISD::RET_FLAG t14, Register:i64 $x0, t14:1 +; +; The t21 store depends on t19 (via a chain dependency), +; t19 load depends on t9 (via address operand), +; t9 load depends on the t6 store (via chain). +; +; So there is a ordering dependency between the two stores, even if it can't +; be found by only following chain dependencies. Neither can it be found by +; scanning from all merge candidates when only considering the non-chain +; operands as a starting point for the scan (as +; checkMergeStoreCandidatesForDependencies used to do). +; +; This test case validates that ISel is a success, and that no store merge is +; performed. + +%str0 = type { i64, i64 } +%str1 = type { i64, %str1* } + +@g0 = external global %str0, align 1 + +define i64 @foo() { +; CHECK-LABEL: foo: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, :got:g0 +; CHECK-NEXT: ldr x8, [x8, :got_lo12:g0] +; CHECK-NEXT: ldr x9, [x8] +; CHECK-NEXT: str xzr, [x8, #8] +; CHECK-NEXT: ldr x9, [x9, #8] +; CHECK-NEXT: ldr x0, [x9] +; CHECK-NEXT: str xzr, [x8] +; CHECK-NEXT: ret +entry: + %sp0 = getelementptr inbounds %str0, %str0* @g0, i32 0, i32 0 + %sp1 = getelementptr inbounds %str0, %str0* @g0, i32 0, i32 1 + store i64 0, i64* %sp1, align 1, !tbaa !1 + %l0 = load %str1*, %str1** undef, align 1 + %lp0 = getelementptr inbounds %str1, %str1* %l0, i32 0, i32 1 + %l1 = load %str1*, %str1** %lp0, align 1 + %lp1 = getelementptr inbounds %str1, %str1* %l1, i32 0, i32 0 + %l2 = load i64, i64* %lp1, align 1, !tbaa !7 + store i64 0, i64* %sp0, align 1 + ret i64 %l2 +} + +!llvm.ident = !{!0} + +!0 = !{!"clang version 14.0.0.prerel"} +!1 = !{!2, !6, i16 1} +!2 = !{!"dinges", !3, i16 0, !6, i16 1} +!3 = !{!"int", !4, i16 0} +!4 = !{!"omnipotent char", !5, i16 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!"any pointer", !4, i16 0} +!7 = !{!2, !3, i16 0}