diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -23159,7 +23159,13 @@ // If the input is a concat_vectors, just make a larger concat by padding // with smaller undefs. - if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) { + // + // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining + // here could cause an infinite loop. That legalizing happens when LegalDAG + // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is + // scalable. + if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() && + !(LegalDAG && In.getValueType().isScalableVector())) { unsigned NumOps = N->getNumOperands() * In.getNumOperands(); SmallVector Ops(In->op_begin(), In->op_end()); Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType())); diff --git a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s + +; Test that we do not end in an infinite loop (https://github.com/llvm/llvm-project/issues/63322) + +declare void @llvm.masked.scatter.nxv16i8.nxv16p0(, , i32 immarg, ) + +define fastcc i8 @allocno_reload_assign() { +; CHECK-LABEL: allocno_reload_assign: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.b, #0 // =0x0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpklo z4.h, z3.b +; CHECK-NEXT: uunpkhi z7.h, z3.b +; CHECK-NEXT: uunpklo z2.s, z4.h +; CHECK-NEXT: uunpkhi z4.s, z4.h +; CHECK-NEXT: uunpklo z6.s, z7.h +; CHECK-NEXT: uunpkhi z16.s, z7.h +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: mov z0.d, #0 // =0x0 +; CHECK-NEXT: uunpklo z1.d, z2.s +; CHECK-NEXT: uunpkhi z2.d, z2.s +; CHECK-NEXT: uunpklo z3.d, z4.s +; CHECK-NEXT: uunpkhi z4.d, z4.s +; CHECK-NEXT: uunpklo z5.d, z6.s +; CHECK-NEXT: uunpkhi z6.d, z6.s +; CHECK-NEXT: uunpklo z7.d, z16.s +; CHECK-NEXT: uunpkhi z16.d, z16.s +; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fmov d17, xzr +; CHECK-NEXT: cmpeq p2.d, p0/z, z17.d, #0 +; CHECK-NEXT: uzp1 p2.s, p2.s, p0.s +; CHECK-NEXT: uzp1 p2.h, p2.h, p0.h +; CHECK-NEXT: uzp1 p2.b, p2.b, p0.b +; CHECK-NEXT: mov z17.b, p2/z, #1 // =0x1 +; CHECK-NEXT: fmov w8, s17 +; CHECK-NEXT: sbfx x8, x8, #0, #1 +; CHECK-NEXT: whilelo p2.b, xzr, x8 +; CHECK-NEXT: not p2.b, p1/z, p2.b +; CHECK-NEXT: punpklo p3.h, p2.b +; CHECK-NEXT: punpkhi p2.h, p2.b +; CHECK-NEXT: punpklo p4.h, p3.b +; CHECK-NEXT: punpkhi p3.h, p3.b +; CHECK-NEXT: punpklo p5.h, p4.b +; CHECK-NEXT: punpkhi p4.h, p4.b +; CHECK-NEXT: st1b { z1.d }, p5, [z0.d] +; CHECK-NEXT: punpklo p5.h, p2.b +; CHECK-NEXT: st1b { z2.d }, p4, [z0.d] +; CHECK-NEXT: punpklo p4.h, p3.b +; CHECK-NEXT: punpkhi p2.h, p2.b +; CHECK-NEXT: punpkhi p3.h, p3.b +; CHECK-NEXT: st1b { z3.d }, p4, [z0.d] +; CHECK-NEXT: punpklo p4.h, p5.b +; CHECK-NEXT: st1b { z4.d }, p3, [z0.d] +; CHECK-NEXT: punpkhi p3.h, p5.b +; CHECK-NEXT: st1b { z5.d }, p4, [z0.d] +; CHECK-NEXT: punpklo p4.h, p2.b +; CHECK-NEXT: punpkhi p2.h, p2.b +; CHECK-NEXT: st1b { z6.d }, p3, [z0.d] +; CHECK-NEXT: st1b { z7.d }, p4, [z0.d] +; CHECK-NEXT: st1b { z16.d }, p2, [z0.d] +; CHECK-NEXT: b .LBB0_1 + br label %1 + +1: ; preds = %1, %0 + call void @llvm.masked.scatter.nxv16i8.nxv16p0( zeroinitializer, zeroinitializer, i32 0, xor ( shufflevector ( icmp eq ( insertelement ( poison, ptr null, i64 0), zeroinitializer), poison, zeroinitializer), shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer))) + br label %1 +} + +uselistorder poison, { 1, 2, 0 }