Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4224,15 +4224,18 @@
   bool N0ConstOrSplat =
       isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
+  bool N0Splat = N0->getOpcode() == ISD::SPLAT_VECTOR;
   bool N1ConstOrSplat =
       isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
+  bool N1Splat = N1->getOpcode() == ISD::SPLAT_VECTOR;
 
-  // Ensure that the constant occurs on the RHS and fold constant comparisons.
+  // Canonicalize toward having the constant on the RHS.
   // TODO: Handle non-splat vector constants. All undef causes trouble.
   // FIXME: We can't yet fold constant scalable vector splats, so avoid an
   // infinite loop here when we encounter one.
   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
-  if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
+  if (N0ConstOrSplat && ((!OpVT.isScalableVector() && (N0Splat ^ N1Splat)) ||
+                         !N1ConstOrSplat) &&
       (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
 
Index: llvm/test/CodeGen/AArch64/arm64-setcc-swap-infloop.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/arm64-setcc-swap-infloop.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm64-apple-ios --global-isel=0 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+
+; TargetLowering::SimplifySetCC wants to swap the operands of a SETCC to
+; canonicalize the constant to the RHS. The bug here was that it did so whether
+; or not the RHS was already a constant, leading to an infinite loop.
+define <16 x i1> @setcc_swap_infloop(ptr %arg) {
+; CHECK-LABEL: setcc_swap_infloop:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov x8, xzr
+; CHECK-NEXT:    mov w9, #16 ; =0x10
+; CHECK-NEXT:    movi.16b v1, #1
+; CHECK-NEXT:    ldr q0, [x8]
+; CHECK-NEXT:    cmeq.16b v2, v1, #0
+; CHECK-NEXT:    str q1, [x8]
+; CHECK-NEXT:    cmeq.16b v0, v0, #0
+; CHECK-NEXT:    str q1, [x9]
+; CHECK-NEXT:    orr.16b v0, v0, v2
+; CHECK-NEXT:    ret
+  call void @llvm.memset.p0.i64(ptr nonnull null, i8 1, i64 32, i1 false)
+  %v = getelementptr inbounds i8, ptr null, i64 16
+  %v14 = load <16 x i8>, ptr undef, align 32
+  %v15 = icmp eq <16 x i8> %v14, zeroinitializer
+  %v16 = load <16 x i8>, ptr %v, align 16
+  %v17 = icmp eq <16 x i8> %v16, zeroinitializer
+  %v20 = or <16 x i1> %v15, %v17
+  ret <16 x i1> %v20
+}