diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3553,6 +3553,14 @@ bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, DAGCombinerInfo &DCI) const; + /// Return true if SimplifyDemandedVectorElts may simplify Op, e.g. by + /// turning its non-demanded vector elements into undef. Defaults to true. + virtual bool + shouldSimplifyDemandedVectorElts(SDValue Op, + const TargetLoweringOpt &TLO) const { + return true; + } + /// Determine which of the bits specified in Mask are known to be either zero /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts /// argument allows us to only collect the known bits that are shared by the diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2642,6 +2642,10 @@ KnownUndef = KnownZero = APInt::getZero(NumElts); + const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo(); + if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO)) + return false; + // TODO: For now we assume we know nothing about scalable vectors. if (VT.isScalableVector()) return false; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -113,6 +113,10 @@ report_fatal_error("llvm.clear_cache is not supported on wasm"); } + bool + shouldSimplifyDemandedVectorElts(SDValue Op, + const TargetLoweringOpt &TLO) const override; + // Custom lowering hooks. 
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -909,6 +909,30 @@ return TargetLoweringBase::getPreferredVectorAction(VT); } +bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts( + SDValue Op, const TargetLoweringOpt &TLO) const { + // ISel process runs DAGCombiner after legalization; this step is called + // SelectionDAG optimization phase. This post-legalization combining process + // runs DAGCombiner on each node, and if there was a change to be made, + // re-runs legalization again on it and its user nodes to make sure + // everything is in a legalized state. + // + // The legalization calls lowering routines, and we do our custom lowering for + // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements + // into zeros. But there is a set of routines in DAGCombiner that turns unused + // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts + // turns unused vector elements into undefs. But this routine does not work + // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This + // combination can result in an infinite loop, in which undefs are converted to + // zeros in legalization and back to undefs in combining. + // + // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from + // running for build_vectors. + if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys) + return false; + return true; +} + //===----------------------------------------------------------------------===// // WebAssembly Lowering private implementation. 
//===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/WebAssembly/simd-simplify-demanded-vector-elts.ll b/llvm/test/CodeGen/WebAssembly/simd-simplify-demanded-vector-elts.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-simplify-demanded-vector-elts.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mattr=+simd128 -verify-machineinstrs + +target triple = "wasm32-unknown-unknown" + +; After DAG legalization, in SelectionDAG optimization phase, ISel runs +; DAGCombiner on each node, among which SimplifyDemandedVectorElts turns unused +; vector elements into undefs. And in order to make sure the DAG is in a +; legalized state, it runs legalization again, which runs our custom +; LowerBUILD_VECTOR, which converts undefs into zeros, causing an infinite loop. +; We prevent this from happening by creating a custom hook, which allows us to +; bail out of SimplifyDemandedVectorElts after legalization. + +; This is a reduced test case from a reported bug reproducer. This should not +; hang. +define void @test(i8 %0) { + %2 = insertelement <4 x i8> , i8 %0, i64 3 + %3 = zext <4 x i8> %2 to <4 x i32> + %4 = mul nuw nsw <4 x i32> %3, + %5 = add nuw nsw <4 x i32> %4, + %6 = lshr <4 x i32> %5, + %7 = mul nuw nsw <4 x i32> %6, + %8 = add nuw nsw <4 x i32> %7, + %9 = and <4 x i32> %8, + %10 = sub nsw <4 x i32> , %9 + %11 = ashr exact <4 x i32> %10, + %12 = trunc <4 x i32> %11 to <4 x i16> + store <4 x i16> %12, <4 x i16>* undef, align 4 + ret void +}