Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -459,12 +459,12 @@ break; } - LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Result->dump(&DAG)); switch (Action) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Promote: - Result = Promote(Op); + Result = Promote(Result); Changed = true; break; case TargetLowering::Legal: @@ -472,7 +472,7 @@ break; case TargetLowering::Custom: { LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); - if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { + if (SDValue Tmp1 = TLI.LowerOperation(Result, DAG)) { LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); Result = Tmp1; break; @@ -481,7 +481,7 @@ LLVM_FALLTHROUGH; } case TargetLowering::Expand: - Result = Expand(Op); + Result = Expand(Result); } // Make sure that the generated code is itself legal. Index: llvm/test/CodeGen/X86/avx512-cmp.ll =================================================================== --- llvm/test/CodeGen/X86/avx512-cmp.ll +++ llvm/test/CodeGen/X86/avx512-cmp.ll @@ -181,3 +181,39 @@ if.end.i: ret i32 6 } + +; This test previously caused an infinite loop in legalize vector ops due to +; CSE triggering on the call to UpdateNodeOperands and the resulting node not +; being passed to LowerOperation. The add is needed to force the zext into a +; sext on that path. The shuffle keeps the zext alive. The xor somehow +; influences the zext to be visited before the sext, exposing the CSE opportunity +; for the sext since zext of setcc is custom legalized to a sext and shift. 
+define <8 x i32> @legalize_loop(<8 x double> %arg) {
+; KNL-LABEL: legalize_loop:
+; KNL: ## %bb.0:
+; KNL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vcmpnltpd %zmm0, %zmm1, %k1
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpsrld $31, %ymm0, %ymm1
+; KNL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
+; KNL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
+; KNL-NEXT: vpsubd %ymm0, %ymm1, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: legalize_loop:
+; SKX: ## %bb.0:
+; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vcmpnltpd %zmm0, %zmm1, %k0
+; SKX-NEXT: vpmovm2d %k0, %ymm0
+; SKX-NEXT: vpsrld $31, %ymm0, %ymm1
+; SKX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
+; SKX-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
+; SKX-NEXT: vpsubd %ymm0, %ymm1, %ymm0
+; SKX-NEXT: retq
+  %tmp = fcmp ogt <8 x double> %arg, zeroinitializer
+  %tmp1 = xor <8 x i1> %tmp, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> ; invert every lane of the compare mask
+  %tmp2 = zext <8 x i1> %tmp1 to <8 x i32>
+  %tmp3 = shufflevector <8 x i32> %tmp2, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; reverse the eight lanes
+  %tmp4 = add <8 x i32> %tmp2, %tmp3
+  ret <8 x i32> %tmp4
+}