diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45532,12 +45532,22 @@
   assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
 
   // We're looking for an oversized integer equality comparison.
+  bool IsReduction = false;
   SDValue X = SetCC->getOperand(0);
   SDValue Y = SetCC->getOperand(1);
-  EVT OpVT = X.getValueType();
-  unsigned OpSize = OpVT.getSizeInBits();
-  if (!OpVT.isScalarInteger() || OpSize < 128)
-    return SDValue();
+  unsigned OpSize = X.getValueSizeInBits();
+  if (!X.getValueType().isScalarInteger() || OpSize < 128) {
+    // See if we can find a horizontal OR reduction, compared against zero.
+    if (X.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isNullConstant(Y))
+      return SDValue();
+    ISD::NodeType BinOp;
+    SDValue Match = DAG.matchBinOpReduction(X.getNode(), BinOp, {ISD::OR});
+    if (!Match || Match.getValueSizeInBits() < 128)
+      return SDValue();
+    OpSize = Match.getValueSizeInBits();
+    IsReduction = true;
+    X = Match;
+  }
 
   // Ignore a comparison with zero because that gets special treatment in
   // EmitTest(). But make an exception for the special case of a pair of
@@ -45545,7 +45555,7 @@
   // be generated by the memcmp expansion pass with oversized integer compares
   // (see PR33325).
   bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
-  if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
+  if (isNullConstant(Y) && !IsOrXorXorTreeCCZero && !IsReduction)
     return SDValue();
 
   // Don't perform this combine if constructing the vector will be expensive.
@@ -45633,8 +45643,12 @@
     // MOVMSK.
     Cmp = emitOrXorXorTree(X, DL, DAG, VecVT, CmpVT, HasPT, ScalarToVector);
   } else {
+    // For reductions, we were comparing against a scalar zero, but now it
+    // needs to be compared to a vector zero.
     SDValue VecX = ScalarToVector(X);
-    SDValue VecY = ScalarToVector(Y);
+    SDValue VecY = IsReduction ? getZeroVector(VecX.getSimpleValueType(),
+                                               Subtarget, DAG, DL)
+                               : ScalarToVector(Y);
     if (VecVT != CmpVT) {
       Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE);
     } else if (HasPT) {
diff --git a/llvm/test/CodeGen/X86/pr45378.ll b/llvm/test/CodeGen/X86/pr45378.ll
--- a/llvm/test/CodeGen/X86/pr45378.ll
+++ b/llvm/test/CodeGen/X86/pr45378.ll
@@ -14,13 +14,14 @@
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pushq %rax
 ; SSE2-NEXT:    .cfi_def_cfa_offset 16
-; SSE2-NEXT:    movdqu (%rdi), %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT:    por %xmm0, %xmm1
-; SSE2-NEXT:    movq %xmm1, %rax
-; SSE2-NEXT:    testq %rax, %rax
+; SSE2-NEXT:    movdqu (%rdi), %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT:    pmovmskb %xmm1, %eax
+; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
 ; SSE2-NEXT:    je .LBB0_2
 ; SSE2-NEXT:  # %bb.1: # %trap
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm0, %rdi
 ; SSE2-NEXT:    callq TrapFunc
 ; SSE2-NEXT:  .LBB0_2: # %ret
@@ -33,10 +34,7 @@
 ; SSE41-NEXT:    pushq %rax
 ; SSE41-NEXT:    .cfi_def_cfa_offset 16
 ; SSE41-NEXT:    movdqu (%rdi), %xmm0
-; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE41-NEXT:    por %xmm0, %xmm1
-; SSE41-NEXT:    movq %xmm1, %rax
-; SSE41-NEXT:    testq %rax, %rax
+; SSE41-NEXT:    ptest %xmm0, %xmm0
 ; SSE41-NEXT:    je .LBB0_2
 ; SSE41-NEXT:  # %bb.1: # %trap
 ; SSE41-NEXT:    pextrq $1, %xmm0, %rdi
@@ -46,59 +44,20 @@
 ; SSE41-NEXT:    .cfi_def_cfa_offset 8
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: parseHeaders:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    pushq %rax
-; AVX1-NEXT:    .cfi_def_cfa_offset 16
-; AVX1-NEXT:    vmovdqu (%rdi), %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm1
-; AVX1-NEXT:    vmovq %xmm1, %rax
-; AVX1-NEXT:    testq %rax, %rax
-; AVX1-NEXT:    je .LBB0_2
-; AVX1-NEXT:  # %bb.1: # %trap
-; AVX1-NEXT:    vpextrq $1, %xmm0, %rdi
-; AVX1-NEXT:    callq TrapFunc
-; AVX1-NEXT:  .LBB0_2: # %ret
-; AVX1-NEXT:    popq %rax
-; AVX1-NEXT:    .cfi_def_cfa_offset 8
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: parseHeaders:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    pushq %rax
-; AVX2-NEXT:    .cfi_def_cfa_offset 16
-; AVX2-NEXT:    vmovdqu (%rdi), %xmm0
-; AVX2-NEXT:    vpbroadcastq 8(%rdi), %xmm1
-; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm1
-; AVX2-NEXT:    vmovq %xmm1, %rax
-; AVX2-NEXT:    testq %rax, %rax
-; AVX2-NEXT:    je .LBB0_2
-; AVX2-NEXT:  # %bb.1: # %trap
-; AVX2-NEXT:    vpextrq $1, %xmm0, %rdi
-; AVX2-NEXT:    callq TrapFunc
-; AVX2-NEXT:  .LBB0_2: # %ret
-; AVX2-NEXT:    popq %rax
-; AVX2-NEXT:    .cfi_def_cfa_offset 8
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: parseHeaders:
-; AVX512:       # %bb.0:
-; AVX512-NEXT:    pushq %rax
-; AVX512-NEXT:    .cfi_def_cfa_offset 16
-; AVX512-NEXT:    vmovdqu (%rdi), %xmm0
-; AVX512-NEXT:    vpbroadcastq 8(%rdi), %xmm1
-; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm1
-; AVX512-NEXT:    vmovq %xmm1, %rax
-; AVX512-NEXT:    testq %rax, %rax
-; AVX512-NEXT:    je .LBB0_2
-; AVX512-NEXT:  # %bb.1: # %trap
-; AVX512-NEXT:    vpextrq $1, %xmm0, %rdi
-; AVX512-NEXT:    callq TrapFunc
-; AVX512-NEXT:  .LBB0_2: # %ret
-; AVX512-NEXT:    popq %rax
-; AVX512-NEXT:    .cfi_def_cfa_offset 8
-; AVX512-NEXT:    retq
+; AVX-LABEL: parseHeaders:
+; AVX:       # %bb.0:
+; AVX-NEXT:    pushq %rax
+; AVX-NEXT:    .cfi_def_cfa_offset 16
+; AVX-NEXT:    vmovdqu (%rdi), %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
+; AVX-NEXT:    je .LBB0_2
+; AVX-NEXT:  # %bb.1: # %trap
+; AVX-NEXT:    vpextrq $1, %xmm0, %rdi
+; AVX-NEXT:    callq TrapFunc
+; AVX-NEXT:  .LBB0_2: # %ret
+; AVX-NEXT:    popq %rax
+; AVX-NEXT:    .cfi_def_cfa_offset 8
+; AVX-NEXT:    retq
   %vptr = bitcast i64 * %ptr to <2 x i64> *
   %vload = load <2 x i64>, <2 x i64> * %vptr, align 8
   %vreduce = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %vload)
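
For context, the combine added above fires on a horizontal OR reduction of a
128-bit-or-wider integer vector whose scalar result is compared for equality
(eq or ne) against zero. A minimal IR sketch of the matched shape, reduced
from the pr45378.ll test in this patch (the function name @any_bit_set is
illustrative):

; Reduced reproducer: OR-reduce a loaded <2 x i64>, then test the scalar
; result against zero. The reduction feeding an icmp eq/ne 0 is exactly the
; pattern DAG.matchBinOpReduction({ISD::OR}) recognizes in the code above.
declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)

define i1 @any_bit_set(<2 x i64> * %p) {
  %v = load <2 x i64>, <2 x i64> * %p, align 8
  %r = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %v)
  %c = icmp ne i64 %r, 0
  ret i1 %c
}

With this patch the whole sequence lowers to a single ptest/vptest on
SSE4.1/AVX targets and to pcmpeqb + pmovmskb + cmpl on plain SSE2, as the
updated CHECK lines show; previously it went through a pshufd + por shuffle
reduction followed by a scalar movq + testq.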