Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -7016,6 +7016,18 @@
     return DAG.getTargetConstant(1, dl, VT);
 
   if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+    if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
+      // Split the pieces.
+      SDValue Lower =
+          DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(0, 32));
+      SDValue Upper =
+          DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32));
+      // We have to manually lower both halves so getNode doesn't try to
+      // reassemble the build_vector.
+      Lower = LowerBUILD_VECTORvXi1(Lower, DAG);
+      Upper = LowerBUILD_VECTORvXi1(Upper, DAG);
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper);
+    }
     SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
     if (Imm.getValueSizeInBits() == VT.getSizeInBits())
       return DAG.getBitcast(VT, Imm);
Index: llvm/trunk/test/CodeGen/X86/pr34605.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pr34605.ll
+++ llvm/trunk/test/CodeGen/X86/pr34605.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=avx512bw,avx512vl,avx512dq | FileCheck %s
+
+define void @pr34605(i8* nocapture %s, i32 %p) {
+; CHECK-LABEL: pr34605:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %zmm0
+; CHECK-NEXT:    vpcmpeqd {{\.LCPI.*}}, %zmm0, %k0
+; CHECK-NEXT:    vpcmpeqd {{\.LCPI.*}}, %zmm0, %k1
+; CHECK-NEXT:    kunpckwd %k0, %k1, %k0
+; CHECK-NEXT:    vpcmpeqd {{\.LCPI.*}}, %zmm0, %k1
+; CHECK-NEXT:    vpcmpeqd {{\.LCPI.*}}, %zmm0, %k2
+; CHECK-NEXT:    kunpckwd %k1, %k2, %k1
+; CHECK-NEXT:    kunpckdq %k0, %k1, %k0
+; CHECK-NEXT:    kxord %k0, %k0, %k1
+; CHECK-NEXT:    movl $1, %ecx
+; CHECK-NEXT:    kmovd %ecx, %k2
+; CHECK-NEXT:    kunpckdq %k2, %k1, %k1
+; CHECK-NEXT:    kandq %k1, %k0, %k1
+; CHECK-NEXT:    vmovdqu8 {{\.LCPI.*}}, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vmovdqu32 %zmm0, (%eax)
+; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovdqu32 %zmm0, 64(%eax)
+; CHECK-NEXT:    vmovdqu32 %zmm0, 128(%eax)
+; CHECK-NEXT:    vmovdqu32 %zmm0, 192(%eax)
+; CHECK-NEXT:    vmovdqu32 %zmm0, 256(%eax)
+; CHECK-NEXT:    vmovdqu32 %zmm0, 320(%eax)
+; CHECK-NEXT:    vmovdqu32 %zmm0, 384(%eax)
+; CHECK-NEXT:    vmovdqu32 %zmm0, 448(%eax)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retl
+entry:
+  %broadcast.splatinsert = insertelement <64 x i32> undef, i32 %p, i32 0
+  %broadcast.splat = shufflevector <64 x i32> %broadcast.splatinsert, <64 x i32> undef, <64 x i32> zeroinitializer
+  %0 = icmp eq <64 x i32> %broadcast.splat,
+  %1 = and <64 x i1> %0,
+  %2 = zext <64 x i1> %1 to <64 x i8>
+  %3 = bitcast i8* %s to <64 x i8>*
+  store <64 x i8> %2, <64 x i8>* %3, align 1
+  %4 = getelementptr inbounds i8, i8* %s, i32 64
+  %5 = bitcast i8* %4 to <64 x i8>*
+  store <64 x i8> zeroinitializer, <64 x i8>* %5, align 1
+  %6 = getelementptr inbounds i8, i8* %s, i32 128
+  %7 = bitcast i8* %6 to <64 x i8>*
+  store <64 x i8> zeroinitializer, <64 x i8>* %7, align 1
+  %8 = getelementptr inbounds i8, i8* %s, i32 192
+  %9 = bitcast i8* %8 to <64 x i8>*
+  store <64 x i8> zeroinitializer, <64 x i8>* %9, align 1
+  %10 = getelementptr inbounds i8, i8* %s, i32 256
+  %11 = bitcast i8* %10 to <64 x i8>*
+  store <64 x i8> zeroinitializer, <64 x i8>* %11, align 1
+  %12 = getelementptr inbounds i8, i8* %s, i32 320
+  %13 = bitcast i8* %12 to <64 x i8>*
+  store <64 x i8> zeroinitializer, <64 x i8>* %13, align 1
+  %14 = getelementptr inbounds i8, i8* %s, i32 384
+  %15 = bitcast i8* %14 to <64 x i8>*
+  store <64 x i8> zeroinitializer, <64 x i8>* %15, align 1
+  %16 = getelementptr inbounds i8, i8* %s, i32 448
+  %17 = bitcast i8* %16 to <64 x i8>*
+  store <64 x i8> zeroinitializer, <64 x i8>* %17, align 1
+  ret void
+}
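For context on why the split is needed: LowerBUILD_VECTORvXi1 normally materializes a constant vXi1 mask by packing all lanes into one scalar immediate (ConvertI1VectorToInteger), but for v64i1 that immediate is an i64, which a 32-bit target cannot hold in a general-purpose register. Splitting at lane 32 keeps each immediate at 32 bits; the two halves are rejoined in mask registers, which is exactly the kxord/kmovd/kunpckdq sequence checked in the test above. Below is a minimal standalone sketch of that bit-packing idea, using the test's mask constant (only lane 0 set); the helper name is made up for illustration, and this is not LLVM API code.

// Standalone sketch: pack each 32-lane half of a <64 x i1> constant into a
// 32-bit immediate, then concatenate, mirroring what the patched lowering
// and the kmovd/kunpckdq instructions do. packHalf is a hypothetical helper.
#include <array>
#include <cstdint>
#include <cstdio>

// Pack 32 boolean lanes into one 32-bit immediate, lane 0 in bit 0.
// This plays the role ConvertI1VectorToInteger has for one v32i1 half.
static uint32_t packHalf(const bool *Lanes) {
  uint32_t Imm = 0;
  for (int I = 0; I < 32; ++I)
    Imm |= static_cast<uint32_t>(Lanes[I]) << I;
  return Imm;
}

int main() {
  // The constant <64 x i1> mask from the test: only lane 0 is true.
  std::array<bool, 64> Mask{};
  Mask[0] = true;

  // Split at lane 32 and lower each half to a 32-bit immediate.
  uint32_t Lower = packHalf(Mask.data());       // 0x00000001 (the kmovd of 1)
  uint32_t Upper = packHalf(Mask.data() + 32);  // 0x00000000 (the kxord zero)

  // CONCAT_VECTORS(v32i1, v32i1) -> v64i1, i.e. the kunpckdq step.
  uint64_t Mask64 = (static_cast<uint64_t>(Upper) << 32) | Lower;
  std::printf("k1 = 0x%016llx\n", static_cast<unsigned long long>(Mask64));
  return 0;
}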