diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -698,6 +698,11 @@
     case ISD::Constant:
     case ISD::ConstantFP:
       return StoreSource::Constant;
+    case ISD::BUILD_VECTOR:
+      if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
+          ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
+        return StoreSource::Constant;
+      return StoreSource::Unknown;
     case ISD::EXTRACT_VECTOR_ELT:
     case ISD::EXTRACT_SUBVECTOR:
       return StoreSource::Extract;
@@ -19471,6 +19476,10 @@
         // If fp truncation is necessary give up for now.
         if (MemVT.getSizeInBits() != ElementSizeBits)
           return false;
+      } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
+                 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
+        // Not yet handled
+        return false;
       } else {
         llvm_unreachable("Invalid constant element type");
       }
@@ -19601,7 +19610,7 @@
     case StoreSource::Constant:
       if (NoTypeMatch)
        return false;
-      if (!isIntOrFPConstant(OtherBC))
+      if (getStoreSource(OtherBC) != StoreSource::Constant)
        return false;
      break;
    case StoreSource::Extract:
@@ -19823,6 +19832,8 @@
       IsElementZero = C->isZero();
     else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
       IsElementZero = C->getConstantFPValue()->isNullValue();
+    else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
+      IsElementZero = true;
     if (IsElementZero) {
       if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
         FirstZeroAfterNonZero = i;
diff --git a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
--- a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
@@ -544,53 +544,31 @@
 define void @bzero_64(ptr %a) nounwind {
 ; RV32-LABEL: bzero_64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi a1, a0, 48
-; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
 ; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    vse8.v v8, (a1)
-; RV32-NEXT:    addi a1, a0, 32
-; RV32-NEXT:    vse8.v v8, (a1)
-; RV32-NEXT:    addi a1, a0, 16
-; RV32-NEXT:    vse8.v v8, (a1)
 ; RV32-NEXT:    vse8.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: bzero_64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi a1, a0, 48
-; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
 ; RV64-NEXT:    vmv.v.i v8, 0
-; RV64-NEXT:    vse8.v v8, (a1)
-; RV64-NEXT:    addi a1, a0, 32
-; RV64-NEXT:    vse8.v v8, (a1)
-; RV64-NEXT:    addi a1, a0, 16
-; RV64-NEXT:    vse8.v v8, (a1)
 ; RV64-NEXT:    vse8.v v8, (a0)
 ; RV64-NEXT:    ret
 ;
 ; RV32-FAST-LABEL: bzero_64:
 ; RV32-FAST:       # %bb.0:
-; RV32-FAST-NEXT:    addi a1, a0, 48
-; RV32-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-FAST-NEXT:    vmv.v.i v8, 0
-; RV32-FAST-NEXT:    vse64.v v8, (a1)
-; RV32-FAST-NEXT:    addi a1, a0, 32
-; RV32-FAST-NEXT:    vse64.v v8, (a1)
-; RV32-FAST-NEXT:    addi a1, a0, 16
-; RV32-FAST-NEXT:    vse64.v v8, (a1)
 ; RV32-FAST-NEXT:    vse64.v v8, (a0)
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: bzero_64:
 ; RV64-FAST:       # %bb.0:
-; RV64-FAST-NEXT:    addi a1, a0, 48
-; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-FAST-NEXT:    vmv.v.i v8, 0
-; RV64-FAST-NEXT:    vse64.v v8, (a1)
-; RV64-FAST-NEXT:    addi a1, a0, 32
-; RV64-FAST-NEXT:    vse64.v v8, (a1)
-; RV64-FAST-NEXT:    addi a1, a0, 16
-; RV64-FAST-NEXT:    vse64.v v8, (a1)
 ; RV64-FAST-NEXT:    vse64.v v8, (a0)
 ; RV64-FAST-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 64, i1 0)
@@ -686,27 +664,15 @@
 define void @aligned_bzero_64(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_64:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    addi a1, a0, 48
-; RV32-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.i v8, 0
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 32
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 16
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV32-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_64:
 ; RV64-BOTH:       # %bb.0:
-; RV64-BOTH-NEXT:    addi a1, a0, 48
-; RV64-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.i v8, 0
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 32
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 16
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV64-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 64, i1 0)
@@ -717,28 +683,16 @@
 ; RV32-BOTH-LABEL: aligned_bzero_66:
 ; RV32-BOTH:       # %bb.0:
 ; RV32-BOTH-NEXT:    sh zero, 64(a0)
-; RV32-BOTH-NEXT:    addi a1, a0, 48
-; RV32-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.i v8, 0
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 32
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 16
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV32-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_66:
 ; RV64-BOTH:       # %bb.0:
 ; RV64-BOTH-NEXT:    sh zero, 64(a0)
-; RV64-BOTH-NEXT:    addi a1, a0, 48
-; RV64-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.i v8, 0
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 32
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 16
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV64-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 66, i1 0)
@@ -754,12 +708,8 @@
 ; RV32-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV32-BOTH-NEXT:    addi a1, a0, 64
 ; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 48
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 32
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 16
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-BOTH-NEXT:    vmv.v.i v8, 0
 ; RV32-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
@@ -771,12 +721,8 @@
 ; RV64-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV64-BOTH-NEXT:    addi a1, a0, 64
 ; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 48
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 32
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 16
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-BOTH-NEXT:    vmv.v.i v8, 0
 ; RV64-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 96, i1 0)
@@ -786,43 +732,15 @@
 define void @aligned_bzero_128(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_128:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    addi a1, a0, 112
-; RV32-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.i v8, 0
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 96
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 80
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 64
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 48
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 32
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 16
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV32-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_128:
 ; RV64-BOTH:       # %bb.0:
-; RV64-BOTH-NEXT:    addi a1, a0, 112
-; RV64-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.i v8, 0
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 96
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 80
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 64
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 48
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 32
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 16
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV64-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 128, i1 0)
@@ -832,74 +750,18 @@
 define void @aligned_bzero_256(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_256:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    addi a1, a0, 240
-; RV32-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-BOTH-NEXT:    vmv.v.i v8, 0
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 224
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 208
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 192
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 176
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 160
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 144
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV32-BOTH-NEXT:    addi a1, a0, 128
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 112
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 96
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 80
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 64
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 48
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 32
-; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    addi a1, a0, 16
+; RV32-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-BOTH-NEXT:    vmv.v.i v8, 0
 ; RV32-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV32-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_256:
 ; RV64-BOTH:       # %bb.0:
-; RV64-BOTH-NEXT:    addi a1, a0, 240
-; RV64-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-BOTH-NEXT:    vmv.v.i v8, 0
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 224
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 208
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 192
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 176
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 160
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 144
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV64-BOTH-NEXT:    addi a1, a0, 128
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 112
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 96
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 80
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 64
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 48
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 32
-; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    addi a1, a0, 16
+; RV64-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-BOTH-NEXT:    vmv.v.i v8, 0
 ; RV64-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV64-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
diff --git a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
--- a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -767,20 +767,61 @@
 }
-; Merging vector stores when sourced from a constant vector is not currently handled.
+define void @merge_vec_stores_of_zero(<4 x i32>* %ptr) {
+; CHECK-LABEL: merge_vec_stores_of_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %ymm0, 48(%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
+  %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
+  store <4 x i32> zeroinitializer, <4 x i32>* %idx0, align 16
+  store <4 x i32> zeroinitializer, <4 x i32>* %idx1, align 16
+  ret void
+}
+
+define void @merge_vec_stores_of_constant_splat(<4 x i32>* %ptr) {
+; CHECK-LABEL: merge_vec_stores_of_constant_splat:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm0 = [42,42,42,42]
+; CHECK-NEXT:    vmovaps %xmm0, 48(%rdi)
+; CHECK-NEXT:    vmovaps %xmm0, 64(%rdi)
+; CHECK-NEXT:    retq
+  %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
+  %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
+  store <4 x i32> <i32 42, i32 42, i32 42, i32 42>, <4 x i32>* %idx0, align 16
+  store <4 x i32> <i32 42, i32 42, i32 42, i32 42>, <4 x i32>* %idx1, align 16
+  ret void
+}
+
 define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {
 ; CHECK-LABEL: merge_vec_stores_of_constants:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [25,51,45,0]
 ; CHECK-NEXT:    vmovaps %xmm0, 48(%rdi)
+; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [0,265,26,0]
 ; CHECK-NEXT:    vmovaps %xmm0, 64(%rdi)
 ; CHECK-NEXT:    retq
   %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
   %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
-  store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx0, align 16
-  store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx1, align 16
+  store <4 x i32> <i32 25, i32 51, i32 45, i32 0>, <4 x i32>* %idx0, align 16
+  store <4 x i32> <i32 0, i32 265, i32 26, i32 0>, <4 x i32>* %idx1, align 16
   ret void
+}
+define void @merge_vec_stores_of_constants_with_undefs(<4 x i32>* %ptr) {
+; CHECK-LABEL: merge_vec_stores_of_constants_with_undefs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %ymm0, 48(%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
+  %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
+  store <4 x i32> , <4 x i32>* %idx0, align 16
+  store <4 x i32> , <4 x i32>* %idx1, align 16
+  ret void
 }
 ; This is a minimized test based on real code that was failing.
@@ -855,17 +896,17 @@
 ; CHECK-LABEL: merge_const_store_heterogeneous:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    jle .LBB20_3
+; CHECK-NEXT:    jle .LBB23_3
 ; CHECK-NEXT:  # %bb.1: # %.lr.ph.preheader
 ; CHECK-NEXT:    movabsq $578437695752307201, %rax # imm = 0x807060504030201
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB20_2: # %.lr.ph
+; CHECK-NEXT:  .LBB23_2: # %.lr.ph
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movq %rax, (%rsi)
 ; CHECK-NEXT:    addq $24, %rsi
 ; CHECK-NEXT:    decl %edi
-; CHECK-NEXT:    jne .LBB20_2
-; CHECK-NEXT:  .LBB20_3: # %._crit_edge
+; CHECK-NEXT:    jne .LBB23_2
+; CHECK-NEXT:  .LBB23_3: # %._crit_edge
 ; CHECK-NEXT:    retq
   %1 = icmp sgt i32 %count, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -4211,13 +4211,33 @@
 }
 define void @store_v128i1_constant(ptr %R) {
-; CHECK-LABEL: store_v128i1_constant:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    movabsq $-4611686310485172227, %rax ## imm = 0xBFFFFFBBFFFFDFFD
-; CHECK-NEXT:    movq %rax, 8(%rdi)
-; CHECK-NEXT:    movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
-; CHECK-NEXT:    movq %rax, (%rdi)
-; CHECK-NEXT:    retq
+; KNL-LABEL: store_v128i1_constant:
+; KNL:       ## %bb.0: ## %entry
+; KNL-NEXT:    vmovaps {{.*#+}} xmm0 = [61437,65535,65403,57343,57341,65535,65467,49151]
+; KNL-NEXT:    vmovaps %xmm0, (%rdi)
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: store_v128i1_constant:
+; SKX:       ## %bb.0: ## %entry
+; SKX-NEXT:    movabsq $-4611686310485172227, %rax ## imm = 0xBFFFFFBBFFFFDFFD
+; SKX-NEXT:    movq %rax, 8(%rdi)
+; SKX-NEXT:    movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
+; SKX-NEXT:    movq %rax, (%rdi)
+; SKX-NEXT:    retq
+;
+; AVX512BW-LABEL: store_v128i1_constant:
+; AVX512BW:       ## %bb.0: ## %entry
+; AVX512BW-NEXT:    movabsq $-4611686310485172227, %rax ## imm = 0xBFFFFFBBFFFFDFFD
+; AVX512BW-NEXT:    movq %rax, 8(%rdi)
+; AVX512BW-NEXT:    movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
+; AVX512BW-NEXT:    movq %rax, (%rdi)
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: store_v128i1_constant:
+; AVX512DQ:       ## %bb.0: ## %entry
+; AVX512DQ-NEXT:    vmovaps {{.*#+}} xmm0 = [61437,65535,65403,57343,57341,65535,65467,49151]
+; AVX512DQ-NEXT:    vmovaps %xmm0, (%rdi)
+; AVX512DQ-NEXT:    retq
 ;
 ; X86-LABEL: store_v128i1_constant:
 ; X86:       ## %bb.0: ## %entry