Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -698,6 +698,11 @@
   case ISD::Constant:
   case ISD::ConstantFP:
     return StoreSource::Constant;
+  case ISD::BUILD_VECTOR:
+    if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
+        ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
+      return StoreSource::Constant;
+    return StoreSource::Unknown;
   case ISD::EXTRACT_VECTOR_ELT:
   case ISD::EXTRACT_SUBVECTOR:
     return StoreSource::Extract;
@@ -19474,6 +19479,10 @@
         // If fp truncation is necessary give up for now.
         if (MemVT.getSizeInBits() != ElementSizeBits)
          return false;
+      } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
+                 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
+        // Not yet handled
+        return false;
       } else {
         llvm_unreachable("Invalid constant element type");
       }
@@ -19604,7 +19613,7 @@
     case StoreSource::Constant:
       if (NoTypeMatch)
        return false;
-      if (!isIntOrFPConstant(OtherBC))
+      if (getStoreSource(OtherBC) != StoreSource::Constant)
        return false;
      break;
     case StoreSource::Extract:
@@ -19826,6 +19835,8 @@
         IsElementZero = C->isZero();
       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
         IsElementZero = C->getConstantFPValue()->isNullValue();
+      else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
+        IsElementZero = true;
       if (IsElementZero) {
         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
           FirstZeroAfterNonZero = i;
Index: llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
+++ llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
@@ -544,53 +544,31 @@
 define void @bzero_64(ptr %a) nounwind {
 ; RV32-LABEL: bzero_64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi a1, a0, 48
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
 ; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vse8.v v8, (a1)
-; RV32-NEXT: addi a1, a0, 32
-; RV32-NEXT: vse8.v v8, (a1)
-; RV32-NEXT: addi a1, a0, 16
-; RV32-NEXT: vse8.v v8, (a1)
 ; RV32-NEXT: vse8.v v8, (a0)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: bzero_64:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi a1, a0, 48
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
 ; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vse8.v v8, (a1)
-; RV64-NEXT: addi a1, a0, 32
-; RV64-NEXT: vse8.v v8, (a1)
-; RV64-NEXT: addi a1, a0, 16
-; RV64-NEXT: vse8.v v8, (a1)
 ; RV64-NEXT: vse8.v v8, (a0)
 ; RV64-NEXT: ret
 ;
 ; RV32-FAST-LABEL: bzero_64:
 ; RV32-FAST: # %bb.0:
-; RV32-FAST-NEXT: addi a1, a0, 48
-; RV32-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-FAST-NEXT: vmv.v.i v8, 0
-; RV32-FAST-NEXT: vse64.v v8, (a1)
-; RV32-FAST-NEXT: addi a1, a0, 32
-; RV32-FAST-NEXT: vse64.v v8, (a1)
-; RV32-FAST-NEXT: addi a1, a0, 16
-; RV32-FAST-NEXT: vse64.v v8, (a1)
 ; RV32-FAST-NEXT: vse64.v v8, (a0)
 ; RV32-FAST-NEXT: ret
 ;
 ; RV64-FAST-LABEL: bzero_64:
 ; RV64-FAST: # %bb.0:
-; RV64-FAST-NEXT: addi a1, a0, 48
-; RV64-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-FAST-NEXT: vmv.v.i v8, 0
-; RV64-FAST-NEXT: vse64.v v8, (a1)
-; RV64-FAST-NEXT: addi a1, a0, 32
-; RV64-FAST-NEXT: vse64.v v8, (a1)
-; RV64-FAST-NEXT: addi a1, a0, 16
-; RV64-FAST-NEXT: vse64.v v8, (a1)
 ; RV64-FAST-NEXT: vse64.v v8, (a0)
 ; RV64-FAST-NEXT: ret
   tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 64, i1 0)
@@ -686,27 +664,15 @@
 define void @aligned_bzero_64(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_64:
 ; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: addi a1, a0, 48
-; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-BOTH-NEXT: vmv.v.i v8, 0
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 32
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 16
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
 ; RV32-BOTH-NEXT: vse64.v v8, (a0)
 ; RV32-BOTH-NEXT: ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_64:
 ; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: addi a1, a0, 48
-; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-BOTH-NEXT: vmv.v.i v8, 0
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 32
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 16
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
 ; RV64-BOTH-NEXT: vse64.v v8, (a0)
 ; RV64-BOTH-NEXT: ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 64, i1 0)
@@ -717,28 +683,16 @@
 ; RV32-BOTH-LABEL: aligned_bzero_66:
 ; RV32-BOTH: # %bb.0:
 ; RV32-BOTH-NEXT: sh zero, 64(a0)
-; RV32-BOTH-NEXT: addi a1, a0, 48
-; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV32-BOTH-NEXT: vmv.v.i v8, 0
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 32
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 16
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
 ; RV32-BOTH-NEXT: vse64.v v8, (a0)
 ; RV32-BOTH-NEXT: ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_66:
 ; RV64-BOTH: # %bb.0:
 ; RV64-BOTH-NEXT: sh zero, 64(a0)
-; RV64-BOTH-NEXT: addi a1, a0, 48
-; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-BOTH-NEXT: vmv.v.i v8, 0
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 32
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 16
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
 ; RV64-BOTH-NEXT: vse64.v v8, (a0)
 ; RV64-BOTH-NEXT: ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 66, i1 0)
@@ -754,12 +708,8 @@
 ; RV32-BOTH-NEXT: vse64.v v8, (a1)
 ; RV32-BOTH-NEXT: addi a1, a0, 64
 ; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 48
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 32
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 16
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
+; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-BOTH-NEXT: vmv.v.i v8, 0
 ; RV32-BOTH-NEXT: vse64.v v8, (a0)
 ; RV32-BOTH-NEXT: ret
 ;
@@ -771,12 +721,8 @@
 ; RV64-BOTH-NEXT: vse64.v v8, (a1)
 ; RV64-BOTH-NEXT: addi a1, a0, 64
 ; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 48
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 32
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 16
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
+; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64-BOTH-NEXT: vmv.v.i v8, 0
 ; RV64-BOTH-NEXT: vse64.v v8, (a0)
 ; RV64-BOTH-NEXT: ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 96, i1 0)
@@ -786,43 +732,15 @@
 define void @aligned_bzero_128(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_128:
 ; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: addi a1, a0, 112
-; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-BOTH-NEXT: vmv.v.i v8, 0
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 96
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 80
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 64
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 48
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 32
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 16
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
 ; RV32-BOTH-NEXT: vse64.v v8, (a0)
 ; RV32-BOTH-NEXT: ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_128:
 ; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: addi a1, a0, 112
-; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-BOTH-NEXT: vmv.v.i v8, 0
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 96
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 80
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 64
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 48
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 32
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 16
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
 ; RV64-BOTH-NEXT: vse64.v v8, (a0)
 ; RV64-BOTH-NEXT: ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 128, i1 0)
@@ -832,74 +750,18 @@
 define void @aligned_bzero_256(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_256:
 ; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: addi a1, a0, 240
-; RV32-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-BOTH-NEXT: vmv.v.i v8, 0
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 224
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 208
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 192
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 176
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 160
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 144
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
 ; RV32-BOTH-NEXT: addi a1, a0, 128
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 112
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 96
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 80
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 64
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 48
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 32
-; RV32-BOTH-NEXT: vse64.v v8, (a1)
-; RV32-BOTH-NEXT: addi a1, a0, 16
+; RV32-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-BOTH-NEXT: vmv.v.i v8, 0
 ; RV32-BOTH-NEXT: vse64.v v8, (a1)
 ; RV32-BOTH-NEXT: vse64.v v8, (a0)
 ; RV32-BOTH-NEXT: ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_256:
 ; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: addi a1, a0, 240
-; RV64-BOTH-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-BOTH-NEXT: vmv.v.i v8, 0
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 224
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 208
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 192
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 176
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 160
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 144
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
 ; RV64-BOTH-NEXT: addi a1, a0, 128
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 112
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 96
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 80
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 64
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 48
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 32
-; RV64-BOTH-NEXT: vse64.v v8, (a1)
-; RV64-BOTH-NEXT: addi a1, a0, 16
+; RV64-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-BOTH-NEXT: vmv.v.i v8, 0
 ; RV64-BOTH-NEXT: vse64.v v8, (a1)
 ; RV64-BOTH-NEXT: vse64.v v8, (a0)
 ; RV64-BOTH-NEXT: ret
Index: llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
===================================================================
--- llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -767,13 +767,12 @@
 }
 
-; Merging vector stores when sourced from a constant vector is not currently handled.
 define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {
 ; CHECK-LABEL: merge_vec_stores_of_constants:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vmovaps %xmm0, 48(%rdi)
-; CHECK-NEXT: vmovaps %xmm0, 64(%rdi)
+; CHECK-NEXT: vmovups %ymm0, 48(%rdi)
+; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
   %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
   %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
Index: llvm/test/CodeGen/X86/avx512-mask-op.ll
===================================================================
--- llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -4211,13 +4211,33 @@
 }
 
 define void @store_v128i1_constant(ptr %R) {
-; CHECK-LABEL: store_v128i1_constant:
-; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: movabsq $-4611686310485172227, %rax ## imm = 0xBFFFFFBBFFFFDFFD
-; CHECK-NEXT: movq %rax, 8(%rdi)
-; CHECK-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
-; CHECK-NEXT: movq %rax, (%rdi)
-; CHECK-NEXT: retq
+; KNL-LABEL: store_v128i1_constant:
+; KNL: ## %bb.0: ## %entry
+; KNL-NEXT: vmovaps {{.*#+}} xmm0 = [61437,65535,65403,57343,57341,65535,65467,49151]
+; KNL-NEXT: vmovaps %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: store_v128i1_constant:
+; SKX: ## %bb.0: ## %entry
+; SKX-NEXT: movabsq $-4611686310485172227, %rax ## imm = 0xBFFFFFBBFFFFDFFD
+; SKX-NEXT: movq %rax, 8(%rdi)
+; SKX-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
+; SKX-NEXT: movq %rax, (%rdi)
+; SKX-NEXT: retq
+;
+; AVX512BW-LABEL: store_v128i1_constant:
+; AVX512BW: ## %bb.0: ## %entry
+; AVX512BW-NEXT: movabsq $-4611686310485172227, %rax ## imm = 0xBFFFFFBBFFFFDFFD
+; AVX512BW-NEXT: movq %rax, 8(%rdi)
+; AVX512BW-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
+; AVX512BW-NEXT: movq %rax, (%rdi)
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: store_v128i1_constant:
+; AVX512DQ: ## %bb.0: ## %entry
+; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [61437,65535,65403,57343,57341,65535,65467,49151]
+; AVX512DQ-NEXT: vmovaps %xmm0, (%rdi)
+; AVX512DQ-NEXT: retq
 ;
 ; X86-LABEL: store_v128i1_constant:
 ; X86: ## %bb.0: ## %entry