Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21121,14 +21121,17 @@ // If we know required SrcValue1 and SrcValue2 have relatively large // alignment compared to the size and offset of the access, we may be able // to prove they do not alias. This check is conservative for now to catch - // cases created by splitting vector types. + // cases created by splitting vector types; it only works when the offsets are + // multiples of the size of the access. int64_t SrcValOffset0 = MUC0.MMO->getOffset(); int64_t SrcValOffset1 = MUC1.MMO->getOffset(); unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment(); unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment(); if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() && - *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) { + *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes && + SrcValOffset0 % *MUC0.NumBytes == 0 && + SrcValOffset1 % *MUC0.NumBytes == 0) { int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; Index: llvm/test/CodeGen/ARM/memset-align.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/memset-align.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=thumbv8-unknown-linux-android10000 -o - | FileCheck %s + +%struct.af = type <{ i64, i64, i8, i8, i8, [5 x i8] }> + +define void @test() { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: mov.w r1, #-1 +; CHECK-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEXT: movs 
r2, #15 +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: strd r1, r1, [sp, #8] +; CHECK-NEXT: strd r1, r1, [sp] +; CHECK-NEXT: str r1, [sp, #16] +; CHECK-NEXT: vst1.64 {d16, d17}, [r3], r2 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: str r2, [r3] +; CHECK-NEXT: str r1, [sp, #20] +; CHECK-NEXT: bl callee +; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: pop {r7, pc} +entry: + %a = alloca %struct.af, align 8 + %0 = bitcast %struct.af* %a to i8* + %1 = bitcast %struct.af* %a to i8* + call void @llvm.memset.p0i8.i64(i8* align 8 %1, i8 -1, i64 24, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 19, i1 false) + call void @callee(%struct.af* %a) + ret void +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) +declare void @callee(%struct.af*) local_unnamed_addr #1