Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21107,21 +21107,24 @@ // If we know required SrcValue1 and SrcValue2 have relatively large // alignment compared to the size and offset of the access, we may be able // to prove they do not alias. This check is conservative for now to catch - // cases created by splitting vector types. + // cases created by splitting vector types, it only works when the offsets are + // multiples of the size of the data. int64_t SrcValOffset0 = MUC0.MMO->getOffset(); int64_t SrcValOffset1 = MUC1.MMO->getOffset(); unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment(); unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment(); + auto &Size0 = MUC0.NumBytes; + auto &Size1 = MUC1.NumBytes; if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && - MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() && - *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) { + Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 && + OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 && + SrcValOffset1 % *Size1 == 0) { int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; // There is no overlap between these relatively aligned accesses of // similar size. Return no alias. - if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 || - (OffAlign1 + *MUC1.NumBytes) <= OffAlign0) + if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0) return false; } @@ -21134,11 +21137,12 @@ UseAA = false; #endif - if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) { + if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && + Size0.hasValue() && Size1.hasValue()) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); - int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset; - int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset; + int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset; + int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset; AliasResult AAResult = AA->alias( MemoryLocation(MUC0.MMO->getValue(), Overlap0, UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()), Index: llvm/test/CodeGen/ARM/memset-align.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/memset-align.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=thumbv8-unknown-linux-android -o - | FileCheck %s + +%struct.af = type <{ i64, i64, i8, i8, i8, [5 x i8] }> + +define void @test() { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: mov.w r1, #-1 +; CHECK-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEXT: movs r2, #15 +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: strd r1, r1, [sp, #8] +; CHECK-NEXT: strd r1, r1, [sp] +; CHECK-NEXT: str r1, [sp, #16] +; CHECK-NEXT: vst1.64 {d16, d17}, [r3], r2 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: str r2, [r3] +; CHECK-NEXT: str r1, [sp, #20] +; CHECK-NEXT: bl callee +; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: pop {r7, pc} +entry: + %a = alloca %struct.af, align 8 + %0 = bitcast %struct.af* %a to i8* + %1 = bitcast %struct.af* %a to i8* + call void @llvm.memset.p0i8.i64(i8* align 8 %1, i8 -1, i64 24, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 19, i1 false) + call void @callee(%struct.af* %a) + ret void +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) +declare void @callee(%struct.af*) local_unnamed_addr #1