Index: lib/Target/ARM/ARMLoadStoreOptimizer.cpp =================================================================== --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1575,6 +1575,36 @@ if (MMO.getAlignment() < 4) return false; + // Check that the pointer has not been cast up to a larger type, as this + // could create an unaligned access. + std::function IsAlignedPtr = [&](const Value *Ptr) { + auto *PtrTy = cast(Ptr->getType()); + unsigned AccessSize = PtrTy->getElementType()->getScalarSizeInBits(); + if (auto *Cast = dyn_cast(Ptr)) { + if (auto *CastedPtrTy = dyn_cast(Cast->getSrcTy())) { + unsigned MemSize = CastedPtrTy->getElementType()->getScalarSizeInBits(); + if (isa(CastedPtrTy->getElementType())) + return IsAlignedPtr(Cast->getOperand(0)); + else + return AccessSize <= MemSize; + } + } + return true; + }; + + // Ensure that the underlying memory is truly aligned. This ensures that we + // don't create illegal accesses when the DSP instructions use LDR + // and STR to load and store packed data. + const MachinePointerInfo &PointerInfo = MMO.getPointerInfo(); + if (!PointerInfo.V.isNull() && PointerInfo.V.is()) { + auto *PtrVal = PointerInfo.V.get(); + if (auto *GEP = dyn_cast(PtrVal)) { + if (!IsAlignedPtr(GEP->getPointerOperand())) + return false; + } else if (isa(PtrVal) && !IsAlignedPtr(PtrVal)) + return false; + } + // str could probably be eliminated entirely, but for now we just want // to avoid making a mess of it. // FIXME: Use str as a wildcard to enable better stm folding. 
Index: test/CodeGen/ARM/ldrd-strd-unaligned.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/ldrd-strd-unaligned.ll @@ -0,0 +1,55 @@ +; RUN: llc -mtriple=thumbv7em -mcpu=cortex-m7 %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7 -mcpu=cortex-a8 %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8 -mcpu=cortex-a53 %s -o - | FileCheck %s + +; CHECK-LABEL: bitcast_ptr_ldr +; CHECK-NOT: ldrd +define i32 @bitcast_ptr_ldr(i16* %In) { +entry: + %0 = bitcast i16* %In to i32* + %in.addr.0 = getelementptr inbounds i32, i32* %0, i32 0 + %in.addr.1 = getelementptr inbounds i32, i32* %0, i32 1 + %1 = load i32, i32* %in.addr.0, align 4 + %2 = load i32, i32* %in.addr.1, align 4 + %mul = mul i32 %1, %2 + ret i32 %mul +} + +; CHECK-LABEL: bitcast_gep_ldr +; CHECK-NOT: ldrd +define i32 @bitcast_gep_ldr(i16* %In) { +entry: + %in.addr.0 = getelementptr inbounds i16, i16* %In, i32 0 + %in.addr.1 = getelementptr inbounds i16, i16* %In, i32 2 + %cast.0 = bitcast i16* %in.addr.0 to i32* + %cast.1 = bitcast i16* %in.addr.1 to i32* + %0 = load i32, i32* %cast.0, align 4 + %1 = load i32, i32* %cast.1, align 4 + %mul = mul i32 %0, %1 + ret i32 %mul +} + +; CHECK-LABEL: bitcast_ptr_str +; CHECK-NOT: strd +define void @bitcast_ptr_str(i32 %arg0, i32 %arg1, i16* %out) { +entry: + %0 = bitcast i16* %out to i32* + %out.addr.0 = getelementptr inbounds i32, i32* %0, i32 0 + %out.addr.1 = getelementptr inbounds i32, i32* %0, i32 1 + store i32 %arg0, i32* %out.addr.0, align 4 + store i32 %arg1, i32* %out.addr.1, align 4 + ret void +} + +; CHECK-LABEL: bitcast_gep_str +; CHECK-NOT: strd +define void @bitcast_gep_str(i32 %arg0, i32 %arg1, i16* %out) { +entry: + %out.addr.0 = getelementptr inbounds i16, i16* %out, i32 0 + %out.addr.1 = getelementptr inbounds i16, i16* %out, i32 2 + %cast.0 = bitcast i16* %out.addr.0 to i32* + %cast.1 = bitcast i16* %out.addr.1 to i32* + store i32 %arg0, i32* %cast.0, align 4 + store i32 %arg1, i32* %cast.1, align 
4 + ret void +}