Index: include/llvm/CodeGen/MachineMemOperand.h =================================================================== --- include/llvm/CodeGen/MachineMemOperand.h +++ include/llvm/CodeGen/MachineMemOperand.h @@ -59,6 +59,11 @@ return MachinePointerInfo(V.get(), Offset+O); } + /// Return true if memory region [V, V+Offset+Size) is known to be + /// dereferenceable. + bool isDereferenceable(unsigned Size, LLVMContext &C, + const DataLayout &DL) const; + /// Return the LLVM IR address space number that this pointer points into. unsigned getAddrSpace() const; Index: lib/CodeGen/MachineInstr.cpp =================================================================== --- lib/CodeGen/MachineInstr.cpp +++ lib/CodeGen/MachineInstr.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -558,6 +559,23 @@ return cast(V.get()->getType())->getAddressSpace(); } +/// isDereferenceable - Return true if V is always dereferenceable for +/// Offset + Size byte. +bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, + const DataLayout &DL) const { + if (!V.is()) + return false; + + const Value *BasePtr = V.get(); + if (BasePtr == nullptr) + return false; + + return isDereferenceableAndAlignedPointer(BasePtr, 1, + APInt(DL.getPointerSize(), + Offset + Size), + DL); +} + /// getConstantPool - Return a MachinePointerInfo record that refers to the /// constant pool. MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) { Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4897,6 +4897,8 @@ // TODO: In the AlwaysInline case, if the size is big then generate a loop // rather than maybe a humongous number of loads and stores. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &C = *DAG.getContext(); std::vector MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); @@ -4923,15 +4925,15 @@ return SDValue(); if (DstAlignCanChange) { - Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); + Type *Ty = MemOps[0].getTypeForEVT(C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); // Don't promote to an alignment that would require dynamic stack // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) while (NewAlign > Align && - DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign)) + DL.exceedsNaturalStackAlignment(NewAlign)) NewAlign /= 2; if (NewAlign > Align) { @@ -4991,12 +4993,19 @@ // thing to do is generate a LoadExt/StoreTrunc pair. These simplify // to Load/Store if NVT==VT. // FIXME does the case above also need this? - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + EVT NVT = TLI.getTypeToTransformTo(C, VT); assert(NVT.bitsGE(VT)); + + bool isDereferenceable = + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + MachineMemOperand::Flags SrcMMOFlags = MMOFlags; + if (isDereferenceable) + SrcMMOFlags |= MachineMemOperand::MODereferenceable; + Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), SrcPtrInfo.getWithOffset(SrcOff), VT, - MinAlign(SrcAlign, SrcOff), MMOFlags); + MinAlign(SrcAlign, SrcOff), SrcMMOFlags); OutChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), @@ -5024,6 +5033,8 @@ // Expand memmove to a series of load and store ops if the size operand falls // below a certain threshold. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &C = *DAG.getContext(); std::vector MemOps; bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); @@ -5046,8 +5057,8 @@ return SDValue(); if (DstAlignCanChange) { - Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); + Type *Ty = MemOps[0].getTypeForEVT(C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) @@ -5068,9 +5079,15 @@ unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value; + bool isDereferenceable = + SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); + MachineMemOperand::Flags SrcMMOFlags = MMOFlags; + if (isDereferenceable) + SrcMMOFlags |= MachineMemOperand::MODereferenceable; + Value = DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), - SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags); + SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; Index: test/CodeGen/PowerPC/memcpy_dereferenceable.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/memcpy_dereferenceable.ll @@ -0,0 +1,74 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +; This code causes an assertion failure if dereferenceable flag is not properly set in the load generated for memcpy + +; CHECK-LABEL: @func +; CHECK: lxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK-NOT: lxvd2x +; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: blr + +define void @func(i1 %flag) { +entry: + %pairs = alloca [4 x <2 x i64>], align 8 + %pair1 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 1 + %pair2 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 2 + %pvec1 = bitcast <2 x i64>* %pair1 to <2 x i64>* + %pvec2 = bitcast <2 x i64>* %pair2 to <2 x i64>* + %dst = bitcast [4 x <2 x i64>]* %pairs to i8* + %src = bitcast <2 x i64>* %pair2 to i8* + br i1 %flag, label %end, label %dummy + +end: + ; copy third element into first element by memcpy + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %dst, i8* %src, i64 16, i32 8, i1 false) + ; copy third element into second element by LD/ST + %vec2 = load <2 x i64>, <2 x i64>* %pvec2, align 8 + store <2 x i64> %vec2, <2 x i64>* %pvec1, align 8 + ret void + +dummy: + ; to make use of %src in another BB + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %src, i64 0, i32 0, i1 false) + br label %end +} + + +; CHECK-LABEL: @func2 +; CHECK: lxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK-NOT: lxvd2x +; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: stxvd2x [[VREG:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; CHECK: blr + +define void @func2(i1 %flag) { +entry: + %pairs = alloca [4 x <2 x i64>], align 8 + %pair1 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 1 + %pair2 = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* %pairs, i64 0, i64 2 + %pvec1 = bitcast <2 x i64>* %pair1 to <2 x i64>* + %pvec2 = bitcast <2 x i64>* %pair2 to <2 x i64>* + %dst = bitcast [4 x <2 x i64>]* %pairs to i8* + %src = bitcast <2 x i64>* %pair2 to i8* + br i1 %flag, label %end, label %dummy + +end: + ; copy third element into first element by memcpy + call void @llvm.memmove.p0i8.p0i8.i64(i8* nonnull %dst, i8* %src, i64 16, i32 8, i1 false) + ; copy third element into second element by LD/ST + %vec2 = load <2 x i64>, <2 x i64>* %pvec2, align 8 + store <2 x i64> %vec2, <2 x i64>* %pvec1, align 8 + ret void + +dummy: + ; to make use of %src in another BB + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %src, i8* %src, i64 0, i32 0, i1 false) + br label %end +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1 +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1 + +attributes #1 = { argmemonly nounwind }