Index: include/llvm/CodeGen/SelectionDAG.h =================================================================== --- include/llvm/CodeGen/SelectionDAG.h +++ include/llvm/CodeGen/SelectionDAG.h @@ -899,13 +899,14 @@ SDValue getStackArgumentTokenFactor(SDValue Chain); SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, - bool isTailCall, MachinePointerInfo DstPtrInfo, + SDValue Size, unsigned DstAlign, unsigned SrcAlign, + bool isVol, bool AlwaysInline, bool isTailCall, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo); SDValue getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVol, bool isTailCall, - MachinePointerInfo DstPtrInfo, + SDValue Size, unsigned DstAlign, unsigned SrcAlign, + bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo); SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, Index: include/llvm/CodeGen/SelectionDAGTargetInfo.h =================================================================== --- include/llvm/CodeGen/SelectionDAGTargetInfo.h +++ include/llvm/CodeGen/SelectionDAGTargetInfo.h @@ -52,8 +52,8 @@ virtual SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, - unsigned Align, bool isVolatile, - bool AlwaysInline, + unsigned DstAlign, unsigned SrcAlign, + bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { return SDValue(); @@ -67,8 +67,9 @@ /// lowering strategy should be used. 
virtual SDValue EmitTargetCodeForMemmove( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, - SDValue Op2, SDValue Op3, unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Op2, SDValue Op3, unsigned DstAlign, unsigned SrcAlign, + bool isVolatile, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { return SDValue(); } Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5315,26 +5315,23 @@ unsigned DstAS, unsigned SrcAS, SelectionDAG &DAG, const TargetLowering &TLI) { - assert((SrcAlign == 0 || SrcAlign >= DstAlign) && - "Expecting memcpy / memset source to meet alignment requirement!"); + // 'SrcAlign' and 'DstAlign' are the specified alignment of the source and + // destination with the following special values: // If 'SrcAlign' is zero, that means the memory operation does not need to - // load the value, i.e. memset or memcpy from constant string. Otherwise, - // it's the inferred alignment of the source. 'DstAlign', on the other hand, - // is the specified alignment of the memory operation. If it is zero, that - // means it's possible to change the alignment of the destination. + // load the value, i.e. memset or memcpy from constant string. If 'DstAlign' + // is zero, it's possible to change the alignment of the destination. // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does // not need to be loaded. EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, IsMemset, ZeroMemset, MemcpyStrSrc, DAG.getMachineFunction()); + unsigned CommonAlign = MinAlign(DstAlign, SrcAlign); if (VT == MVT::Other) { // Use the largest integer type whose alignment constraints are satisfied. - // We only need to check DstAlign here as SrcAlign is always greater or - // equal to DstAlign (or zero). 
VT = MVT::i64; - while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && - !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) + while (CommonAlign && CommonAlign < VT.getSizeInBits() / 8 && + !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, CommonAlign)) VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); assert(VT.isInteger()); @@ -5389,7 +5386,7 @@ bool Fast; if (NumMemOps && AllowOverlap && VTSize >= 8 && NewVTSize < Size && - TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast) + TLI.allowsMisalignedMemoryAccesses(VT, DstAS, CommonAlign, &Fast) && Fast) VTSize = Size; else { VT = NewVT; @@ -5442,8 +5439,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, - bool isVol, bool AlwaysInline, + uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool isVol, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Turn a memcpy of undef to nop. @@ -5465,16 +5463,14 @@ FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Align > SrcAlign) - SrcAlign = Align; ConstantDataArraySlice Slice; bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); + SrcAlign = std::max(SrcAlign, DAG.InferPtrAlignment(Src)); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), + (DstAlignCanChange ? 0 : DstAlign), (isZeroConstant ? 
0 : SrcAlign), false, false, CopyFromConstant, true, DstPtrInfo.getAddrSpace(), @@ -5484,21 +5480,21 @@ if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(C); - unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); + unsigned NewDstAlign = (unsigned)DL.getABITypeAlignment(Ty); // Don't promote to an alignment that would require dynamic stack // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Align && - DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign /= 2; + while (NewDstAlign > DstAlign && + DL.exceedsNaturalStackAlignment(NewDstAlign)) + NewDstAlign /= 2; - if (NewAlign > Align) { + if (NewDstAlign > DstAlign) { // Give the stack frame object a larger alignment if needed. - if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) - MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + if (MFI.getObjectAlignment(FI->getIndex()) < NewDstAlign) + MFI.setObjectAlignment(FI->getIndex(), NewDstAlign); + DstAlign = NewDstAlign; } } @@ -5543,7 +5539,7 @@ if (Value.getNode()) { Store = DAG.getStore(Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, + DstPtrInfo.getWithOffset(DstOff), DstAlign, MMOFlags); OutChains.push_back(Store); } @@ -5572,7 +5568,7 @@ Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), VT, DstAlign, MMOFlags); OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -5628,7 +5624,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, + uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { @@ -5649,13 +5646,11 @@ FrameIndexSDNode *FI = dyn_cast(Dst); if (FI 
&& !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Align > SrcAlign) - SrcAlign = Align; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); + SrcAlign = std::max(SrcAlign, DAG.InferPtrAlignment(Src)); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), SrcAlign, + (DstAlignCanChange ? 0 : DstAlign), SrcAlign, false, false, false, false, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), @@ -5664,12 +5659,12 @@ if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(C); - unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); - if (NewAlign > Align) { + unsigned NewDstAlign = (unsigned)DL.getABITypeAlignment(Ty); + if (NewDstAlign > DstAlign) { // Give the stack frame object a larger alignment if needed. - if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) - MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + if (MFI.getObjectAlignment(FI->getIndex()) < NewDstAlign) + MFI.setObjectAlignment(FI->getIndex(), NewDstAlign); + DstAlign = NewDstAlign; } } @@ -5707,7 +5702,7 @@ Store = DAG.getStore(Chain, dl, LoadValues[i], DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), DstAlign, MMOFlags); OutChains.push_back(Store); DstOff += VTSize; } @@ -5826,12 +5821,14 @@ } } -SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, unsigned Align, +SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, + SDValue Dst, SDValue Src, SDValue Size, + unsigned DstAlign, unsigned SrcAlign, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); + assert(DstAlign != 0 && SrcAlign != 0 && + "The SDAG layer expects explicit alignment and reserves 0"); // 
Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -5841,9 +5838,9 @@ if (ConstantSize->isNullValue()) return Chain; - SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(),Align, - isVol, false, DstPtrInfo, SrcPtrInfo); + SDValue Result = getMemcpyLoadsAndStores( + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), + DstAlign, SrcAlign, isVol, false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -5852,8 +5849,8 @@ // code. If the target chooses to do this, this is the next best. if (TSI) { SDValue Result = TSI->EmitTargetCodeForMemcpy( - *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, - DstPtrInfo, SrcPtrInfo); + *this, dl, Chain, Dst, Src, Size, DstAlign, SrcAlign, + isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -5862,9 +5859,9 @@ // use a (potentially long) sequence of loads and stores. 
if (AlwaysInline) { assert(ConstantSize && "AlwaysInline requires a constant size!"); - return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), Align, isVol, - true, DstPtrInfo, SrcPtrInfo); + return getMemcpyLoadsAndStores( + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), + DstAlign, SrcAlign, isVol, true, DstPtrInfo, SrcPtrInfo); } checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); @@ -5940,12 +5937,14 @@ return CallResult.second; } -SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, unsigned Align, +SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, + SDValue Dst, SDValue Src, SDValue Size, + unsigned DstAlign, unsigned SrcAlign, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); + assert(DstAlign != 0 && SrcAlign != 0 && + "The SDAG layer expects explicit alignment and reserves 0"); // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -5955,10 +5954,9 @@ if (ConstantSize->isNullValue()) return Chain; - SDValue Result = - getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), Align, isVol, - false, DstPtrInfo, SrcPtrInfo); + SDValue Result = getMemmoveLoadsAndStores( + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), + DstAlign, SrcAlign, isVol, false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -5967,7 +5965,8 @@ // code. If the target chooses to do this, this is the next best. 
if (TSI) { SDValue Result = TSI->EmitTargetCodeForMemmove( - *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); + *this, dl, Chain, Dst, Src, Size, DstAlign, SrcAlign, + isVol, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5093,13 +5093,10 @@ // @llvm.memcpy defines 0 and 1 to both mean no alignment. unsigned DstAlign = std::max(MCI.getDestAlignment(), 1); unsigned SrcAlign = std::max(MCI.getSourceAlignment(), 1); - unsigned Align = MinAlign(DstAlign, SrcAlign); bool isVol = MCI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); - // FIXME: Support passing different dest/src alignments to the memcpy DAG - // node. - SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - false, isTC, + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, + DstAlign, SrcAlign, isVol, false, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MC); @@ -5127,13 +5124,11 @@ // @llvm.memmove defines 0 and 1 to both mean no alignment. unsigned DstAlign = std::max(MMI.getDestAlignment(), 1); unsigned SrcAlign = std::max(MMI.getSourceAlignment(), 1); - unsigned Align = MinAlign(DstAlign, SrcAlign); bool isVol = MMI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); - // FIXME: Support passing different dest/src alignments to the memmove DAG - // node. 
- SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - isTC, MachinePointerInfo(I.getArgOperand(0)), + SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, + DstAlign, SrcAlign, isVol, isTC, + MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MM); return nullptr; @@ -6804,11 +6799,9 @@ SDValue Src = getValue(I.getArgOperand(1)); SDValue Size = getValue(I.getArgOperand(2)); - unsigned DstAlign = DAG.InferPtrAlignment(Dst); - unsigned SrcAlign = DAG.InferPtrAlignment(Src); - unsigned Align = std::min(DstAlign, SrcAlign); - if (Align == 0) // Alignment of one or both could not be inferred. - Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved. + // Set alignment to 1 if it can't be inferred, as 0 is reserved + unsigned DstAlign = std::max(DAG.InferPtrAlignment(Dst), 1); + unsigned SrcAlign = std::max(DAG.InferPtrAlignment(Src), 1); bool isVol = false; SDLoc sdl = getCurSDLoc(); @@ -6816,7 +6809,8 @@ // In the mempcpy context we need to pass in a false value for isTailCall // because the return pointer needs to be adjusted by the size of // the copied memory. 
- SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol, + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, + DstAlign, SrcAlign, isVol, false, /*isTailCall=*/false, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3720,10 +3720,11 @@ } if (Outs[i].Flags.isByVal()) { + unsigned Align = Outs[i].Flags.getByValAlign(); SDValue SizeNode = DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64); SDValue Cpy = DAG.getMemcpy( - Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(), + Chain, DL, DstAddr, Arg, SizeNode, Align, Align, /*isVol = */ false, /*AlwaysInline = */ false, /*isTailCall = */ false, DstInfo, MachinePointerInfo()); @@ -5095,7 +5096,7 @@ return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2), DAG.getConstant(VaListSize, DL, MVT::i32), - 8, false, false, false, MachinePointerInfo(DestSV), + 8, 8, false, false, false, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); } Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1140,10 +1140,11 @@ // The default fallback uses the private pointer size as a guess for a type to // use. Make sure we switch these to 64-bit accesses. - if (Size >= 16 && DstAlign >= 4) // XXX: Should only do for global + // XXX: Should only do for global + if (Size >= 16 && DstAlign >= 4 && (SrcAlign == 0 || SrcAlign >= 4)) return MVT::v4i32; - if (Size >= 8 && DstAlign >= 4) + if (Size >= 8 && DstAlign >= 4 && (SrcAlign == 0 || SrcAlign >= 4)) return MVT::v2i32; // Use the default. 
@@ -2585,8 +2586,9 @@ if (Outs[i].Flags.isByVal()) { SDValue SizeNode = DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i32); + unsigned Align = Outs[i].Flags.getByValAlign(); SDValue Cpy = DAG.getMemcpy( - Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(), + Chain, DL, DstAddr, Arg, SizeNode, Align, Align, /*isVol = */ false, /*AlwaysInline = */ true, /*isTailCall = */ false, DstInfo, MachinePointerInfo(UndefValue::get(Type::getInt8PtrTy( Index: lib/Target/ARC/ARCISelLowering.cpp =================================================================== --- lib/Target/ARC/ARCISelLowering.cpp +++ lib/Target/ARC/ARCISelLowering.cpp @@ -570,8 +570,9 @@ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); InVals.push_back(FIN); MemOps.push_back(DAG.getMemcpy( - Chain, dl, FIN, ArgDI.SDV, DAG.getConstant(Size, dl, MVT::i32), Align, - false, false, false, MachinePointerInfo(), MachinePointerInfo())); + Chain, dl, FIN, ArgDI.SDV, DAG.getConstant(Size, dl, MVT::i32), + Align, Align, false, false, false, + MachinePointerInfo(), MachinePointerInfo())); } else { InVals.push_back(ArgDI.SDV); } Index: lib/Target/ARM/ARMSelectionDAGInfo.h =================================================================== --- lib/Target/ARM/ARMSelectionDAGInfo.h +++ lib/Target/ARM/ARMSelectionDAGInfo.h @@ -40,7 +40,8 @@ public: SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVolatile, + SDValue Size, unsigned DstAlign, + unsigned SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; @@ -48,8 +49,8 @@ SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo, + unsigned DstAlign, unsigned SrcAlign, + bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const 
override; // Adjust parameters for memset, see RTABI section 4.3.4 Index: lib/Target/ARM/ARMSelectionDAGInfo.cpp =================================================================== --- lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -127,8 +127,10 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Size, unsigned DstAlign, unsigned SrcAlign, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + unsigned Align = MinAlign(DstAlign, SrcAlign); const ARMSubtarget &Subtarget = DAG.getMachineFunction().getSubtarget(); // Do repeated 4-byte loads and stores. To be improved. @@ -241,8 +243,9 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVolatile, + SDValue Size, unsigned DstAlign, unsigned SrcAlign, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + unsigned Align = MinAlign(DstAlign, SrcAlign); return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, RTLIB::MEMMOVE); } Index: lib/Target/BPF/BPFSelectionDAGInfo.h =================================================================== --- lib/Target/BPF/BPFSelectionDAGInfo.h +++ lib/Target/BPF/BPFSelectionDAGInfo.h @@ -22,7 +22,8 @@ public: SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVolatile, + SDValue Size, unsigned DstAlign, + unsigned SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; Index: lib/Target/BPF/BPFSelectionDAGInfo.cpp 
=================================================================== --- lib/Target/BPF/BPFSelectionDAGInfo.cpp +++ lib/Target/BPF/BPFSelectionDAGInfo.cpp @@ -20,13 +20,15 @@ SDValue BPFSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Size, unsigned DstAlign, unsigned SrcAlign, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { // Requires the copy size to be a constant. ConstantSDNode *ConstantSize = dyn_cast(Size); if (!ConstantSize) return SDValue(); + unsigned Align = MinAlign(DstAlign, SrcAlign); unsigned CopyLen = ConstantSize->getZExtValue(); unsigned StoresNumEstimate = alignTo(CopyLen, Align) >> Log2_32(Align); // Impose the same copy length limit as MaxStoresPerMemcpy. Index: lib/Target/Hexagon/HexagonISelLowering.cpp =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.cpp +++ lib/Target/Hexagon/HexagonISelLowering.cpp @@ -167,7 +167,8 @@ SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); - return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + unsigned Align = Flags.getByValAlign(); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Align, Align, /*isVolatile=*/false, /*AlwaysInline=*/false, /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); Index: lib/Target/Hexagon/HexagonSelectionDAGInfo.h =================================================================== --- lib/Target/Hexagon/HexagonSelectionDAGInfo.h +++ lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -24,7 +24,8 @@ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - 
SDValue Size, unsigned Align, bool isVolatile, + SDValue Size, unsigned DstAlign, + unsigned SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; Index: lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp =================================================================== --- lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp +++ lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -19,10 +19,12 @@ SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Size, unsigned DstAlign, unsigned SrcAlign, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast(Size); - if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize) + if (AlwaysInline || !ConstantSize || + (SrcAlign & 0x3) != 0 || (DstAlign & 0x3) != 0) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); Index: lib/Target/Lanai/LanaiISelLowering.cpp =================================================================== --- lib/Target/Lanai/LanaiISelLowering.cpp +++ lib/Target/Lanai/LanaiISelLowering.cpp @@ -639,7 +639,7 @@ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue SizeNode = DAG.getConstant(Size, DL, MVT::i32); - Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align, + Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align, Align, /*IsVolatile=*/false, /*AlwaysInline=*/false, /*isTailCall=*/false, MachinePointerInfo(), Index: lib/Target/Lanai/LanaiSelectionDAGInfo.h =================================================================== --- lib/Target/Lanai/LanaiSelectionDAGInfo.h +++ lib/Target/Lanai/LanaiSelectionDAGInfo.h @@ -25,7 +25,8 @@ SDValue 
EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVolatile, + SDValue Size, unsigned DstAlign, + unsigned SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; Index: lib/Target/Lanai/LanaiSelectionDAGInfo.cpp =================================================================== --- lib/Target/Lanai/LanaiSelectionDAGInfo.cpp +++ lib/Target/Lanai/LanaiSelectionDAGInfo.cpp @@ -21,8 +21,8 @@ SDValue LanaiSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG & /*DAG*/, const SDLoc & /*dl*/, SDValue /*Chain*/, - SDValue /*Dst*/, SDValue /*Src*/, SDValue Size, unsigned /*Align*/, - bool /*isVolatile*/, bool /*AlwaysInline*/, + SDValue /*Dst*/, SDValue /*Src*/, SDValue Size, unsigned /*DstAlign*/, + unsigned /*SrcAlign*/, bool /*isVolatile*/, bool /*AlwaysInline*/, MachinePointerInfo /*DstPtrInfo*/, MachinePointerInfo /*SrcPtrInfo*/) const { ConstantSDNode *ConstantSize = dyn_cast(Size); Index: lib/Target/MSP430/MSP430ISelLowering.cpp =================================================================== --- lib/Target/MSP430/MSP430ISelLowering.cpp +++ lib/Target/MSP430/MSP430ISelLowering.cpp @@ -838,8 +838,8 @@ if (Flags.isByVal()) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i16); - MemOp = DAG.getMemcpy(Chain, dl, PtrOff, Arg, SizeNode, - Flags.getByValAlign(), + unsigned Align = Flags.getByValAlign(); + MemOp = DAG.getMemcpy(Chain, dl, PtrOff, Arg, SizeNode, Align, Align, /*isVolatile*/false, /*AlwaysInline=*/true, /*isTailCall=*/false, Index: lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsISelLowering.cpp +++ lib/Target/Mips/MipsISelLowering.cpp @@ -4169,7 +4169,8 @@ DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, DL, PtrTy), - Alignment, 
/*isVolatile=*/false, /*AlwaysInline=*/false, + Alignment, Alignment, + /*isVolatile=*/false, /*AlwaysInline=*/false, /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); MemOpChains.push_back(Chain); Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -3009,8 +3009,8 @@ // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), - DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true, - false, MachinePointerInfo(), MachinePointerInfo()); + DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, 8, false, + true, false, MachinePointerInfo(), MachinePointerInfo()); } SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, @@ -4830,7 +4830,8 @@ SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); - return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + unsigned Align = Flags.getByValAlign(); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Align, Align, false, false, false, MachinePointerInfo(), MachinePointerInfo()); } Index: lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- lib/Target/RISCV/RISCVISelLowering.cpp +++ lib/Target/RISCV/RISCVISelLowering.cpp @@ -1292,7 +1292,7 @@ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); - Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align, + Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align, Align, /*IsVolatile=*/false, /*AlwaysInline=*/false, IsTailCall, MachinePointerInfo(), Index: lib/Target/Sparc/SparcISelLowering.cpp =================================================================== --- 
lib/Target/Sparc/SparcISelLowering.cpp +++ lib/Target/Sparc/SparcISelLowering.cpp @@ -760,7 +760,7 @@ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue SizeNode = DAG.getConstant(Size, dl, MVT::i32); - Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align, + Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align, Align, false, // isVolatile, (Size <= 32), // AlwaysInline if size <= 32, false, // isTailCall Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2980,8 +2980,8 @@ SDLoc DL(Op); return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL), - /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, - /*isTailCall*/false, + /*DstAlign*/8, /*SrcAlign*/8, /*isVolatile*/false, + /*AlwaysInline*/false, /*isTailCall*/false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } Index: lib/Target/SystemZ/SystemZSelectionDAGInfo.h =================================================================== --- lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -26,7 +26,8 @@ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool IsVolatile, + SDValue Size, unsigned DstAlign, + unsigned SrcAlign, bool IsVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; Index: lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -48,8 +48,9 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool 
IsVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Size, unsigned DstAlign, unsigned SrcAlign, bool IsVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { if (IsVolatile) return SDValue(); Index: lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -636,8 +636,9 @@ SDValue SizeNode = DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32); SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); + unsigned Align = Out.Flags.getByValAlign(); Chain = DAG.getMemcpy( - Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(), + Chain, DL, FINode, OutVal, SizeNode, Align, Align, /*isVolatile*/ false, /*AlwaysInline=*/false, /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo()); OutVal = FINode; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -2775,8 +2775,9 @@ SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); + unsigned Align = Flags.getByValAlign(); - return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Align, Align, /*isVolatile*/false, /*AlwaysInline=*/true, /*isTailCall*/false, MachinePointerInfo(), MachinePointerInfo()); @@ -20846,7 +20847,7 @@ SDLoc DL(Op); return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, - DAG.getIntPtrConstant(24, DL), 8, /*isVolatile*/false, + DAG.getIntPtrConstant(24, DL), 8, 8, /*isVolatile*/false, false, false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } Index: lib/Target/X86/X86SelectionDAGInfo.h 
=================================================================== --- lib/Target/X86/X86SelectionDAGInfo.h +++ lib/Target/X86/X86SelectionDAGInfo.h @@ -39,7 +39,8 @@ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVolatile, + SDValue Size, unsigned DstAlign, + unsigned SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; Index: lib/Target/X86/X86SelectionDAGInfo.cpp =================================================================== --- lib/Target/X86/X86SelectionDAGInfo.cpp +++ lib/Target/X86/X86SelectionDAGInfo.cpp @@ -203,8 +203,9 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Size, unsigned DstAlign, unsigned SrcAlign, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -220,6 +221,7 @@ /// if calling the library is not allowed (AlwaysInline), then soldier on as /// the code generated here is better than the long load-store sequence we /// would otherwise get. 
+ unsigned Align = MinAlign(DstAlign, SrcAlign); if (!AlwaysInline && (Align & 3) != 0) return SDValue(); @@ -289,7 +291,8 @@ SrcVT)), DAG.getConstant(Repeats.BytesLeft(), dl, SizeVT), - Align, isVolatile, AlwaysInline, false, + DstAlign, SrcAlign, + isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); } Index: lib/Target/XCore/XCoreISelLowering.cpp =================================================================== --- lib/Target/XCore/XCoreISelLowering.cpp +++ lib/Target/XCore/XCoreISelLowering.cpp @@ -1392,7 +1392,7 @@ InVals.push_back(FIN); MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV, DAG.getConstant(Size, dl, MVT::i32), - Align, false, false, false, + Align, Align, false, false, false, MachinePointerInfo(), MachinePointerInfo())); } else { @@ -1805,8 +1805,8 @@ return DAG.getMemmove(Chain, dl, ST->getBasePtr(), LD->getBasePtr(), DAG.getConstant(StoreBits/8, dl, MVT::i32), - Alignment, false, isTail, ST->getPointerInfo(), - LD->getPointerInfo()); + Alignment, Alignment, false, isTail, + ST->getPointerInfo(), LD->getPointerInfo()); } } break; Index: lib/Target/XCore/XCoreSelectionDAGInfo.h =================================================================== --- lib/Target/XCore/XCoreSelectionDAGInfo.h +++ lib/Target/XCore/XCoreSelectionDAGInfo.h @@ -24,7 +24,8 @@ public: SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, bool isVolatile, + SDValue Op3, unsigned DstAlign, + unsigned SrcAlign, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; Index: lib/Target/XCore/XCoreSelectionDAGInfo.cpp =================================================================== --- lib/Target/XCore/XCoreSelectionDAGInfo.cpp +++ lib/Target/XCore/XCoreSelectionDAGInfo.cpp @@ -18,11 +18,12 @@ SDValue XCoreSelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const 
SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + SDValue Size, unsigned DstAlign, unsigned SrcAlign, bool isVolatile, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { unsigned SizeBitWidth = Size.getValueSizeInBits(); // Call __memcpy_4 if the src, dst and size are all 4 byte aligned. - if (!AlwaysInline && (Align & 3) == 0 && + if (!AlwaysInline && (SrcAlign & 3) == 0 && (DstAlign & 3) == 0 && DAG.MaskedValueIsZero(Size, APInt(SizeBitWidth, 3))) { const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); TargetLowering::ArgListTy Args; Index: test/CodeGen/X86/memcpy-different-align.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/memcpy-different-align.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-linux -mcpu=sandybridge < %s | FileCheck %s +declare void @llvm.memmove.p0i8.p0i8.i16(i8* %a, i8* %b, i16, i1) +declare void @llvm.memcpy.p0i8.p0i8.i16(i8* %a, i8* %b, i16, i1) + +define void @test_memcpy_16_8(i8* %a, i8* %b) { +; CHECK-LABEL: test_memcpy_16_8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovups (%rsi), %xmm0 +; CHECK-NEXT: vmovaps %xmm0, (%rdi) +; CHECK-NEXT: retq + call void @llvm.memcpy.p0i8.p0i8.i16(i8* align 16 %a, i8* align 8 %b, i16 16, i1 false) + ret void +} + +define void @test_memmove_16_8(i8* %a, i8* %b) { +; CHECK-LABEL: test_memmove_16_8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovups (%rsi), %xmm0 +; CHECK-NEXT: vmovaps %xmm0, (%rdi) +; CHECK-NEXT: retq + call void @llvm.memmove.p0i8.p0i8.i16(i8* align 16 %a, i8* align 8 %b, i16 16, i1 false) + ret void +} + +define void @test_memcpy_8_16(i8* %a, i8* %b) { +; CHECK-LABEL: test_memcpy_8_16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rsi), %xmm0 +; CHECK-NEXT: vmovups 
%xmm0, (%rdi) +; CHECK-NEXT: retq + call void @llvm.memcpy.p0i8.p0i8.i16(i8* align 8 %a, i8* align 16 %b, i16 16, i1 false) + ret void +} + +define void @test_memmove_8_16(i8* %a, i8* %b) { +; CHECK-LABEL: test_memmove_8_16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rsi), %xmm0 +; CHECK-NEXT: vmovups %xmm0, (%rdi) +; CHECK-NEXT: retq + call void @llvm.memmove.p0i8.p0i8.i16(i8* align 8 %a, i8* align 16 %b, i16 16, i1 false) + ret void +}