diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -182,7 +182,8 @@
 /// Returns the best type to use with repmovs depending on alignment.
 static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
-                                 uint64_t Align) {
+                                 Align Alignment) {
+  uint64_t Align = Alignment.value();
   assert((Align != 0) && "Align is normalized");
   assert(isPowerOf2_64(Align) && "Align is a power of 2");
   switch (Align) {
@@ -204,7 +205,7 @@
 static SDValue emitConstantSizeRepmov(
     SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
     SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
-    unsigned Align, bool isVolatile, bool AlwaysInline,
+    Align Alignment, bool isVolatile, bool AlwaysInline,
     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
   /// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very
@@ -219,10 +220,10 @@
   assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
   /// We assume runtime memcpy will do a better job for unaligned copies when
   /// ERMS is not present.
-  if (!AlwaysInline && (Align & 3) != 0)
+  if (!AlwaysInline && (Alignment.value() & 3) != 0)
     return SDValue();
 
-  const MVT BlockType = getOptimalRepmovsType(Subtarget, Align);
+  const MVT BlockType = getOptimalRepmovsType(Subtarget, Alignment);
   const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
   const uint64_t BlockCount = Size / BlockBytes;
   const uint64_t BytesLeft = Size % BlockBytes;
@@ -251,7 +252,7 @@
       Chain, dl,
       DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
       DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
-      DAG.getConstant(BytesLeft, dl, SizeVT), llvm::Align(Align), isVolatile,
+      DAG.getConstant(BytesLeft, dl, SizeVT), Alignment, isVolatile,
       /*AlwaysInline*/ true, /*isTailCall*/ false,
       DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
@@ -281,10 +282,10 @@
   /// Handle constant sizes,
   if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
-    return emitConstantSizeRepmov(
-        DAG, Subtarget, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
-        Size.getValueType(), Alignment.value(), isVolatile, AlwaysInline,
-        DstPtrInfo, SrcPtrInfo);
+    return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
+                                  ConstantSize->getZExtValue(),
+                                  Size.getValueType(), Alignment, isVolatile,
+                                  AlwaysInline, DstPtrInfo, SrcPtrInfo);
 
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/X86/pr61348.ll b/llvm/test/CodeGen/X86/pr61348.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr61348.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
+
+define i32 @PR61348() optsize {
+; CHECK-LABEL: PR61348:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl $3, %ecx
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:    rep;movsq (%rsi), %es:(%rdi)
+; CHECK-NEXT:    movb 30, %al
+; CHECK-NEXT:    movb %al, 30
+; CHECK-NEXT:    movzwl 28, %eax
+; CHECK-NEXT:    movw %ax, 28
+; CHECK-NEXT:    movl 24, %eax
+; CHECK-NEXT:    movl %eax, 24
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    retq
+  tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 null, ptr align 1 null, i64 31, i1 true)
+  ret i32 0
+}
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
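
For context, here is a standalone sketch of the failure mode this patch removes, reconstructed from the "Align is normalized" assert and the test case above rather than taken from the PR itself. Before the patch, the caller passed `Alignment.value()` (a `uint64_t`) into the 32-bit `unsigned Align` parameter of `emitConstantSizeRepmov`; SelectionDAG can infer a very large alignment (2^32 or more) for a known-null pointer, so the value narrowed to 0 and fired the assert. The `Align` struct below is a simplified stand-in for `llvm::Align`, and the helper name is made up for illustration:

// Hypothetical repro of the narrowing bug (assumed mechanism, not LLVM code).
// Build: c++ -std=c++17 narrowing.cpp && ./a.out
#include <cassert>
#include <cstdint>
#include <iostream>

// Simplified stand-in for llvm::Align; like the real class it stores the
// log2 of the alignment, so values of 2^32 and above are representable.
struct Align {
  uint8_t ShiftValue = 0;
  uint64_t value() const { return uint64_t(1) << ShiftValue; }
};

// Pre-patch shape of the callee: a 32-bit `unsigned` parameter that even
// shadowed the Align class name (hence the rename to `Alignment`).
static void repmovsLoweringLike(unsigned Align) {
  std::cout << "callee sees: " << Align << "\n"; // prints 0
  assert(Align != 0 && "Align is normalized");   // this is what fired
}

int main() {
  // A known-null pointer can be inferred to be hugely aligned; 2^32 fits in
  // Align / uint64_t but not in a 32-bit unsigned.
  Align Inferred{/*ShiftValue=*/32};
  std::cout << "Align::value(): " << Inferred.value() << "\n"; // 4294967296
  repmovsLoweringLike(Inferred.value()); // implicit narrowing: 2^32 -> 0
  return 0;
}

Passing the `Align` object through unchanged, as the patch does, keeps the full 64-bit value intact until the callee extracts it into a `uint64_t` local, so no narrowing conversion ever happens.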