diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5939,6 +5939,7 @@ /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); + setValue(&I, MC); return; } case Intrinsic::memcpy_inline: { @@ -5960,6 +5961,7 @@ /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); + setValue(&I, MC); return; } case Intrinsic::memset: { @@ -5976,6 +5978,7 @@ Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); updateDAGForMaybeTailCall(MS); + setValue(&I, MS); return; } case Intrinsic::memset_inline: { @@ -5994,6 +5997,7 @@ MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); updateDAGForMaybeTailCall(MC); + setValue(&I, MC); return; } case Intrinsic::memmove: { @@ -6015,6 +6019,7 @@ MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MM); + setValue(&I, MM); return; } case Intrinsic::memcpy_element_unordered_atomic: { @@ -6031,6 +6036,7 @@ isTC, MachinePointerInfo(MI.getRawDest()), MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); + setValue(&I, MC); return; } case Intrinsic::memmove_element_unordered_atomic: { @@ -6047,6 +6053,7 @@ isTC, MachinePointerInfo(MI.getRawDest()), MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); + setValue(&I, MC); return; } case Intrinsic::memset_element_unordered_atomic: { @@ -6062,6 +6069,7 @@ DAG.getAtomicMemset(getRoot(), sdl, Dst, Val, Length, LengthTy, ElemSz, isTC, MachinePointerInfo(MI.getRawDest())); updateDAGForMaybeTailCall(MC); + setValue(&I, MC); return; } case Intrinsic::call_preallocated_setup: { diff --git a/llvm/test/CodeGen/AArch64/SelectionDAG/arm64-pcsections-selectiondag.ll b/llvm/test/CodeGen/AArch64/SelectionDAG/arm64-pcsections-selectiondag.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/SelectionDAG/arm64-pcsections-selectiondag.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -stop-after=aarch64-expand-pseudo -verify-machineinstrs | FileCheck %s + +define i64 @call_memcpy_intrinsic(ptr %src, ptr %dst) { + ; CHECK-LABEL: name: call_memcpy_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w8 = LDRBBui renamable $x1, 0 :: (volatile load (s8) from %ir.dst) + ; CHECK-NEXT: STRBBui killed renamable $w8, killed renamable $x0, 0, pcsections !0 :: (volatile store (s8) into %ir.src) + ; CHECK-NEXT: renamable $x0 = LDRXui killed renamable $x1, 0 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + call void @llvm.memcpy.p0.p0.i64(ptr %src, ptr %dst, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memcpy_inline_intrinsic(ptr %src, ptr %dst) { + ; CHECK-LABEL: name: call_memcpy_inline_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w8 = LDRBBui renamable $x1, 0 :: (volatile load (s8) from %ir.dst) + ; CHECK-NEXT: STRBBui killed renamable $w8, 
killed renamable $x0, 0, pcsections !0 :: (volatile store (s8) into %ir.src) + ; CHECK-NEXT: renamable $x0 = LDRXui killed renamable $x1, 0 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + call void @llvm.memcpy.inline.p0.p0.i64(ptr %src, ptr %dst, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memmove_intrinsic(ptr %src, ptr %dst) { + ; CHECK-LABEL: name: call_memmove_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w8 = LDRBBui renamable $x1, 0 :: (volatile load (s8) from %ir.dst) + ; CHECK-NEXT: STRBBui killed renamable $w8, killed renamable $x0, 0, pcsections !0 :: (volatile store (s8) into %ir.src) + ; CHECK-NEXT: renamable $x0 = LDRXui killed renamable $x1, 0 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + call void @llvm.memmove.p0.p0.i64(ptr %src, ptr %dst, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memmove_inline_intrinsic(ptr %src, ptr %dst) { + ; CHECK-LABEL: name: call_memmove_inline_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w8 = LDRBBui renamable $x1, 0 :: (volatile load (s8) from %ir.dst) + ; CHECK-NEXT: STRBBui killed renamable $w8, killed renamable $x0, 0, pcsections !0 :: (volatile store (s8) into %ir.src) + ; CHECK-NEXT: renamable $x0 = LDRXui killed renamable $x1, 0 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + call void @llvm.memmove.inline.p0.p0.i64(ptr %src, ptr %dst, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memset_intrinsic(ptr %dst) { + ; CHECK-LABEL: name: call_memset_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: STRBBui $wzr, renamable $x0, 0, pcsections !0 :: (volatile store (s8) into %ir.dst) + ; CHECK-NEXT: renamable $x0 = LDRXui killed renamable $x0, 0 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + call void @llvm.memset.p0.p0.i64(ptr %dst, i8 0, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memset_inline_intrinsic(ptr %dst) { + ; CHECK-LABEL: name: call_memset_inline_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: STRBBui $wzr, renamable $x0, 0, pcsections !0 :: (volatile store (s8) into %ir.dst) + ; CHECK-NEXT: renamable $x0 = LDRXui killed renamable $x0, 0 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + call void @llvm.memset.inline.p0.p0.i64(ptr %dst, i8 0, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memcpy_element_unordered_atomic_intrinsic() { + ; CHECK-LABEL: name: call_memcpy_element_unordered_atomic_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.2) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -16 + ; CHECK-NEXT: $x0 = ADDXri $sp, 12, 0 + ; CHECK-NEXT: $x1 = ADDXri $sp, 8, 0 + ; CHECK-NEXT: dead $w2 = MOVZWi 1, 0, implicit-def $x2 + ; CHECK-NEXT: BL &__llvm_memcpy_element_unordered_atomic_1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit 
killed $x1, implicit killed $x2, implicit-def $sp + ; CHECK-NEXT: renamable $x0 = LDRXui $sp, 1 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + %src = alloca i32, align 1 + %dst = alloca i32, align 1 + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 %src, ptr align 1 %dst, i64 1, i32 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memmove_element_unordered_atomic_intrinsic() { + ; CHECK-LABEL: name: call_memmove_element_unordered_atomic_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.2) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -16 + ; CHECK-NEXT: $x0 = ADDXri $sp, 12, 0 + ; CHECK-NEXT: $x1 = ADDXri $sp, 8, 0 + ; CHECK-NEXT: dead $w2 = MOVZWi 1, 0, implicit-def $x2 + ; CHECK-NEXT: BL &__llvm_memmove_element_unordered_atomic_1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit-def $sp + ; CHECK-NEXT: renamable $x0 = LDRXui $sp, 1 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + %src = alloca i32, align 1 + %dst = alloca i32, align 1 + call void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr align 1 %src, ptr align 1 %dst, i64 1, i32 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memset_element_unordered_atomic_intrinsic() { + ; CHECK-LABEL: name: call_memset_element_unordered_atomic_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.1) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -16 + ; CHECK-NEXT: $x0 = ADDXri $sp, 12, 0 + ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0 + ; CHECK-NEXT: dead $w2 = MOVZWi 1, 0, implicit-def $x2 + ; CHECK-NEXT: BL &__llvm_memset_element_unordered_atomic_1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $w1, implicit killed $x2, implicit-def $sp + ; CHECK-NEXT: renamable $x0 = LDURXi $sp, 12 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + %dst = alloca i32, align 1 + call void @llvm.memset.element.unordered.atomic.p0.p0.i64(ptr align 1%dst, i8 0, i64 1, i32 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + + +!0 = !{!"foo"} + +declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) +declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) +declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) +declare void @llvm.memmove.inline.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) +declare void @llvm.memset.p0.p0.i64(ptr nocapture, i8, i64, i1) +declare void @llvm.memset.inline.p0.p0.i64(ptr nocapture, i8, i64, i1) +declare void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, ptr 
nocapture readonly, i64, i32) +declare void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i32) +declare void @llvm.memset.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, i8, i64, i32) diff --git a/llvm/test/CodeGen/X86/SelectionDAG/x64-pcsections-memtransfer.ll b/llvm/test/CodeGen/X86/SelectionDAG/x64-pcsections-memtransfer.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/SelectionDAG/x64-pcsections-memtransfer.ll @@ -0,0 +1,233 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -stop-after=finalize-isel -verify-machineinstrs | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -stop-after=finalize-isel -verify-machineinstrs | FileCheck %s -check-prefix=X32 + +define i64 @call_memmove_intrinsic(ptr %src, ptr %dst) { + ; X64-LABEL: name: call_memmove_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: liveins: $rdi, $rsi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; X64-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; X64-NEXT: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (volatile load (s8) from %ir.dst) + ; X64-NEXT: MOV8mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV8rm]], pcsections !0 :: (volatile store (s8) into %ir.src) + ; X64-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: $rax = COPY [[MOV64rm]] + ; X64-NEXT: RET 0, $rax + ; X32-LABEL: name: call_memmove_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1, align 16) + ; X32-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0) + ; X32-NEXT: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[MOV32rm1]], 1, $noreg, 0, $noreg :: (volatile load (s8) from %ir.dst) + ; X32-NEXT: MOV8mr killed [[MOV32rm]], 1, $noreg, 0, $noreg, killed [[MOV8rm]], pcsections !0 :: (volatile store (s8) into %ir.src) + ; X32-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[MOV32rm1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[MOV32rm1]], 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: $eax = COPY [[MOV32rm2]] + ; X32-NEXT: $edx = COPY [[MOV32rm3]] + ; X32-NEXT: RET 0, $eax, $edx + call void @llvm.memmove.p0.p0.i64(ptr %src, ptr %dst, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memmove_inline_intrinsic(ptr %src, ptr %dst) { + ; X64-LABEL: name: call_memmove_inline_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: liveins: $rdi, $rsi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; X64-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; X64-NEXT: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (volatile load (s8) from %ir.dst) + ; X64-NEXT: MOV8mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV8rm]], pcsections !0 :: (volatile store (s8) into %ir.src) + ; X64-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: $rax = COPY [[MOV64rm]] + ; X64-NEXT: RET 0, $rax + ; X32-LABEL: name: call_memmove_inline_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.1, align 16) + ; X32-NEXT: 
[[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0) + ; X32-NEXT: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm [[MOV32rm1]], 1, $noreg, 0, $noreg :: (volatile load (s8) from %ir.dst) + ; X32-NEXT: MOV8mr killed [[MOV32rm]], 1, $noreg, 0, $noreg, killed [[MOV8rm]], pcsections !0 :: (volatile store (s8) into %ir.src) + ; X32-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[MOV32rm1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[MOV32rm1]], 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: $eax = COPY [[MOV32rm2]] + ; X32-NEXT: $edx = COPY [[MOV32rm3]] + ; X32-NEXT: RET 0, $eax, $edx + call void @llvm.memmove.inline.p0.p0.i64(ptr %src, ptr %dst, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memset_intrinsic(ptr %dst) { + ; X64-LABEL: name: call_memset_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: liveins: $rdi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi + ; X64-NEXT: MOV8mi [[COPY]], 1, $noreg, 0, $noreg, 0, pcsections !0 :: (volatile store (s8) into %ir.dst) + ; X64-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: $rax = COPY [[MOV64rm]] + ; X64-NEXT: RET 0, $rax + ; X32-LABEL: name: call_memset_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16) + ; X32-NEXT: MOV8mi [[MOV32rm]], 1, $noreg, 0, $noreg, 0, pcsections !0 :: (volatile store (s8) into %ir.dst) + ; X32-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[MOV32rm]], 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[MOV32rm]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: $eax = COPY [[MOV32rm2]] + ; X32-NEXT: $edx = COPY [[MOV32rm1]] + ; X32-NEXT: RET 0, $eax, $edx + call void @llvm.memset.p0.p0.i64(ptr %dst, i8 0, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memset_inline_intrinsic(ptr %dst) { + ; X64-LABEL: name: call_memset_inline_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: liveins: $rdi + ; X64-NEXT: {{ $}} + ; X64-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi + ; X64-NEXT: MOV8mi [[COPY]], 1, $noreg, 0, $noreg, 0, pcsections !0 :: (volatile store (s8) into %ir.dst) + ; X64-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: $rax = COPY [[MOV64rm]] + ; X64-NEXT: RET 0, $rax + ; X32-LABEL: name: call_memset_inline_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16) + ; X32-NEXT: MOV8mi [[MOV32rm]], 1, $noreg, 0, $noreg, 0, pcsections !0 :: (volatile store (s8) into %ir.dst) + ; X32-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[MOV32rm]], 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[MOV32rm]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: $eax = COPY [[MOV32rm2]] + ; X32-NEXT: $edx = COPY [[MOV32rm1]] + ; X32-NEXT: RET 0, $eax, $edx + call void @llvm.memset.inline.p0.p0.i64(ptr %dst, i8 0, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memcpy_element_unordered_atomic_intrinsic() { + ; X64-LABEL: name: 
call_memcpy_element_unordered_atomic_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; X64-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.0.src, 1, $noreg, 0, $noreg + ; X64-NEXT: [[LEA64r1:%[0-9]+]]:gr64 = LEA64r %stack.1.dst, 1, $noreg, 0, $noreg + ; X64-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 1 + ; X64-NEXT: $rdi = COPY [[LEA64r]] + ; X64-NEXT: $rsi = COPY [[LEA64r1]] + ; X64-NEXT: $rdx = COPY [[MOV32ri64_]] + ; X64-NEXT: CALL64pcrel32 target-flags(x86-plt) &__llvm_memcpy_element_unordered_atomic_1, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit-def $rsp, implicit-def $ssp + ; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp, pcsections !0 + ; X64-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.1.dst, 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: $rax = COPY [[MOV64rm]] + ; X64-NEXT: RET 0, $rax + ; X32-LABEL: name: call_memcpy_element_unordered_atomic_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + ; X32-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esp + ; X32-NEXT: [[LEA32r:%[0-9]+]]:gr32 = LEA32r %stack.1.dst, 1, $noreg, 0, $noreg + ; X32-NEXT: MOV32mr [[COPY]], 1, $noreg, 4, $noreg, killed [[LEA32r]] :: (store (s32) into stack + 4) + ; X32-NEXT: [[LEA32r1:%[0-9]+]]:gr32 = LEA32r %stack.0.src, 1, $noreg, 0, $noreg + ; X32-NEXT: MOV32mr [[COPY]], 1, $noreg, 0, $noreg, killed [[LEA32r1]] :: (store (s32) into stack) + ; X32-NEXT: MOV32mi [[COPY]], 1, $noreg, 12, $noreg, 0 :: (store (s32) into stack + 12) + ; X32-NEXT: MOV32mi [[COPY]], 1, $noreg, 8, $noreg, 1 :: (store (s32) into stack + 8) + ; X32-NEXT: CALLpcrel32 &__llvm_memcpy_element_unordered_atomic_1, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp + ; X32-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp, pcsections !0 + ; X32-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.1.dst, 1, $noreg, 0, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.1.dst, 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: $eax = COPY [[MOV32rm]] + ; X32-NEXT: $edx = COPY [[MOV32rm1]] + ; X32-NEXT: RET 0, $eax, $edx + %src = alloca i32, align 1 + %dst = alloca i32, align 1 + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 %src, ptr align 1 %dst, i64 1, i32 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memmove_element_unordered_atomic_intrinsic() { + ; X64-LABEL: name: call_memmove_element_unordered_atomic_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; X64-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.0.src, 1, $noreg, 0, $noreg + ; X64-NEXT: [[LEA64r1:%[0-9]+]]:gr64 = LEA64r %stack.1.dst, 1, $noreg, 0, $noreg + ; X64-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 1 + ; X64-NEXT: $rdi = COPY [[LEA64r]] + ; X64-NEXT: $rsi = COPY [[LEA64r1]] + ; X64-NEXT: $rdx = COPY [[MOV32ri64_]] + ; X64-NEXT: CALL64pcrel32 target-flags(x86-plt) 
&__llvm_memmove_element_unordered_atomic_1, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit-def $rsp, implicit-def $ssp + ; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp, pcsections !0 + ; X64-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.1.dst, 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: $rax = COPY [[MOV64rm]] + ; X64-NEXT: RET 0, $rax + ; X32-LABEL: name: call_memmove_element_unordered_atomic_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + ; X32-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esp + ; X32-NEXT: [[LEA32r:%[0-9]+]]:gr32 = LEA32r %stack.1.dst, 1, $noreg, 0, $noreg + ; X32-NEXT: MOV32mr [[COPY]], 1, $noreg, 4, $noreg, killed [[LEA32r]] :: (store (s32) into stack + 4) + ; X32-NEXT: [[LEA32r1:%[0-9]+]]:gr32 = LEA32r %stack.0.src, 1, $noreg, 0, $noreg + ; X32-NEXT: MOV32mr [[COPY]], 1, $noreg, 0, $noreg, killed [[LEA32r1]] :: (store (s32) into stack) + ; X32-NEXT: MOV32mi [[COPY]], 1, $noreg, 12, $noreg, 0 :: (store (s32) into stack + 12) + ; X32-NEXT: MOV32mi [[COPY]], 1, $noreg, 8, $noreg, 1 :: (store (s32) into stack + 8) + ; X32-NEXT: CALLpcrel32 &__llvm_memmove_element_unordered_atomic_1, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp + ; X32-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp, pcsections !0 + ; X32-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.1.dst, 1, $noreg, 0, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.1.dst, 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: $eax = COPY [[MOV32rm]] + ; X32-NEXT: $edx = COPY [[MOV32rm1]] + ; X32-NEXT: RET 0, $eax, $edx + %src = alloca i32, align 1 + %dst = alloca i32, align 1 + call void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr align 1 %src, ptr align 1 %dst, i64 1, i32 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memset_element_unordered_atomic_intrinsic() { + ; X64-LABEL: name: call_memset_element_unordered_atomic_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ; X64-NEXT: [[LEA64r:%[0-9]+]]:gr64 = LEA64r %stack.0.dst, 1, $noreg, 0, $noreg + ; X64-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags + ; X64-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 1 + ; X64-NEXT: $rdi = COPY [[LEA64r]] + ; X64-NEXT: $esi = COPY [[MOV32r0_]] + ; X64-NEXT: $rdx = COPY [[MOV32ri64_]] + ; X64-NEXT: CALL64pcrel32 target-flags(x86-plt) &__llvm_memset_element_unordered_atomic_1, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $esi, implicit $rdx, implicit-def $rsp, implicit-def $ssp + ; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp, pcsections !0 + ; X64-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.0.dst, 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: $rax = COPY [[MOV64rm]] + ; X64-NEXT: RET 0, $rax + ; X32-LABEL: name: call_memset_element_unordered_atomic_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def dead 
$esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp + ; X32-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esp + ; X32-NEXT: [[LEA32r:%[0-9]+]]:gr32 = LEA32r %stack.0.dst, 1, $noreg, 0, $noreg + ; X32-NEXT: MOV32mr [[COPY]], 1, $noreg, 0, $noreg, killed [[LEA32r]] :: (store (s32) into stack) + ; X32-NEXT: MOV32mi [[COPY]], 1, $noreg, 12, $noreg, 0 :: (store (s32) into stack + 12) + ; X32-NEXT: MOV32mi [[COPY]], 1, $noreg, 8, $noreg, 1 :: (store (s32) into stack + 8) + ; X32-NEXT: MOV32mi [[COPY]], 1, $noreg, 4, $noreg, 0 :: (store (s32) into stack + 4) + ; X32-NEXT: CALLpcrel32 &__llvm_memset_element_unordered_atomic_1, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp + ; X32-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def dead $esp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $esp, implicit $ssp, pcsections !0 + ; X32-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.0.dst, 1, $noreg, 0, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.0.dst, 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: $eax = COPY [[MOV32rm]] + ; X32-NEXT: $edx = COPY [[MOV32rm1]] + ; X32-NEXT: RET 0, $eax, $edx + %dst = alloca i32, align 1 + call void @llvm.memset.element.unordered.atomic.p0.p0.i64(ptr align 1%dst, i8 0, i64 1, i32 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + + +!0 = !{!"foo"} + +declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) +declare void @llvm.memmove.inline.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) +declare void @llvm.memset.p0.p0.i64(ptr nocapture, i8, i64, i1) +declare void @llvm.memset.inline.p0.p0.i64(ptr nocapture, i8, i64, i1) +declare void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i32) +declare void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i32) +declare void @llvm.memset.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, i8, i64, i32)
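
Rationale (summary, not part of the applied patch): SelectionDAGBuilder::visit() propagates !pcsections by looking up the just-visited IR instruction in NodeMap and attaching the metadata to the node recorded there via DAG.addPCSections(); that node-level annotation is what later surfaces as `pcsections !0` on the expanded loads/stores and libcall pseudos checked above. The mem* lowering paths changed here never called setValue() for these void-returning intrinsics, so no NodeMap entry existed and the metadata was dropped. The snippet below is a paraphrased, simplified sketch of that consumer-side logic (SelectionDAGBuilder::visit in recent LLVM), not the verbatim upstream code; NodeMap, setValue, addPCSections and MD_pcsections are the real names, but the surrounding control flow is abridged.

    // Simplified sketch of SelectionDAGBuilder::visit(const Instruction &I).
    // After the per-opcode visitor runs (e.g. visitIntrinsicCall, which now
    // calls setValue(&I, MC) for memcpy/memmove/memset and their variants),
    // any !pcsections metadata is attached to the recorded DAG node:
    //
    //   visit(I.getOpcode(), I);                       // per-opcode lowering
    //   ...
    //   if (MDNode *MD = I.getMetadata(LLVMContext::MD_pcsections)) {
    //     auto It = NodeMap.find(&I);                  // populated by setValue()
    //     if (It != NodeMap.end())
    //       DAG.addPCSections(It->second.getNode(), MD);
    //     // Without a NodeMap entry the metadata cannot be attached and is
    //     // lost (the in-tree code diagnoses that case).
    //   }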