diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2280,7 +2280,10 @@
   }
-  /// Set PCSections to be associated with Node.
+  /// Set PCSections to be associated with Node and, transitively, with every
+  /// node it uses as an operand.
   void addPCSections(const SDNode *Node, MDNode *MD) {
-    SDEI[Node].PCSections = MD;
+    // Visited set shared by the whole walk so each node is annotated once.
+    SmallPtrSet<const SDNode *, 32> Once;
+    addPCSectionsr(Node, MD, Once);
   }
   /// Return PCSections associated with Node, or nullptr if none exists.
   MDNode *getPCSections(const SDNode *Node) const {
@@ -2358,6 +2361,22 @@
   SDNode *FindNodeOrInsertPos(const FoldingSetNodeID &ID, const SDLoc &DL,
                               void *&InsertPos);
 
+  /// Recursively set PCSections to be associated with Node and all its values.
+  ///
+  /// \p Once is the set of nodes already visited by the current walk. It is
+  /// taken by reference so that a node reachable through several operand paths
+  /// is annotated, and its operands walked, only once.
+  void addPCSectionsr(const SDNode *Node, MDNode *MD,
+                      SmallPtrSetImpl<const SDNode *> &Once) {
+    if (!Once.insert(Node).second) // If we've been here before, return now.
+      return;
+
+    SDEI[Node].PCSections = MD;
+
+    for (const SDValue &Op : Node->op_values())
+      addPCSectionsr(Op.getNode(), MD, Once);
+  }
+
   /// Maps to auto-CSE operations.
std::vector CondCodeNodes; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -45,6 +45,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/SwiftErrorValueTracking.h" @@ -5939,6 +5940,7 @@ /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); + setValue(&I, MC); return; } case Intrinsic::memcpy_inline: { @@ -5960,6 +5962,7 @@ /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); + setValue(&I, MC); return; } case Intrinsic::memset: { @@ -5976,6 +5979,7 @@ Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); updateDAGForMaybeTailCall(MS); + setValue(&I, MS); return; } case Intrinsic::memset_inline: { @@ -5994,6 +5998,7 @@ MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); updateDAGForMaybeTailCall(MC); + setValue(&I, MC); return; } case Intrinsic::memmove: { @@ -6015,6 +6020,7 @@ MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MM); + setValue(&I, MM); return; } case Intrinsic::memcpy_element_unordered_atomic: { @@ -6031,6 +6037,7 @@ isTC, MachinePointerInfo(MI.getRawDest()), MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); + setValue(&I, MC); return; } case Intrinsic::memmove_element_unordered_atomic: { @@ -6047,6 +6054,7 @@ isTC, MachinePointerInfo(MI.getRawDest()), 
MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); + setValue(&I, MC); return; } case Intrinsic::memset_element_unordered_atomic: { @@ -6062,6 +6070,7 @@ DAG.getAtomicMemset(getRoot(), sdl, Dst, Val, Length, LengthTy, ElemSz, isTC, MachinePointerInfo(MI.getRawDest())); updateDAGForMaybeTailCall(MC); + setValue(&I, MC); return; } case Intrinsic::call_preallocated_setup: { diff --git a/llvm/test/CodeGen/AArch64/pcsections-memtransfer.ll b/llvm/test/CodeGen/AArch64/pcsections-memtransfer.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pcsections-memtransfer.ll @@ -0,0 +1,179 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc < %s -global-isel=0 -mtriple=aarch64-unknown-linux-gnu -stop-after=aarch64-expand-pseudo -verify-machineinstrs | FileCheck %s + +define i64 @call_memcpy_intrinsic(ptr %src, ptr %dst, i64 %len) { + ; CHECK-LABEL: name: call_memcpy_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x19, $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store (s64) into %stack.1), (store (s64) into %stack.0) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -16 + ; CHECK-NEXT: $x19 = ORRXrs $xzr, $x1, 0 + ; CHECK-NEXT: BL &memcpy, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit-def $sp, implicit-def dead $x0, pcsections !0 + ; CHECK-NEXT: renamable $x0 = LDRXui killed renamable $x19, 0 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.1), (load (s64) from %stack.0) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + call void @llvm.memcpy.p0.p0.i64(ptr %src, ptr %dst, i64 %len, i1 1), !pcsections !0 + %val = load i64, ptr 
%dst + ret i64 %val +} + +define i64 @call_memcpy_intrinsic_sm(ptr %src, ptr %dst) { + ; CHECK-LABEL: name: call_memcpy_intrinsic_sm + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w8 = LDRBBui renamable $x1, 0, pcsections !0 :: (volatile load (s8) from %ir.dst) + ; CHECK-NEXT: STRBBui killed renamable $w8, killed renamable $x0, 0, pcsections !0 :: (volatile store (s8) into %ir.src) + ; CHECK-NEXT: renamable $x0 = LDRXui killed renamable $x1, 0 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + call void @llvm.memcpy.p0.p0.i64(ptr %src, ptr %dst, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memcpy_inline_intrinsic(ptr %src, ptr %dst) { + ; CHECK-LABEL: name: call_memcpy_inline_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w8 = LDRBBui renamable $x1, 0, pcsections !0 :: (volatile load (s8) from %ir.dst) + ; CHECK-NEXT: STRBBui killed renamable $w8, killed renamable $x0, 0, pcsections !0 :: (volatile store (s8) into %ir.src) + ; CHECK-NEXT: renamable $x0 = LDRXui killed renamable $x1, 0 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + call void @llvm.memcpy.inline.p0.p0.i64(ptr %src, ptr %dst, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memmove_intrinsic(ptr %src, ptr %dst, i64 %len) { + ; CHECK-LABEL: name: call_memmove_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x19, $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store (s64) into %stack.1), (store (s64) into %stack.0) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -16 + ; CHECK-NEXT: 
$x19 = ORRXrs $xzr, $x1, 0 + ; CHECK-NEXT: BL &memmove, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit-def $sp, implicit-def dead $x0, pcsections !0 + ; CHECK-NEXT: renamable $x0 = LDRXui killed renamable $x19, 0 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.1), (load (s64) from %stack.0) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + call void @llvm.memmove.p0.p0.i64(ptr %src, ptr %dst, i64 %len, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memset_intrinsic(ptr %dst, i64 %len) { + ; CHECK-LABEL: name: call_memset_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $x0, $x1, $x19, $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store (s64) into %stack.1), (store (s64) into %stack.0) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -16 + ; CHECK-NEXT: $x2 = ORRXrs $xzr, $x1, 0 + ; CHECK-NEXT: $x19 = ORRXrs $xzr, $x0, 0 + ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0 + ; CHECK-NEXT: BL &memset, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit $w1, implicit $x2, implicit-def $sp, implicit-def dead $x0, pcsections !0 + ; CHECK-NEXT: renamable $x0 = LDRXui killed renamable $x19, 0 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.1), (load (s64) from %stack.0) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + call void @llvm.memset.p0.p0.i64(ptr %dst, i8 0, i64 %len, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memset_inline_intrinsic(ptr %dst) { + ; CHECK-LABEL: name: call_memset_inline_intrinsic + ; CHECK: bb.0 (%ir-block.0): 
+ ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: STRBBui $wzr, renamable $x0, 0, pcsections !0 :: (volatile store (s8) into %ir.dst) + ; CHECK-NEXT: renamable $x0 = LDRXui killed renamable $x0, 0 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + call void @llvm.memset.inline.p0.p0.i64(ptr %dst, i8 0, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memcpy_element_unordered_atomic_intrinsic() { + ; CHECK-LABEL: name: call_memcpy_element_unordered_atomic_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.2) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -16 + ; CHECK-NEXT: $x0 = ADDXri $sp, 12, 0 + ; CHECK-NEXT: $x1 = ADDXri $sp, 8, 0 + ; CHECK-NEXT: dead $w2 = MOVZWi 1, 0, implicit-def $x2 + ; CHECK-NEXT: BL &__llvm_memcpy_element_unordered_atomic_1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit-def $sp, pcsections !0 + ; CHECK-NEXT: renamable $x0 = LDRXui $sp, 1 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + %src = alloca i32, align 1 + %dst = alloca i32, align 1 + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 %src, ptr align 1 %dst, i64 1, i32 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memmove_element_unordered_atomic_intrinsic() { + ; CHECK-LABEL: name: call_memmove_element_unordered_atomic_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.2) + ; 
CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -16 + ; CHECK-NEXT: $x0 = ADDXri $sp, 12, 0 + ; CHECK-NEXT: $x1 = ADDXri $sp, 8, 0 + ; CHECK-NEXT: dead $w2 = MOVZWi 1, 0, implicit-def $x2 + ; CHECK-NEXT: BL &__llvm_memmove_element_unordered_atomic_1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit-def $sp, pcsections !0 + ; CHECK-NEXT: renamable $x0 = LDRXui $sp, 1 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + %src = alloca i32, align 1 + %dst = alloca i32, align 1 + call void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr align 1 %src, ptr align 1 %dst, i64 1, i32 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memset_element_unordered_atomic_intrinsic() { + ; CHECK-LABEL: name: call_memset_element_unordered_atomic_intrinsic + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: liveins: $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.1) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -16 + ; CHECK-NEXT: $x0 = ADDXri $sp, 12, 0 + ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0 + ; CHECK-NEXT: dead $w2 = MOVZWi 1, 0, implicit-def $x2 + ; CHECK-NEXT: BL &__llvm_memset_element_unordered_atomic_1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $w1, implicit killed $x2, implicit-def $sp, pcsections !0 + ; CHECK-NEXT: renamable $x0 = LDURXi $sp, 12 :: (load (s64) from %ir.dst) + ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1) + ; CHECK-NEXT: RET undef $lr, implicit $x0 + %dst = alloca i32, align 1 + call void 
@llvm.memset.element.unordered.atomic.p0.p0.i64(ptr align 1 %dst, i8 0, i64 1, i32 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + + +!0 = !{!"foo"} + +declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) +declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) +declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) +declare void @llvm.memset.p0.p0.i64(ptr nocapture, i8, i64, i1) +declare void @llvm.memset.inline.p0.p0.i64(ptr nocapture, i8, i64, i1) +declare void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i32) +declare void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i32) +declare void @llvm.memset.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, i8, i64, i32) diff --git a/llvm/test/CodeGen/AArch64/pcsections.ll b/llvm/test/CodeGen/AArch64/pcsections.ll --- a/llvm/test/CodeGen/AArch64/pcsections.ll +++ b/llvm/test/CodeGen/AArch64/pcsections.ll @@ -2,7 +2,7 @@ ; RUN: llc -O1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-OPT,DEFCM ; RUN: llc -O2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-OPT,DEFCM ; RUN: llc -O3 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-OPT,DEFCM -; RUN: llc -O1 -code-model=large < %s | FileCheck %s --check-prefixes=CHECK,CHECK-OPT,LARGE +; RUN: llc -O1 -code-model=large < %s | FileCheck %s --check-prefixes=CHECK,LARGE target triple = "aarch64-unknown-linux-gnu" @@ -14,24 +14,61 @@ ; CHECK: .Lfunc_begin0: ; CHECK: // %bb.0: // %entry ; CHECK: .Lpcsection0: +; -- +; LARGE-NEXT: movz +; LARGE: .Lpcsection1: +; LARGE-NEXT: movk +; LARGE: .Lpcsection2: +; LARGE-NEXT: movk +; LARGE: .Lpcsection3: +; LARGE-NEXT: movk +; LARGE: .Lpcsection4: +; -- ; CHECK-NEXT: ldr ; CHECK-NEXT: ret ; CHECK: .section section_no_aux,"awo",@progbits,.text -; CHECK-NEXT: .Lpcsection_base0: +; -- +; 
DEFCM-NEXT: .Lpcsection_base0: ; DEFCM-NEXT: .word .Lfunc_begin0-.Lpcsection_base0 -; LARGE-NEXT: .xword .Lfunc_begin0-.Lpcsection_base0 -; CHECK-NEXT: .word .Lfunc_end0-.Lfunc_begin0 -; CHECK-NEXT: .section section_aux_42,"awo",@progbits,.text -; CHECK-NEXT: .Lpcsection_base1: +; DEFCM-NEXT: .word .Lfunc_end0-.Lfunc_begin0 +; DEFCM-NEXT: .section section_aux_42,"awo",@progbits,.text +; DEFCM-NEXT: .Lpcsection_base1: ; DEFCM-NEXT: .word .Lpcsection0-.Lpcsection_base1 -; LARGE-NEXT: .xword .Lpcsection0-.Lpcsection_base1 -; CHECK-NEXT: .word 42 -; CHECK-NEXT: .section section_aux_21264,"awo",@progbits,.text -; CHECK-NEXT: .Lpcsection_base2: +; DEFCM-NEXT: .word 42 +; DEFCM-NEXT: .section section_aux_21264,"awo",@progbits,.text +; DEFCM-NEXT: .Lpcsection_base2: ; DEFCM-NEXT: .word .Lpcsection0-.Lpcsection_base2 -; LARGE-NEXT: .xword .Lpcsection0-.Lpcsection_base2 -; CHECK-NEXT: .word 21264 -; CHECK-NEXT: .text +; DEFCM-NEXT: .word 21264 +; -- +; LARGE-NEXT: .Lpcsection_base0: +; LARGE-NEXT: .xword .Lfunc_begin0-.Lpcsection_base0 +; LARGE-NEXT: .word .Lfunc_end0-.Lfunc_begin0 +; LARGE-NEXT: .section section_aux_42,"awo",@progbits,.text +; LARGE-NEXT: .Lpcsection_base1: +; LARGE-NEXT: .xword .Lpcsection0-.Lpcsection_base1 +; LARGE-NEXT: .Lpcsection_base2: +; LARGE-NEXT: .xword .Lpcsection1-.Lpcsection_base2 +; LARGE-NEXT: .Lpcsection_base3: +; LARGE-NEXT: .xword .Lpcsection2-.Lpcsection_base3 +; LARGE-NEXT: .Lpcsection_base4: +; LARGE-NEXT: .xword .Lpcsection3-.Lpcsection_base4 +; LARGE-NEXT: .Lpcsection_base5: +; LARGE-NEXT: .xword .Lpcsection4-.Lpcsection_base5 +; LARGE-NEXT: .word 42 +; LARGE-NEXT: .section section_aux_21264,"awo",@progbits,.text +; LARGE-NEXT: .Lpcsection_base6: +; LARGE-NEXT: .xword .Lpcsection0-.Lpcsection_base6 +; LARGE-NEXT: .Lpcsection_base7: +; LARGE-NEXT: .xword .Lpcsection1-.Lpcsection_base7 +; LARGE-NEXT: .Lpcsection_base8: +; LARGE-NEXT: .xword .Lpcsection2-.Lpcsection_base8 +; LARGE-NEXT: .Lpcsection_base9: +; LARGE-NEXT: .xword 
.Lpcsection3-.Lpcsection_base9 +; LARGE-NEXT: .Lpcsection_base10: +; LARGE-NEXT: .xword .Lpcsection4-.Lpcsection_base10 +; LARGE-NEXT: .word 21264 +; -- +; CHECK-NEXT: .text entry: %0 = load i64, ptr @bar, align 8, !pcsections !1 ret i64 %0 @@ -39,16 +76,46 @@ define i64 @test_simple_atomic() { ; CHECK-LABEL: test_simple_atomic: -; CHECK: .Lpcsection1: -; CHECK-NEXT: ldr -; CHECK-NOT: .Lpcsection2 -; CHECK: ldr +; -- +; DEFCM: .Lpcsection1: +; DEFCM-NEXT: ldr +; DEFCM-NOT: .Lpcsection2 +; DEFCM: ldr +; -- +; LARGE: .Lpcsection5: +; LARGE-NEXT: movz +; LARGE-NEXT: movz +; LARGE: .Lpcsection6: +; LARGE-NEXT: movk +; LARGE-NEXT: movk +; LARGE: .Lpcsection7: +; LARGE-NEXT: movk +; LARGE-NEXT: movk +; LARGE: .Lpcsection8: +; LARGE-NEXT: movk +; LARGE-NEXT: movk +; LARGE: .Lpcsection9: +; LARGE-NEXT: ldr +; LARGE-NEXT: ldr +; -- ; CHECK: add ; CHECK-NEXT: ret ; CHECK: .section section_no_aux,"awo",@progbits,.text -; CHECK-NEXT: .Lpcsection_base3: +; -- +; DEFCM-NEXT: .Lpcsection_base3: ; DEFCM-NEXT: .word .Lpcsection1-.Lpcsection_base3 -; LARGE-NEXT: .xword .Lpcsection1-.Lpcsection_base3 +; -- +; LARGE-NEXT: .Lpcsection_base11: +; LARGE-NEXT: .xword .Lpcsection5-.Lpcsection_base11 +; LARGE-NEXT: .Lpcsection_base12: +; LARGE-NEXT: .xword .Lpcsection6-.Lpcsection_base12 +; LARGE-NEXT: .Lpcsection_base13: +; LARGE-NEXT: .xword .Lpcsection7-.Lpcsection_base13 +; LARGE-NEXT: .Lpcsection_base14: +; LARGE-NEXT: .xword .Lpcsection8-.Lpcsection_base14 +; LARGE-NEXT: .Lpcsection_base15: +; LARGE-NEXT: .xword .Lpcsection9-.Lpcsection_base15 +; -- ; CHECK-NEXT: .text entry: %0 = load atomic i64, ptr @foo monotonic, align 8, !pcsections !0 @@ -84,23 +151,78 @@ ; CHECK-UNOPT: .Lpcsection13: ; CHECK-UNOPT-NEXT: b ; --- +; LARGE: .Lpcsection10: +; LARGE-NEXT: movz +; LARGE-NEXT: .Lpcsection11: +; LARGE-NEXT: movk +; LARGE-NEXT: .Lpcsection12: +; LARGE-NEXT: movk +; LARGE-NEXT: .Lpcsection13: +; LARGE-NEXT: movk +; LARGE: .Lpcsection14: +; LARGE-NEXT: ldxr +; LARGE-NEXT: .Lpcsection15: 
+; LARGE-NEXT: add +; LARGE-NEXT: .Lpcsection16: +; LARGE-NEXT: stxr +; LARGE-NEXT: .Lpcsection17: +; LARGE-NEXT: cbnz +; --- ; CHECK-NOT: .Lpcsection ; CHECK: ldr ; CHECK: ret ; CHECK: .section section_no_aux,"awo",@progbits,.text -; CHECK-NEXT: .Lpcsection_base4: -; DEFCM-NEXT: .word .Lpcsection2-.Lpcsection_base4 -; LARGE-NEXT: .xword .Lpcsection2-.Lpcsection_base4 -; CHECK-NEXT: .Lpcsection_base5: -; DEFCM-NEXT: .word .Lpcsection3-.Lpcsection_base5 -; LARGE-NEXT: .xword .Lpcsection3-.Lpcsection_base5 -; CHECK-NEXT: .Lpcsection_base6: -; DEFCM-NEXT: .word .Lpcsection4-.Lpcsection_base6 -; LARGE-NEXT: .xword .Lpcsection4-.Lpcsection_base6 -; CHECK-NEXT: .Lpcsection_base7: -; DEFCM-NEXT: .word .Lpcsection5-.Lpcsection_base7 -; LARGE-NEXT: .xword .Lpcsection5-.Lpcsection_base7 -; CHECK-UNOPT: .word .Lpcsection13-.Lpcsection_base15 +; --- +; CHECK-OPT-NEXT: .Lpcsection_base4: +; CHECK-OPT-NEXT: .word .Lpcsection2-.Lpcsection_base4 +; CHECK-OPT-NEXT: .Lpcsection_base5: +; CHECK-OPT-NEXT: .word .Lpcsection3-.Lpcsection_base5 +; CHECK-OPT-NEXT: .Lpcsection_base6: +; CHECK-OPT-NEXT: .word .Lpcsection4-.Lpcsection_base6 +; CHECK-OPT-NEXT: .Lpcsection_base7: +; CHECK-OPT-NEXT: .word .Lpcsection5-.Lpcsection_base7 +; --- +; CHECK-UNOPT-NEXT: .Lpcsection_base4: +; CHECK-UNOPT-NEXT: .word .Lpcsection2-.Lpcsection_base4 +; CHECK-UNOPT-NEXT: .Lpcsection_base5: +; CHECK-UNOPT-NEXT: .word .Lpcsection3-.Lpcsection_base5 +; CHECK-UNOPT-NEXT: .Lpcsection_base6: +; CHECK-UNOPT-NEXT: .word .Lpcsection4-.Lpcsection_base6 +; CHECK-UNOPT-NEXT: .Lpcsection_base7: +; CHECK-UNOPT-NEXT: .word .Lpcsection5-.Lpcsection_base7 +; CHECK-UNOPT-NEXT: .Lpcsection_base8: +; CHECK-UNOPT-NEXT: .word .Lpcsection6-.Lpcsection_base8 +; CHECK-UNOPT-NEXT: .Lpcsection_base9: +; CHECK-UNOPT-NEXT: .word .Lpcsection7-.Lpcsection_base9 +; CHECK-UNOPT-NEXT: .Lpcsection_base10: +; CHECK-UNOPT-NEXT: .word .Lpcsection8-.Lpcsection_base10 +; CHECK-UNOPT-NEXT: .Lpcsection_base11: +; CHECK-UNOPT-NEXT: .word 
.Lpcsection9-.Lpcsection_base11 +; CHECK-UNOPT-NEXT: .Lpcsection_base12: +; CHECK-UNOPT-NEXT: .word .Lpcsection10-.Lpcsection_base12 +; CHECK-UNOPT-NEXT: .Lpcsection_base13: +; CHECK-UNOPT-NEXT: .word .Lpcsection11-.Lpcsection_base13 +; CHECK-UNOPT-NEXT: .Lpcsection_base14: +; CHECK-UNOPT-NEXT: .word .Lpcsection12-.Lpcsection_base14 +; CHECK-UNOPT-NEXT: .Lpcsection_base15: +; CHECK-UNOPT-NEXT: .word .Lpcsection13-.Lpcsection_base15 +; --- +; LARGE-NEXT: .Lpcsection_base16: +; LARGE-NEXT: .xword .Lpcsection10-.Lpcsection_base16 +; LARGE-NEXT: .Lpcsection_base17: +; LARGE-NEXT: .xword .Lpcsection11-.Lpcsection_base17 +; LARGE-NEXT: .Lpcsection_base18: +; LARGE-NEXT: .xword .Lpcsection12-.Lpcsection_base18 +; LARGE-NEXT: .Lpcsection_base19: +; LARGE-NEXT: .xword .Lpcsection13-.Lpcsection_base19 +; LARGE-NEXT: .Lpcsection_base20: +; LARGE-NEXT: .xword .Lpcsection14-.Lpcsection_base20 +; LARGE-NEXT: .Lpcsection_base21: +; LARGE-NEXT: .xword .Lpcsection15-.Lpcsection_base21 +; LARGE-NEXT: .Lpcsection_base22: +; LARGE-NEXT: .xword .Lpcsection16-.Lpcsection_base22 +; LARGE-NEXT: .Lpcsection_base23: +; LARGE-NEXT: .xword .Lpcsection17-.Lpcsection_base23 ; CHECK-NEXT: .text entry: %0 = atomicrmw add ptr @foo, i64 1 monotonic, align 8, !pcsections !0 diff --git a/llvm/test/CodeGen/X86/pcsections-memtransfer.ll b/llvm/test/CodeGen/X86/pcsections-memtransfer.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/pcsections-memtransfer.ll @@ -0,0 +1,348 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc < %s -global-isel=0 -mtriple=x86_64-unknown-linux-gnu -stop-after=irtranslator -verify-machineinstrs | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -global-isel=0 -mtriple=i686-unknown-linux-gnu -stop-after=irtranslator -verify-machineinstrs | FileCheck %s -check-prefix=X32 + +define i64 @call_memcpy_intrinsic(ptr %src, ptr %dst, i64 %len) { + ; X64-LABEL: name: call_memcpy_intrinsic + ; X64: bb.0 (%ir-block.0): + ; 
X64-NEXT: liveins: $rdi, $rdx, $rsi, $rbx + ; X64-NEXT: {{ $}} + ; X64-NEXT: frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp + ; X64-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; X64-NEXT: CFI_INSTRUCTION offset $rbx, -16 + ; X64-NEXT: $rbx = MOV64rr $rsi + ; X64-NEXT: CALL64pcrel32 target-flags(x86-plt) &memcpy, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax, pcsections !0 + ; X64-NEXT: renamable $rax = MOV64rm killed renamable $rbx, 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp + ; X64-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 8 + ; X64-NEXT: RET64 $rax + ; X32-LABEL: name: call_memcpy_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: liveins: $esi + ; X32-NEXT: {{ $}} + ; X32-NEXT: frame-setup PUSH32r killed $esi, implicit-def $esp, implicit $esp + ; X32-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; X32-NEXT: $esp = frame-setup SUB32ri8 $esp, 8, implicit-def dead $eflags + ; X32-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; X32-NEXT: CFI_INSTRUCTION offset $esi, -8 + ; X32-NEXT: renamable $esi = MOV32rm $esp, 1, $noreg, 20, $noreg, pcsections !0 :: (load (s32) from %fixed-stack.3) + ; X32-NEXT: $esp = SUB32ri8 $esp, 4, implicit-def dead $eflags + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32rmm $esp, 1, $noreg, 28, $noreg, implicit-def $esp, implicit $esp :: (load (s32) from %fixed-stack.2, align 8), (store (s32) into stack + 8) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32r renamable $esi, implicit-def $esp, implicit $esp :: (store (s32) into stack + 4) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32rmm $esp, 1, $noreg, 28, $noreg, implicit-def $esp, implicit $esp :: (load (s32) from %fixed-stack.4, align 16), (store (s32) into stack) + ; X32-NEXT: 
CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: CALLpcrel32 &memcpy, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, implicit-def dead $eax, pcsections !0 + ; X32-NEXT: $esp = ADD32ri8 $esp, 16, implicit-def dead $eflags + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset -16 + ; X32-NEXT: renamable $eax = MOV32rm renamable $esi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: renamable $edx = MOV32rm killed renamable $esi, 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: $esp = frame-destroy ADD32ri8 $esp, 8, implicit-def dead $eflags + ; X32-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 8 + ; X32-NEXT: $esi = frame-destroy POP32r implicit-def $esp, implicit $esp + ; X32-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 4 + ; X32-NEXT: RET32 $eax, $edx + call void @llvm.memcpy.p0.p0.i64(ptr %src, ptr %dst, i64 %len, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memcpy_intrinsic_sm(ptr %src, ptr %dst) { + ; X64-LABEL: name: call_memcpy_intrinsic_sm + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: liveins: $rdi, $rsi + ; X64-NEXT: {{ $}} + ; X64-NEXT: $eax = MOVZX32rm8 renamable $rsi, 1, $noreg, 0, $noreg :: (volatile load (s8) from %ir.dst) + ; X64-NEXT: MOV8mr killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $al, pcsections !0 :: (volatile store (s8) into %ir.src) + ; X64-NEXT: renamable $rax = MOV64rm killed renamable $rsi, 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: RET64 $rax + ; X32-LABEL: name: call_memcpy_intrinsic_sm + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: renamable $eax = MOV32rm $esp, 1, $noreg, 4, $noreg, pcsections !0 :: (load (s32) from %fixed-stack.1, align 16) + ; X32-NEXT: renamable $ecx = MOV32rm $esp, 1, $noreg, 8, $noreg, pcsections !0 :: (load (s32) from %fixed-stack.0) + ; X32-NEXT: $edx = MOVZX32rm8 renamable $ecx, 1, $noreg, 0, $noreg :: (volatile load (s8) from %ir.dst) + ; X32-NEXT: MOV8mr killed 
renamable $eax, 1, $noreg, 0, $noreg, killed renamable $dl, pcsections !0 :: (volatile store (s8) into %ir.src) + ; X32-NEXT: renamable $eax = MOV32rm renamable $ecx, 1, $noreg, 0, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: renamable $edx = MOV32rm killed renamable $ecx, 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: RET32 $eax, $edx + call void @llvm.memcpy.p0.p0.i64(ptr %src, ptr %dst, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memcpy_inline_intrinsic(ptr %src, ptr %dst) { + ; X64-LABEL: name: call_memcpy_inline_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: liveins: $rdi, $rsi + ; X64-NEXT: {{ $}} + ; X64-NEXT: $eax = MOVZX32rm8 renamable $rsi, 1, $noreg, 0, $noreg :: (volatile load (s8) from %ir.dst) + ; X64-NEXT: MOV8mr killed renamable $rdi, 1, $noreg, 0, $noreg, killed renamable $al, pcsections !0 :: (volatile store (s8) into %ir.src) + ; X64-NEXT: renamable $rax = MOV64rm killed renamable $rsi, 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: RET64 $rax + ; X32-LABEL: name: call_memcpy_inline_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: renamable $eax = MOV32rm $esp, 1, $noreg, 4, $noreg, pcsections !0 :: (load (s32) from %fixed-stack.1, align 16) + ; X32-NEXT: renamable $ecx = MOV32rm $esp, 1, $noreg, 8, $noreg, pcsections !0 :: (load (s32) from %fixed-stack.0) + ; X32-NEXT: $edx = MOVZX32rm8 renamable $ecx, 1, $noreg, 0, $noreg :: (volatile load (s8) from %ir.dst) + ; X32-NEXT: MOV8mr killed renamable $eax, 1, $noreg, 0, $noreg, killed renamable $dl, pcsections !0 :: (volatile store (s8) into %ir.src) + ; X32-NEXT: renamable $eax = MOV32rm renamable $ecx, 1, $noreg, 0, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: renamable $edx = MOV32rm killed renamable $ecx, 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: RET32 $eax, $edx + call void @llvm.memcpy.inline.p0.p0.i64(ptr %src, ptr %dst, i64 1, i1 1), !pcsections !0 + %val = 
load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memmove_intrinsic(ptr %src, ptr %dst, i64 %len) { + ; X64-LABEL: name: call_memmove_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: liveins: $rdi, $rdx, $rsi, $rbx + ; X64-NEXT: {{ $}} + ; X64-NEXT: frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp + ; X64-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; X64-NEXT: CFI_INSTRUCTION offset $rbx, -16 + ; X64-NEXT: $rbx = MOV64rr $rsi + ; X64-NEXT: CALL64pcrel32 target-flags(x86-plt) &memmove, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax, pcsections !0 + ; X64-NEXT: renamable $rax = MOV64rm killed renamable $rbx, 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp + ; X64-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 8 + ; X64-NEXT: RET64 $rax + ; X32-LABEL: name: call_memmove_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: liveins: $esi + ; X32-NEXT: {{ $}} + ; X32-NEXT: frame-setup PUSH32r killed $esi, implicit-def $esp, implicit $esp + ; X32-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; X32-NEXT: $esp = frame-setup SUB32ri8 $esp, 8, implicit-def dead $eflags + ; X32-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; X32-NEXT: CFI_INSTRUCTION offset $esi, -8 + ; X32-NEXT: renamable $esi = MOV32rm $esp, 1, $noreg, 20, $noreg, pcsections !0 :: (load (s32) from %fixed-stack.3) + ; X32-NEXT: $esp = SUB32ri8 $esp, 4, implicit-def dead $eflags + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32rmm $esp, 1, $noreg, 28, $noreg, implicit-def $esp, implicit $esp :: (load (s32) from %fixed-stack.2, align 8), (store (s32) into stack + 8) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32r renamable $esi, implicit-def $esp, implicit $esp :: (store (s32) into stack + 4) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 
4 + ; X32-NEXT: PUSH32rmm $esp, 1, $noreg, 28, $noreg, implicit-def $esp, implicit $esp :: (load (s32) from %fixed-stack.4, align 16), (store (s32) into stack) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: CALLpcrel32 &memmove, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, implicit-def dead $eax, pcsections !0 + ; X32-NEXT: $esp = ADD32ri8 $esp, 16, implicit-def dead $eflags + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset -16 + ; X32-NEXT: renamable $eax = MOV32rm renamable $esi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: renamable $edx = MOV32rm killed renamable $esi, 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: $esp = frame-destroy ADD32ri8 $esp, 8, implicit-def dead $eflags + ; X32-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 8 + ; X32-NEXT: $esi = frame-destroy POP32r implicit-def $esp, implicit $esp + ; X32-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 4 + ; X32-NEXT: RET32 $eax, $edx + call void @llvm.memmove.p0.p0.i64(ptr %src, ptr %dst, i64 %len, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memset_intrinsic(ptr %dst, i64 %len) { + ; X64-LABEL: name: call_memset_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: liveins: $rdi, $rsi, $rbx + ; X64-NEXT: {{ $}} + ; X64-NEXT: frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp + ; X64-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; X64-NEXT: CFI_INSTRUCTION offset $rbx, -16 + ; X64-NEXT: $rdx = MOV64rr $rsi + ; X64-NEXT: $rbx = MOV64rr $rdi + ; X64-NEXT: $esi = XOR32rr undef $esi, undef $esi, implicit-def dead $eflags, pcsections !0 + ; X64-NEXT: CALL64pcrel32 target-flags(x86-plt) &memset, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $esi, implicit $rdx, implicit-def $rsp, implicit-def $ssp, implicit-def dead $rax, pcsections !0 + ; X64-NEXT: renamable $rax = MOV64rm killed renamable $rbx, 1, $noreg, 0, $noreg :: (load (s64) 
from %ir.dst) + ; X64-NEXT: $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp + ; X64-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 8 + ; X64-NEXT: RET64 $rax + ; X32-LABEL: name: call_memset_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: liveins: $esi + ; X32-NEXT: {{ $}} + ; X32-NEXT: frame-setup PUSH32r killed $esi, implicit-def $esp, implicit $esp + ; X32-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; X32-NEXT: $esp = frame-setup SUB32ri8 $esp, 8, implicit-def dead $eflags + ; X32-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; X32-NEXT: CFI_INSTRUCTION offset $esi, -8 + ; X32-NEXT: renamable $esi = MOV32rm $esp, 1, $noreg, 16, $noreg, pcsections !0 :: (load (s32) from %fixed-stack.3, align 16) + ; X32-NEXT: $esp = SUB32ri8 $esp, 4, implicit-def dead $eflags + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32rmm $esp, 1, $noreg, 24, $noreg, implicit-def $esp, implicit $esp :: (load (s32) from %fixed-stack.2), (store (s32) into stack + 8) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32i8 0, implicit-def $esp, implicit $esp :: (store (s32) into stack + 4) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32r renamable $esi, implicit-def $esp, implicit $esp :: (store (s32) into stack) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: CALLpcrel32 &memset, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, implicit-def dead $eax, pcsections !0 + ; X32-NEXT: $esp = ADD32ri8 $esp, 16, implicit-def dead $eflags + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset -16 + ; X32-NEXT: renamable $eax = MOV32rm renamable $esi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: renamable $edx = MOV32rm killed renamable $esi, 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: $esp = frame-destroy ADD32ri8 $esp, 8, implicit-def dead $eflags + ; X32-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 8 + ; X32-NEXT: $esi 
= frame-destroy POP32r implicit-def $esp, implicit $esp + ; X32-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 4 + ; X32-NEXT: RET32 $eax, $edx + call void @llvm.memset.p0.p0.i64(ptr %dst, i8 0, i64 %len, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memset_inline_intrinsic(ptr %dst) { + ; X64-LABEL: name: call_memset_inline_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: liveins: $rdi + ; X64-NEXT: {{ $}} + ; X64-NEXT: MOV8mi renamable $rdi, 1, $noreg, 0, $noreg, 0, pcsections !0 :: (volatile store (s8) into %ir.dst) + ; X64-NEXT: renamable $rax = MOV64rm killed renamable $rdi, 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: RET64 $rax + ; X32-LABEL: name: call_memset_inline_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: renamable $eax = MOV32rm $esp, 1, $noreg, 4, $noreg, pcsections !0 :: (load (s32) from %fixed-stack.0, align 16) + ; X32-NEXT: MOV8mi renamable $eax, 1, $noreg, 0, $noreg, 0, pcsections !0 :: (volatile store (s8) into %ir.dst) + ; X32-NEXT: renamable $edx = MOV32rm renamable $eax, 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: renamable $eax = MOV32rm killed renamable $eax, 1, $noreg, 0, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: RET32 $eax, $edx + call void @llvm.memset.inline.p0.p0.i64(ptr %dst, i8 0, i64 1, i1 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memcpy_element_unordered_atomic_intrinsic() { + ; X64-LABEL: name: call_memcpy_element_unordered_atomic_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp + ; X64-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; X64-NEXT: renamable $rdi = LEA64r $rsp, 1, $noreg, 4, $noreg, pcsections !0 + ; X64-NEXT: $rsi = MOV64rr $rsp + ; X64-NEXT: $edx = MOV32ri 1, implicit-def $rdx, pcsections !0 + ; X64-NEXT: CALL64pcrel32 target-flags(x86-plt) &__llvm_memcpy_element_unordered_atomic_1, csr_64, 
implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit killed $rdx, implicit-def $rsp, implicit-def $ssp, pcsections !0 + ; X64-NEXT: renamable $rax = MOV64rm $rsp, 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp + ; X64-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 8 + ; X64-NEXT: RET64 $rax + ; X32-LABEL: name: call_memcpy_element_unordered_atomic_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: $esp = frame-setup SUB32ri8 $esp, 12, implicit-def dead $eflags + ; X32-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; X32-NEXT: renamable $eax = LEA32r $esp, 1, $noreg, 4, $noreg, pcsections !0 + ; X32-NEXT: renamable $ecx = LEA32r $esp, 1, $noreg, 8, $noreg, pcsections !0 + ; X32-NEXT: PUSH32i8 0, implicit-def $esp, implicit $esp :: (store (s32) into stack + 12) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32i8 1, implicit-def $esp, implicit $esp :: (store (s32) into stack + 8) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32r killed renamable $eax, implicit-def $esp, implicit $esp :: (store (s32) into stack + 4) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32r killed renamable $ecx, implicit-def $esp, implicit $esp :: (store (s32) into stack) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: CALLpcrel32 &__llvm_memcpy_element_unordered_atomic_1, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, pcsections !0 + ; X32-NEXT: $esp = ADD32ri8 $esp, 16, implicit-def dead $eflags + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset -16 + ; X32-NEXT: renamable $eax = MOV32rm $esp, 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: renamable $edx = MOV32rm $esp, 1, $noreg, 8, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: $esp = frame-destroy ADD32ri8 $esp, 12, implicit-def dead $eflags + ; X32-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 4 + ; 
X32-NEXT: RET32 $eax, $edx + %src = alloca i32, align 1 + %dst = alloca i32, align 1 + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 %src, ptr align 1 %dst, i64 1, i32 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memmove_element_unordered_atomic_intrinsic() { + ; X64-LABEL: name: call_memmove_element_unordered_atomic_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp + ; X64-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; X64-NEXT: renamable $rdi = LEA64r $rsp, 1, $noreg, 4, $noreg, pcsections !0 + ; X64-NEXT: $rsi = MOV64rr $rsp + ; X64-NEXT: $edx = MOV32ri 1, implicit-def $rdx, pcsections !0 + ; X64-NEXT: CALL64pcrel32 target-flags(x86-plt) &__llvm_memmove_element_unordered_atomic_1, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit killed $rdx, implicit-def $rsp, implicit-def $ssp, pcsections !0 + ; X64-NEXT: renamable $rax = MOV64rm $rsp, 1, $noreg, 0, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp + ; X64-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 8 + ; X64-NEXT: RET64 $rax + ; X32-LABEL: name: call_memmove_element_unordered_atomic_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: $esp = frame-setup SUB32ri8 $esp, 12, implicit-def dead $eflags + ; X32-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; X32-NEXT: renamable $eax = LEA32r $esp, 1, $noreg, 4, $noreg, pcsections !0 + ; X32-NEXT: renamable $ecx = LEA32r $esp, 1, $noreg, 8, $noreg, pcsections !0 + ; X32-NEXT: PUSH32i8 0, implicit-def $esp, implicit $esp :: (store (s32) into stack + 12) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32i8 1, implicit-def $esp, implicit $esp :: (store (s32) into stack + 8) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32r killed renamable $eax, implicit-def $esp, implicit $esp :: (store 
(s32) into stack + 4) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32r killed renamable $ecx, implicit-def $esp, implicit $esp :: (store (s32) into stack) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: CALLpcrel32 &__llvm_memmove_element_unordered_atomic_1, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, pcsections !0 + ; X32-NEXT: $esp = ADD32ri8 $esp, 16, implicit-def dead $eflags + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset -16 + ; X32-NEXT: renamable $eax = MOV32rm $esp, 1, $noreg, 4, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: renamable $edx = MOV32rm $esp, 1, $noreg, 8, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: $esp = frame-destroy ADD32ri8 $esp, 12, implicit-def dead $eflags + ; X32-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 4 + ; X32-NEXT: RET32 $eax, $edx + %src = alloca i32, align 1 + %dst = alloca i32, align 1 + call void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr align 1 %src, ptr align 1 %dst, i64 1, i32 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + +define i64 @call_memset_element_unordered_atomic_intrinsic() { + ; X64-LABEL: name: call_memset_element_unordered_atomic_intrinsic + ; X64: bb.0 (%ir-block.0): + ; X64-NEXT: frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp + ; X64-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; X64-NEXT: renamable $rdi = LEA64r $rsp, 1, $noreg, 4, $noreg, pcsections !0 + ; X64-NEXT: $edx = MOV32ri 1, implicit-def $rdx, pcsections !0 + ; X64-NEXT: $esi = XOR32rr undef $esi, undef $esi, implicit-def dead $eflags, pcsections !0 + ; X64-NEXT: CALL64pcrel32 target-flags(x86-plt) &__llvm_memset_element_unordered_atomic_1, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $esi, implicit killed $rdx, implicit-def $rsp, implicit-def $ssp, pcsections !0 + ; X64-NEXT: renamable $rax = MOV64rm $rsp, 1, $noreg, 4, $noreg :: (load (s64) from %ir.dst) + ; X64-NEXT: $rcx = 
frame-destroy POP64r implicit-def $rsp, implicit $rsp + ; X64-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 8 + ; X64-NEXT: RET64 $rax + ; X32-LABEL: name: call_memset_element_unordered_atomic_intrinsic + ; X32: bb.0 (%ir-block.0): + ; X32-NEXT: $esp = frame-setup SUB32ri8 $esp, 12, implicit-def dead $eflags + ; X32-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; X32-NEXT: renamable $eax = LEA32r $esp, 1, $noreg, 8, $noreg, pcsections !0 + ; X32-NEXT: PUSH32i8 0, implicit-def $esp, implicit $esp :: (store (s32) into stack + 12) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32i8 1, implicit-def $esp, implicit $esp :: (store (s32) into stack + 8) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32i8 0, implicit-def $esp, implicit $esp :: (store (s32) into stack + 4) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: PUSH32r killed renamable $eax, implicit-def $esp, implicit $esp :: (store (s32) into stack) + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4 + ; X32-NEXT: CALLpcrel32 &__llvm_memset_element_unordered_atomic_1, csr_32, implicit $esp, implicit $ssp, implicit-def $esp, implicit-def $ssp, pcsections !0 + ; X32-NEXT: $esp = ADD32ri8 $esp, 16, implicit-def dead $eflags + ; X32-NEXT: CFI_INSTRUCTION adjust_cfa_offset -16 + ; X32-NEXT: renamable $eax = MOV32rm $esp, 1, $noreg, 8, $noreg :: (load (s32) from %ir.dst) + ; X32-NEXT: renamable $edx = MOV32rm $esp, 1, $noreg, 12, $noreg :: (load (s32) from %ir.dst + 4) + ; X32-NEXT: $esp = frame-destroy ADD32ri8 $esp, 12, implicit-def dead $eflags + ; X32-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 4 + ; X32-NEXT: RET32 $eax, $edx + %dst = alloca i32, align 1 + call void @llvm.memset.element.unordered.atomic.p0.p0.i64(ptr align 1 %dst, i8 0, i64 1, i32 1), !pcsections !0 + %val = load i64, ptr %dst + ret i64 %val +} + + +!0 = !{!"foo"} + +declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) +declare void 
@llvm.memcpy.inline.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) +declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) +declare void @llvm.memset.p0.p0.i64(ptr nocapture, i8, i64, i1) +declare void @llvm.memset.inline.p0.p0.i64(ptr nocapture, i8, i64, i1) +declare void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i32) +declare void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i32) +declare void @llvm.memset.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, i8, i64, i32) diff --git a/llvm/test/CodeGen/X86/pcsections.ll b/llvm/test/CodeGen/X86/pcsections.ll --- a/llvm/test/CodeGen/X86/pcsections.ll +++ b/llvm/test/CodeGen/X86/pcsections.ll @@ -48,25 +48,47 @@ ; CHECK-LABEL: multiple: ; CHECK-NEXT: .Lfunc_begin2 ; CHECK: # %bb.0: # %entry -; CHECK: .Lpcsection0: -; CHECK-NEXT: movq +; -- +; DEFCM: .Lpcsection0: +; DEFCM-NEXT: movq +; -- +; LARGE: .Lpcsection0: +; LARGE-NEXT: movabsq +; LARGE-NEXT: .Lpcsection1: +; LARGE-NEXT: movq +; -- ; CHECK-NEXT: retq ; CHECK-NEXT: .Lfunc_end2: -; CHECK: .section section_no_aux,"awo",@progbits,.text -; CHECK-NEXT: .Lpcsection_base2: -; DEFCM-NEXT: .long .Lfunc_begin2-.Lpcsection_base2 -; LARGE-NEXT: .quad .Lfunc_begin2-.Lpcsection_base2 -; CHECK-NEXT: .long .Lfunc_end2-.Lfunc_begin2 -; CHECK-NEXT: .section section_aux_42,"awo",@progbits,.text -; CHECK-NEXT: .Lpcsection_base3: -; DEFCM-NEXT: .long .Lpcsection0-.Lpcsection_base3 -; LARGE-NEXT: .quad .Lpcsection0-.Lpcsection_base3 -; CHECK-NEXT: .long 42 -; CHECK-NEXT: .section section_aux_21264,"awo",@progbits,.text -; CHECK-NEXT: .Lpcsection_base4: -; DEFCM-NEXT: .long .Lpcsection0-.Lpcsection_base4 -; LARGE-NEXT: .quad .Lpcsection0-.Lpcsection_base4 -; CHECK-NEXT: .long 21264 +; CHECK: .section section_no_aux,"awo",@progbits,.text +; -- +; DEFCM-NEXT: .Lpcsection_base2: +; DEFCM-NEXT: .long 
.Lfunc_begin2-.Lpcsection_base2 +; DEFCM-NEXT: .long .Lfunc_end2-.Lfunc_begin2 +; DEFCM-NEXT: .section section_aux_42,"awo",@progbits,.text +; DEFCM-NEXT: .Lpcsection_base3: +; DEFCM-NEXT: .long .Lpcsection0-.Lpcsection_base3 +; DEFCM-NEXT: .long 42 +; DEFCM-NEXT: .section section_aux_21264,"awo",@progbits,.text +; DEFCM-NEXT: .Lpcsection_base4: +; DEFCM-NEXT: .long .Lpcsection0-.Lpcsection_base4 +; DEFCM-NEXT: .long 21264 +; -- +; LARGE: .Lpcsection_base2: +; LARGE-NEXT: .quad .Lfunc_begin2-.Lpcsection_base2 +; LARGE-NEXT: .long .Lfunc_end2-.Lfunc_begin2 +; LARGE-NEXT: .section section_aux_42,"awo",@progbits,.text +; LARGE-NEXT: .Lpcsection_base3: +; LARGE-NEXT: .quad .Lpcsection0-.Lpcsection_base3 +; LARGE-NEXT: .Lpcsection_base4: +; LARGE-NEXT: .quad .Lpcsection1-.Lpcsection_base4 +; LARGE-NEXT: .long 42 +; LARGE-NEXT: .section section_aux_21264,"awo",@progbits,.text +; LARGE-NEXT: .Lpcsection_base5: +; LARGE-NEXT: .quad .Lpcsection0-.Lpcsection_base5 +; LARGE-NEXT: .Lpcsection_base6: +; LARGE-NEXT: .quad .Lpcsection1-.Lpcsection_base6 +; LARGE-NEXT: .long 21264 +; -- ; CHECK-NEXT: .text entry: %0 = load i64, ptr @bar, align 8, !pcsections !2 @@ -75,16 +97,29 @@ define i64 @test_simple_atomic() { ; CHECK-LABEL: test_simple_atomic: -; CHECK: .Lpcsection1: -; CHECK-NEXT: movq +; -- +; DEFCM: .Lpcsection1: +; DEFCM-NEXT: movq +; -- +; LARGE: .Lpcsection2: +; LARGE-NEXT: movabsq +; LARGE-NEXT: .Lpcsection3: +; LARGE-NEXT: movq +; -- ; CHECK-NOT: .Lpcsection ; CHECK: addq ; CHECK-NEXT: retq ; CHECK-NEXT: .Lfunc_end3: -; CHECK: .section section_no_aux,"awo",@progbits,.text -; CHECK-NEXT: .Lpcsection_base5: -; DEFCM-NEXT: .long .Lpcsection1-.Lpcsection_base5 -; LARGE-NEXT: .quad .Lpcsection1-.Lpcsection_base5 +; CHECK: .section section_no_aux,"awo",@progbits,.text +; -- +; DEFCM-NEXT: .Lpcsection_base5: +; DEFCM-NEXT: .long .Lpcsection1-.Lpcsection_base5 +; -- +; LARGE-NEXT: .Lpcsection_base7: +; LARGE-NEXT: .quad .Lpcsection2-.Lpcsection_base7 +; LARGE-NEXT: 
.Lpcsection_base8: +; LARGE-NEXT: .quad .Lpcsection3-.Lpcsection_base8 +; -- ; CHECK-NEXT: .text entry: %0 = load atomic i64, ptr @foo monotonic, align 8, !pcsections !0 @@ -95,18 +130,38 @@ define i64 @test_complex_atomic() { ; CHECK-LABEL: test_complex_atomic: -; CHECK: movl $1 -; CHECK-NEXT: .Lpcsection2: -; CHECK-NEXT: lock xaddq +; -- +; DEFCM: .Lpcsection2: +; DEFCM-NEXT: movl +; DEFCM-NEXT: .Lpcsection3: +; DEFCM-NEXT: lock +; -- +; LARGE: .Lpcsection4: +; LARGE-NEXT: movabsq +; LARGE-NEXT: .Lpcsection5: +; LARGE-NEXT: movl +; LARGE-NEXT: .Lpcsection6: +; LARGE-NEXT: lock +; -- ; CHECK-NOT: .Lpcsection ; CHECK: movq ; CHECK: addq ; CHECK: retq ; CHECK-NEXT: .Lfunc_end4: -; CHECK: .section section_no_aux,"awo",@progbits,.text -; CHECK-NEXT: .Lpcsection_base6: -; DEFCM-NEXT: .long .Lpcsection2-.Lpcsection_base6 -; LARGE-NEXT: .quad .Lpcsection2-.Lpcsection_base6 +; CHECK: .section section_no_aux,"awo",@progbits,.text +; -- +; DEFCM-NEXT: .Lpcsection_base6: +; DEFCM-NEXT: .long .Lpcsection2-.Lpcsection_base6 +; DEFCM-NEXT: .Lpcsection_base7: +; DEFCM-NEXT: .long .Lpcsection3-.Lpcsection_base7 +; -- +; LARGE-NEXT: .Lpcsection_base9: +; LARGE-NEXT: .quad .Lpcsection4-.Lpcsection_base9 +; LARGE-NEXT: .Lpcsection_base10: +; LARGE-NEXT: .quad .Lpcsection5-.Lpcsection_base10 +; LARGE-NEXT: .Lpcsection_base11: +; LARGE-NEXT: .quad .Lpcsection6-.Lpcsection_base11 +; -- ; CHECK-NEXT: .text entry: %0 = atomicrmw add ptr @foo, i64 1 monotonic, align 8, !pcsections !0