Index: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -2197,8 +2197,6 @@
       : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
     LSBaseSDNodeBits.AddressingMode = AM;
     assert(getAddressingMode() == AM && "Value truncated");
-    assert((!MMO->isAtomic() || MMO->isVolatile()) &&
-           "use an AtomicSDNode instead for non-volatile atomics");
   }
 
   const SDValue &getOffset() const {
Index: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h
@@ -3716,6 +3716,25 @@
     return MachineMemOperand::MONone;
   }
 
+  /// Should SelectionDAG lower an atomic store of the given kind as a normal
+  /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
+  /// eventually migrate all targets to using StoreSDNode, but porting is
+  /// being done one target at a time.
+  virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
+    assert(SI.isAtomic() && "violated precondition");
+    return false;
+  }
+
+  /// Should SelectionDAG lower an atomic load of the given kind as a normal
+  /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
+  /// eventually migrate all targets to using LoadSDNode, but porting is
+  /// being done one target at a time.
+  virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
+    assert(LI.isAtomic() && "violated precondition");
+    return false;
+  }
+
+
   /// This callback is invoked by the type legalizer to legalize nodes with an
   /// illegal operand type but legal result types. It replaces the
   /// LowerOperation callback in the type Legalizer. The reason we can not do
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4658,9 +4658,26 @@
                             AAMDNodes(), nullptr, SSID, Order);
 
   InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
-  SDValue L =
-      DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
-                    getValue(I.getPointerOperand()), MMO);
+
+  SDValue Ptr = getValue(I.getPointerOperand());
+
+  if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
+    // TODO: Once this is better exercised by tests, it should be merged with
+    // the normal path for loads to prevent future divergence.
+    SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
+    if (MemVT != VT)
+      L = DAG.getPtrExtOrTrunc(L, dl, VT);
+
+    setValue(&I, L);
+    if (!I.isUnordered()) {
+      SDValue OutChain = L.getValue(1);
+      DAG.setRoot(OutChain);
+    }
+    return;
+  }
+
+  SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
+                            Ptr, MMO);
 
   SDValue OutChain = L.getValue(1);
   if (MemVT != VT)
@@ -4699,9 +4716,17 @@
   SDValue Val = getValue(I.getValueOperand());
   if (Val.getValueType() != MemVT)
     Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
+  SDValue Ptr = getValue(I.getPointerOperand());
 
+  if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
+    // TODO: Once this is better exercised by tests, it should be merged with
+    // the normal path for stores to prevent future divergence.
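+    // Illustrative example (mirroring the test file below): given
+    //   store atomic i64 %v, i64* %p unordered, align 8
+    // MemVT is i64 and the getStore call below emits a plain StoreSDNode.
+    // The MMO built earlier still records that the access is atomic, so the
+    // ordinary store folds checked by the CHECK-EX prefixes can now fire.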
+    SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
+    DAG.setRoot(S);
+    return;
+  }
   SDValue OutChain =
       DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
-                    getValue(I.getPointerOperand()), Val, MMO);
+                    Ptr, Val, MMO);
 
   DAG.setRoot(OutChain);
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h
@@ -1388,6 +1388,9 @@
   LoadInst *
   lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
 
+  bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
+  bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
+
   bool needsCmpXchgNb(Type *MemType) const;
 
   void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -92,6 +92,13 @@
                      "SHIFT, LEA, etc."),
     cl::Hidden);
 
+static cl::opt<bool> ExperimentalUnorderedISEL(
+    "x86-experimental-unordered-atomic-isel", cl::init(false),
+    cl::desc("Use LoadSDNode and StoreSDNode instead of "
+             "AtomicSDNode for unordered atomic loads and "
+             "stores respectively."),
+    cl::Hidden);
+
 /// Call this when the user attempts to do something unsupported, like
 /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
 /// report_fatal_error, so calling code should attempt to recover without
@@ -26493,6 +26500,18 @@
   return Loaded;
 }
 
+bool X86TargetLowering::lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
+  if (!SI.isUnordered())
+    return false;
+  return ExperimentalUnorderedISEL;
+}
+bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
+  if (!LI.isUnordered())
+    return false;
+  return ExperimentalUnorderedISEL;
+}
+
+
 /// Emit a locked operation on a stack location which does not change any
 /// memory location, but does involve a lock prefix.
Location is chosen to be /// a) very likely accessed only by a single thread to minimize cache traffic, Index: llvm/trunk/test/CodeGen/X86/atomic-unordered.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/atomic-unordered.ll +++ llvm/trunk/test/CodeGen/X86/atomic-unordered.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O0 %s -; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O3 %s +; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-NOX,CHECK-O0 %s +; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-NOX,CHECK-O3 %s +; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel | FileCheck --check-prefixes=CHECK,CHECK-EX %s define i8 @load_i8(i8* %ptr) { ; CHECK-LABEL: load_i8: @@ -22,6 +23,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movb %sil, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: store_i8: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movb %sil, (%rdi) +; CHECK-EX-NEXT: retq store atomic i8 %v, i8* %ptr unordered, align 1 ret void } @@ -36,6 +42,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movzwl (%rdi), %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_i16: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movzwl (%rdi), %eax +; CHECK-EX-NEXT: retq %v = load atomic i16, i16* %ptr unordered, align 2 ret i16 %v } @@ -52,6 +63,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movw %si, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: store_i16: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movw %si, (%rdi) +; CHECK-EX-NEXT: retq store atomic i16 %v, i16* %ptr unordered, align 2 ret void } @@ -111,6 +127,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: orq $7, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: narrow_writeback_or: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: orq $7, (%rdi) +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %ptr unordered, align 8 %v.new = or i64 %v, 7 store atomic i64 %v.new, i64* %ptr unordered, align 8 @@ -133,6 +154,12 @@ ; CHECK-O3-NEXT: movl $4294967040, %eax # imm = 0xFFFFFF00 ; CHECK-O3-NEXT: andq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: narrow_writeback_and: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl $4294967040, %eax # imm = 0xFFFFFF00 +; CHECK-EX-NEXT: andq %rax, (%rdi) +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %ptr unordered, align 8 %v.new = and i64 %v, 4294967040 ;; 0xFFFF_FF00 store atomic i64 %v.new, i64* %ptr unordered, align 8 @@ -152,6 +179,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: xorq $7, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: narrow_writeback_xor: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: xorq $7, (%rdi) +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %ptr unordered, align 8 %v.new = xor i64 %v, 7 store atomic i64 %v.new, i64* %ptr unordered, align 8 @@ -253,6 +285,20 @@ ; CHECK-O3-NEXT: popq %rbx ; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_i128: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: pushq %rbx +; CHECK-EX-NEXT: .cfi_def_cfa_offset 16 +; CHECK-EX-NEXT: .cfi_offset %rbx, -16 +; CHECK-EX-NEXT: xorl %eax, %eax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: 
xorl %ecx, %ecx +; CHECK-EX-NEXT: xorl %ebx, %ebx +; CHECK-EX-NEXT: lock cmpxchg16b (%rdi) +; CHECK-EX-NEXT: popq %rbx +; CHECK-EX-NEXT: .cfi_def_cfa_offset 8 +; CHECK-EX-NEXT: retq %v = load atomic i128, i128* %ptr unordered, align 16 ret i128 %v } @@ -306,6 +352,25 @@ ; CHECK-O3-NEXT: popq %rbx ; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: store_i128: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: pushq %rbx +; CHECK-EX-NEXT: .cfi_def_cfa_offset 16 +; CHECK-EX-NEXT: .cfi_offset %rbx, -16 +; CHECK-EX-NEXT: movq %rdx, %rcx +; CHECK-EX-NEXT: movq %rsi, %rbx +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq 8(%rdi), %rdx +; CHECK-EX-NEXT: .p2align 4, 0x90 +; CHECK-EX-NEXT: .LBB16_1: # %atomicrmw.start +; CHECK-EX-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-EX-NEXT: lock cmpxchg16b (%rdi) +; CHECK-EX-NEXT: jne .LBB16_1 +; CHECK-EX-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-EX-NEXT: popq %rbx +; CHECK-EX-NEXT: .cfi_def_cfa_offset 8 +; CHECK-EX-NEXT: retq store atomic i128 %v, i128* %ptr unordered, align 16 ret void } @@ -359,6 +424,28 @@ ; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O3-NEXT: vzeroupper ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_i256: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: pushq %rbx +; CHECK-EX-NEXT: .cfi_def_cfa_offset 16 +; CHECK-EX-NEXT: subq $32, %rsp +; CHECK-EX-NEXT: .cfi_def_cfa_offset 48 +; CHECK-EX-NEXT: .cfi_offset %rbx, -16 +; CHECK-EX-NEXT: movq %rdi, %rbx +; CHECK-EX-NEXT: movq %rsp, %rdx +; CHECK-EX-NEXT: movl $32, %edi +; CHECK-EX-NEXT: xorl %ecx, %ecx +; CHECK-EX-NEXT: callq __atomic_load +; CHECK-EX-NEXT: vmovups (%rsp), %ymm0 +; CHECK-EX-NEXT: vmovups %ymm0, (%rbx) +; CHECK-EX-NEXT: movq %rbx, %rax +; CHECK-EX-NEXT: addq $32, %rsp +; CHECK-EX-NEXT: .cfi_def_cfa_offset 16 +; CHECK-EX-NEXT: popq %rbx +; CHECK-EX-NEXT: .cfi_def_cfa_offset 8 +; CHECK-EX-NEXT: vzeroupper +; CHECK-EX-NEXT: retq %v = load atomic i256, i256* %ptr unordered, align 16 ret i256 %v } @@ -402,19 +489,43 @@ ; CHECK-O3-NEXT: addq $40, %rsp ; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: store_i256: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: subq $40, %rsp +; CHECK-EX-NEXT: .cfi_def_cfa_offset 48 +; CHECK-EX-NEXT: movq %rdi, %rax +; CHECK-EX-NEXT: movq %r8, {{[0-9]+}}(%rsp) +; CHECK-EX-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; CHECK-EX-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; CHECK-EX-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; CHECK-EX-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; CHECK-EX-NEXT: movl $32, %edi +; CHECK-EX-NEXT: movq %rax, %rsi +; CHECK-EX-NEXT: xorl %ecx, %ecx +; CHECK-EX-NEXT: callq __atomic_store +; CHECK-EX-NEXT: addq $40, %rsp +; CHECK-EX-NEXT: .cfi_def_cfa_offset 8 +; CHECK-EX-NEXT: retq store atomic i256 %v, i256* %ptr unordered, align 16 ret void } ; Legal if wider type is also atomic (TODO) define void @vec_store(i32* %p0, <2 x i32> %vec) { -; CHECK-LABEL: vec_store: -; CHECK: # %bb.0: -; CHECK-NEXT: vmovd %xmm0, %eax -; CHECK-NEXT: vpextrd $1, %xmm0, %ecx -; CHECK-NEXT: movl %eax, (%rdi) -; CHECK-NEXT: movl %ecx, 4(%rdi) -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: vec_store: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: vmovd %xmm0, %eax +; CHECK-NOX-NEXT: vpextrd $1, %xmm0, %ecx +; CHECK-NOX-NEXT: movl %eax, (%rdi) +; CHECK-NOX-NEXT: movl %ecx, 4(%rdi) +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: vec_store: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: vmovss %xmm0, (%rdi) +; CHECK-EX-NEXT: vextractps $1, %xmm0, 4(%rdi) +; CHECK-EX-NEXT: retq %v1 = extractelement <2 x i32> %vec, i32 0 %v2 = extractelement <2 x i32> 
%vec, i32 1 %p1 = getelementptr i32, i32* %p0, i64 1 @@ -425,13 +536,19 @@ ; Not legal to widen due to alignment restriction define void @vec_store_unaligned(i32* %p0, <2 x i32> %vec) { -; CHECK-LABEL: vec_store_unaligned: -; CHECK: # %bb.0: -; CHECK-NEXT: vmovd %xmm0, %eax -; CHECK-NEXT: vpextrd $1, %xmm0, %ecx -; CHECK-NEXT: movl %eax, (%rdi) -; CHECK-NEXT: movl %ecx, 4(%rdi) -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: vec_store_unaligned: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: vmovd %xmm0, %eax +; CHECK-NOX-NEXT: vpextrd $1, %xmm0, %ecx +; CHECK-NOX-NEXT: movl %eax, (%rdi) +; CHECK-NOX-NEXT: movl %ecx, 4(%rdi) +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: vec_store_unaligned: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: vmovss %xmm0, (%rdi) +; CHECK-EX-NEXT: vextractps $1, %xmm0, 4(%rdi) +; CHECK-EX-NEXT: retq %v1 = extractelement <2 x i32> %vec, i32 0 %v2 = extractelement <2 x i32> %vec, i32 1 %p1 = getelementptr i32, i32* %p0, i64 1 @@ -445,12 +562,18 @@ ; Legal if wider type is also atomic (TODO) ; Also, can avoid register move from xmm to eax (TODO) define void @widen_broadcast2(i32* %p0, <2 x i32> %vec) { -; CHECK-LABEL: widen_broadcast2: -; CHECK: # %bb.0: -; CHECK-NEXT: vmovd %xmm0, %eax -; CHECK-NEXT: movl %eax, (%rdi) -; CHECK-NEXT: movl %eax, 4(%rdi) -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: widen_broadcast2: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: vmovd %xmm0, %eax +; CHECK-NOX-NEXT: movl %eax, (%rdi) +; CHECK-NOX-NEXT: movl %eax, 4(%rdi) +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: widen_broadcast2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: vmovss %xmm0, (%rdi) +; CHECK-EX-NEXT: vmovss %xmm0, 4(%rdi) +; CHECK-EX-NEXT: retq %v1 = extractelement <2 x i32> %vec, i32 0 %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v1, i32* %p0 unordered, align 8 @@ -460,12 +583,18 @@ ; Not legal to widen due to alignment restriction define void @widen_broadcast2_unaligned(i32* %p0, <2 x i32> %vec) { -; CHECK-LABEL: widen_broadcast2_unaligned: -; CHECK: # %bb.0: -; CHECK-NEXT: vmovd %xmm0, %eax -; CHECK-NEXT: movl %eax, (%rdi) -; CHECK-NEXT: movl %eax, 4(%rdi) -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: widen_broadcast2_unaligned: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: vmovd %xmm0, %eax +; CHECK-NOX-NEXT: movl %eax, (%rdi) +; CHECK-NOX-NEXT: movl %eax, 4(%rdi) +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: widen_broadcast2_unaligned: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: vmovss %xmm0, (%rdi) +; CHECK-EX-NEXT: vmovss %xmm0, 4(%rdi) +; CHECK-EX-NEXT: retq %v1 = extractelement <2 x i32> %vec, i32 0 %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v1, i32* %p0 unordered, align 4 @@ -475,11 +604,16 @@ ; Legal if wider type is also atomic (TODO) define void @widen_zero_init(i32* %p0, i32 %v1, i32 %v2) { -; CHECK-LABEL: widen_zero_init: -; CHECK: # %bb.0: -; CHECK-NEXT: movl $0, (%rdi) -; CHECK-NEXT: movl $0, 4(%rdi) -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: widen_zero_init: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movl $0, (%rdi) +; CHECK-NOX-NEXT: movl $0, 4(%rdi) +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: widen_zero_init: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq $0, (%rdi) +; CHECK-EX-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 0, i32* %p0 unordered, align 8 store atomic i32 0, i32* %p1 unordered, align 4 @@ -488,11 +622,16 @@ ; Not legal to widen due to alignment restriction define void @widen_zero_init_unaligned(i32* %p0, i32 %v1, i32 %v2) { -; CHECK-LABEL: widen_zero_init_unaligned: -; CHECK: # %bb.0: -; CHECK-NEXT: movl $0, (%rdi) -; CHECK-NEXT: movl 
$0, 4(%rdi) -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: widen_zero_init_unaligned: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movl $0, (%rdi) +; CHECK-NOX-NEXT: movl $0, 4(%rdi) +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: widen_zero_init_unaligned: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq $0, (%rdi) +; CHECK-EX-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 0, i32* %p0 unordered, align 4 store atomic i32 0, i32* %p1 unordered, align 4 @@ -526,6 +665,12 @@ ; CHECK-O3-NEXT: movq %rsi, %rax ; CHECK-O3-NEXT: addq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_add2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: addq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = add i64 %v, %v2 ret i64 %ret @@ -543,6 +688,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: addq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_add3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: addq (%rsi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = add i64 %v, %v2 @@ -562,6 +713,12 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: addq $-15, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_sub1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: addq $-15, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = sub i64 %v, 15 ret i64 %ret @@ -603,6 +760,13 @@ ; CHECK-O3-NEXT: leaq (%rax,%rax,4), %rax ; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_mul1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,4), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,2), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = mul i64 %v, 15 ret i64 %ret @@ -620,6 +784,12 @@ ; CHECK-O3-NEXT: movq %rsi, %rax ; CHECK-O3-NEXT: imulq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_mul2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: imulq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = mul i64 %v, %v2 ret i64 %ret @@ -637,6 +807,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: imulq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_mul3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: imulq (%rsi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = mul i64 %v, %v2 @@ -665,6 +841,19 @@ ; CHECK-O3-NEXT: sarq $3, %rdx ; CHECK-O3-NEXT: addq %rdx, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_sdiv1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rcx +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: imulq %rdx +; CHECK-EX-NEXT: addq %rcx, %rdx +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: shrq $63, %rax +; CHECK-EX-NEXT: sarq $3, %rdx +; CHECK-EX-NEXT: addq %rdx, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = sdiv i64 %v, 15 ret i64 %ret @@ -696,6 +885,24 @@ ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_sdiv2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), 
%rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB35_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: cqto +; CHECK-EX-NEXT: idivq %rsi +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB35_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = sdiv i64 %v, %v2 ret i64 %ret @@ -727,6 +934,25 @@ ; CHECK-O3-NEXT: divl %ecx ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_sdiv3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq (%rsi), %rcx +; CHECK-EX-NEXT: movq %rax, %rdx +; CHECK-EX-NEXT: orq %rcx, %rdx +; CHECK-EX-NEXT: shrq $32, %rdx +; CHECK-EX-NEXT: je .LBB36_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: cqto +; CHECK-EX-NEXT: idivq %rcx +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB36_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %ecx +; CHECK-EX-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = sdiv i64 %v, %v2 @@ -752,6 +978,14 @@ ; CHECK-O3-NEXT: movq %rdx, %rax ; CHECK-O3-NEXT: shrq $3, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_udiv1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 +; CHECK-EX-NEXT: mulq (%rdi) +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: shrq $3, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = udiv i64 %v, 15 ret i64 %ret @@ -783,6 +1017,24 @@ ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_udiv2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB38_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divq %rsi +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB38_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = udiv i64 %v, %v2 ret i64 %ret @@ -815,6 +1067,25 @@ ; CHECK-O3-NEXT: divl %ecx ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_udiv3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq (%rsi), %rcx +; CHECK-EX-NEXT: movq %rax, %rdx +; CHECK-EX-NEXT: orq %rcx, %rdx +; CHECK-EX-NEXT: shrq $32, %rdx +; CHECK-EX-NEXT: je .LBB39_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divq %rcx +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB39_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %ecx +; CHECK-EX-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = udiv i64 %v, %v2 @@ -848,6 +1119,23 @@ ; CHECK-O3-NEXT: subq %rax, %rcx ; CHECK-O3-NEXT: 
movq %rcx, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_srem1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rcx +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: imulq %rdx +; CHECK-EX-NEXT: addq %rcx, %rdx +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: shrq $63, %rax +; CHECK-EX-NEXT: sarq $3, %rdx +; CHECK-EX-NEXT: addq %rax, %rdx +; CHECK-EX-NEXT: leaq (%rdx,%rdx,4), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,2), %rax +; CHECK-EX-NEXT: subq %rax, %rcx +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = srem i64 %v, 15 ret i64 %ret @@ -881,6 +1169,25 @@ ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: movl %edx, %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_srem2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB41_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: cqto +; CHECK-EX-NEXT: idivq %rsi +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB41_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: movl %edx, %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = srem i64 %v, %v2 ret i64 %ret @@ -914,6 +1221,26 @@ ; CHECK-O3-NEXT: divl %ecx ; CHECK-O3-NEXT: movl %edx, %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_srem3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq (%rsi), %rcx +; CHECK-EX-NEXT: movq %rax, %rdx +; CHECK-EX-NEXT: orq %rcx, %rdx +; CHECK-EX-NEXT: shrq $32, %rdx +; CHECK-EX-NEXT: je .LBB42_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: cqto +; CHECK-EX-NEXT: idivq %rcx +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB42_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %ecx +; CHECK-EX-NEXT: movl %edx, %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = srem i64 %v, %v2 @@ -944,6 +1271,19 @@ ; CHECK-O3-NEXT: subq %rax, %rcx ; CHECK-O3-NEXT: movq %rcx, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_urem1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rcx +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: mulq %rdx +; CHECK-EX-NEXT: shrq $3, %rdx +; CHECK-EX-NEXT: leaq (%rdx,%rdx,4), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,2), %rax +; CHECK-EX-NEXT: subq %rax, %rcx +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = urem i64 %v, 15 ret i64 %ret @@ -978,6 +1318,25 @@ ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: movl %edx, %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_urem2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB44_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divq %rsi +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB44_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; 
CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: movl %edx, %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = urem i64 %v, %v2 ret i64 %ret @@ -1012,6 +1371,26 @@ ; CHECK-O3-NEXT: divl %ecx ; CHECK-O3-NEXT: movl %edx, %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_urem3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq (%rsi), %rcx +; CHECK-EX-NEXT: movq %rax, %rdx +; CHECK-EX-NEXT: orq %rcx, %rdx +; CHECK-EX-NEXT: shrq $32, %rdx +; CHECK-EX-NEXT: je .LBB45_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divq %rcx +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB45_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %ecx +; CHECK-EX-NEXT: movl %edx, %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = urem i64 %v, %v2 @@ -1043,6 +1422,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_shl2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: shlxq %rsi, (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = shl i64 %v, %v2 ret i64 %ret @@ -1062,6 +1446,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: shlxq %rax, (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_shl3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movb (%rsi), %al +; CHECK-EX-NEXT: shlxq %rax, (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = shl i64 %v, %v2 @@ -1093,6 +1483,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_lshr2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: shrxq %rsi, (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = lshr i64 %v, %v2 ret i64 %ret @@ -1112,6 +1507,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: shrxq %rax, (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_lshr3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movb (%rsi), %al +; CHECK-EX-NEXT: shrxq %rax, (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = lshr i64 %v, %v2 @@ -1143,6 +1544,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_ashr2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: sarxq %rsi, (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = ashr i64 %v, %v2 ret i64 %ret @@ -1162,6 +1568,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: sarxq %rax, (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_ashr3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movb (%rsi), %al +; CHECK-EX-NEXT: sarxq %rax, (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = ashr i64 %v, %v2 @@ -1181,6 +1593,12 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: andl $15, %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_and1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: andl $15, %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = and i64 %v, 15 ret i64 %ret @@ 
-1198,6 +1616,12 @@ ; CHECK-O3-NEXT: movq %rsi, %rax ; CHECK-O3-NEXT: andq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_and2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: andq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = and i64 %v, %v2 ret i64 %ret @@ -1215,6 +1639,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: andq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_and3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: andq (%rsi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = and i64 %v, %v2 @@ -1245,6 +1675,12 @@ ; CHECK-O3-NEXT: movq %rsi, %rax ; CHECK-O3-NEXT: orq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_or2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: orq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = or i64 %v, %v2 ret i64 %ret @@ -1262,6 +1698,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: orq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_or3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: orq (%rsi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = or i64 %v, %v2 @@ -1292,6 +1734,12 @@ ; CHECK-O3-NEXT: movq %rsi, %rax ; CHECK-O3-NEXT: xorq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_xor2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: xorq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = xor i64 %v, %v2 ret i64 %ret @@ -1309,6 +1757,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: xorq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_xor3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: xorq (%rsi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = xor i64 %v, %v2 @@ -1330,6 +1784,12 @@ ; CHECK-O3-NEXT: cmpq $15, (%rdi) ; CHECK-O3-NEXT: sete %al ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_icmp1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: cmpq $15, (%rdi) +; CHECK-EX-NEXT: sete %al +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = icmp eq i64 %v, 15 ret i1 %ret @@ -1350,6 +1810,12 @@ ; CHECK-O3-NEXT: cmpq %rsi, (%rdi) ; CHECK-O3-NEXT: sete %al ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_icmp2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: cmpq %rsi, (%rdi) +; CHECK-EX-NEXT: sete %al +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = icmp eq i64 %v, %v2 ret i1 %ret @@ -1372,6 +1838,13 @@ ; CHECK-O3-NEXT: cmpq %rax, (%rdi) ; CHECK-O3-NEXT: sete %al ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_icmp3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: cmpq (%rsi), %rax +; CHECK-EX-NEXT: sete %al +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = icmp eq i64 %v, %v2 @@ -1397,6 +1870,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: addq $15, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_add1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: addq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load 
atomic i64, i64* %p unordered, align 8 %val = add i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1416,6 +1894,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: addq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_add2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: addq %rsi, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = add i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1435,6 +1918,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: addq $-15, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_sub1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: addq $-15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = sub i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1454,6 +1942,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: subq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_sub2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: subq %rsi, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = sub i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1489,6 +1982,12 @@ ; CHECK-O3-NEXT: imulq (%rdi), %rsi ; CHECK-O3-NEXT: movq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_mul2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: imulq (%rdi), %rsi +; CHECK-EX-NEXT: movq %rsi, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = mul i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1525,6 +2024,20 @@ ; CHECK-O3-NEXT: addq %rax, %rdx ; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_sdiv1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rcx +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: imulq %rdx +; CHECK-EX-NEXT: addq %rcx, %rdx +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: shrq $63, %rax +; CHECK-EX-NEXT: sarq $3, %rdx +; CHECK-EX-NEXT: addq %rax, %rdx +; CHECK-EX-NEXT: movq %rdx, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = sdiv i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1560,6 +2073,26 @@ ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_sdiv2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB74_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: cqto +; CHECK-EX-NEXT: idivq %rsi +; CHECK-EX-NEXT: movq %rax, (%rdi) +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB74_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-EX-NEXT: movq %rax, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = sdiv i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1568,14 +2101,22 @@ ; Legal, as expected define void @rmw_fold_udiv1(i64* %p, i64 %v) { -; CHECK-LABEL: rmw_fold_udiv1: -; CHECK: # %bb.0: -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 -; CHECK-NEXT: mulq %rcx -; CHECK-NEXT: shrq $3, %rdx -; CHECK-NEXT: movq %rdx, (%rdi) -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: rmw_fold_udiv1: +; 
CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movq (%rdi), %rax +; CHECK-NOX-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 +; CHECK-NOX-NEXT: mulq %rcx +; CHECK-NOX-NEXT: shrq $3, %rdx +; CHECK-NOX-NEXT: movq %rdx, (%rdi) +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_udiv1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 +; CHECK-EX-NEXT: mulq (%rdi) +; CHECK-EX-NEXT: shrq $3, %rdx +; CHECK-EX-NEXT: movq %rdx, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = udiv i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1612,6 +2153,26 @@ ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_udiv2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB76_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divq %rsi +; CHECK-EX-NEXT: movq %rax, (%rdi) +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB76_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-EX-NEXT: movq %rax, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = udiv i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1654,6 +2215,23 @@ ; CHECK-O3-NEXT: subq %rax, %rcx ; CHECK-O3-NEXT: movq %rcx, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_srem1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rcx +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: imulq %rdx +; CHECK-EX-NEXT: addq %rcx, %rdx +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: shrq $63, %rax +; CHECK-EX-NEXT: sarq $3, %rdx +; CHECK-EX-NEXT: addq %rax, %rdx +; CHECK-EX-NEXT: leaq (%rdx,%rdx,4), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,2), %rax +; CHECK-EX-NEXT: subq %rax, %rcx +; CHECK-EX-NEXT: movq %rcx, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = srem i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1689,6 +2267,26 @@ ; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_srem2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB78_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: cqto +; CHECK-EX-NEXT: idivq %rsi +; CHECK-EX-NEXT: movq %rdx, (%rdi) +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB78_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: # kill: def $edx killed $edx def $rdx +; CHECK-EX-NEXT: movq %rdx, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = srem i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1723,6 +2321,19 @@ ; CHECK-O3-NEXT: subq %rax, %rcx ; CHECK-O3-NEXT: movq %rcx, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_urem1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rcx +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 
0x8888888888888889 +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: mulq %rdx +; CHECK-EX-NEXT: shrq $3, %rdx +; CHECK-EX-NEXT: leaq (%rdx,%rdx,4), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,2), %rax +; CHECK-EX-NEXT: subq %rax, %rcx +; CHECK-EX-NEXT: movq %rcx, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = urem i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1759,6 +2370,26 @@ ; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_urem2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB80_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divq %rsi +; CHECK-EX-NEXT: movq %rdx, (%rdi) +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB80_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: # kill: def $edx killed $edx def $rdx +; CHECK-EX-NEXT: movq %rdx, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = urem i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1767,12 +2398,17 @@ ; Legal to fold (TODO) define void @rmw_fold_shl1(i64* %p, i64 %v) { -; CHECK-LABEL: rmw_fold_shl1: -; CHECK: # %bb.0: -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: shlq $15, %rax -; CHECK-NEXT: movq %rax, (%rdi) -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: rmw_fold_shl1: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movq (%rdi), %rax +; CHECK-NOX-NEXT: shlq $15, %rax +; CHECK-NOX-NEXT: movq %rax, (%rdi) +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_shl1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: shlq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = shl i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1796,6 +2432,13 @@ ; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_shl2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rcx +; CHECK-EX-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-EX-NEXT: shlq %cl, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = shl i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1804,12 +2447,17 @@ ; Legal to fold (TODO) define void @rmw_fold_lshr1(i64* %p, i64 %v) { -; CHECK-LABEL: rmw_fold_lshr1: -; CHECK: # %bb.0: -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: shrq $15, %rax -; CHECK-NEXT: movq %rax, (%rdi) -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: rmw_fold_lshr1: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movq (%rdi), %rax +; CHECK-NOX-NEXT: shrq $15, %rax +; CHECK-NOX-NEXT: movq %rax, (%rdi) +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_lshr1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: shrq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = lshr i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1833,6 +2481,13 @@ ; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_lshr2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rcx +; CHECK-EX-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-EX-NEXT: shrq %cl, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 
%val = lshr i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1841,12 +2496,17 @@ ; Legal to fold (TODO) define void @rmw_fold_ashr1(i64* %p, i64 %v) { -; CHECK-LABEL: rmw_fold_ashr1: -; CHECK: # %bb.0: -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: sarq $15, %rax -; CHECK-NEXT: movq %rax, (%rdi) -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: rmw_fold_ashr1: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movq (%rdi), %rax +; CHECK-NOX-NEXT: sarq $15, %rax +; CHECK-NOX-NEXT: movq %rax, (%rdi) +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_ashr1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: sarq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = ashr i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1870,6 +2530,13 @@ ; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_ashr2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rcx +; CHECK-EX-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-EX-NEXT: sarq %cl, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = ashr i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1891,6 +2558,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: andq $15, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_and1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: andq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = and i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1910,6 +2582,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: andq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_and2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: andq %rsi, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = and i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1929,6 +2606,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: orq $15, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_or1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: orq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = or i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1948,6 +2630,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: orq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_or2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: orq %rsi, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = or i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1967,6 +2654,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: xorq $15, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_xor1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: xorq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = xor i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1986,6 +2678,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: xorq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_xor2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: xorq %rsi, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = xor i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1997,11 +2694,16 @@ ; Legal to reduce the load width (TODO) define i32 @fold_trunc(i64* %p) { -; CHECK-LABEL: fold_trunc: -; CHECK: # %bb.0: -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: # kill: def $eax killed $eax killed $rax -; CHECK-NEXT: retq +; 
CHECK-NOX-LABEL: fold_trunc: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movq (%rdi), %rax +; CHECK-NOX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: fold_trunc: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl (%rdi), %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = trunc i64 %v to i32 ret i32 %ret @@ -2022,6 +2724,12 @@ ; CHECK-O3-NEXT: addl %esi, %eax ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: fold_trunc_add: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl %esi, %eax +; CHECK-EX-NEXT: addl (%rdi), %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %trunc = trunc i64 %v to i32 %ret = add i32 %trunc, %v2 @@ -2043,6 +2751,12 @@ ; CHECK-O3-NEXT: andl %esi, %eax ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: fold_trunc_and: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl %esi, %eax +; CHECK-EX-NEXT: andl (%rdi), %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %trunc = trunc i64 %v to i32 %ret = and i32 %trunc, %v2 @@ -2064,6 +2778,12 @@ ; CHECK-O3-NEXT: orl %esi, %eax ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: fold_trunc_or: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl %esi, %eax +; CHECK-EX-NEXT: orl (%rdi), %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %trunc = trunc i64 %v to i32 %ret = or i32 %trunc, %v2 @@ -2091,6 +2811,15 @@ ; CHECK-O3-NEXT: orl %eax, %ecx ; CHECK-O3-NEXT: movzbl %cl, %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: split_load: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: orl %eax, %ecx +; CHECK-EX-NEXT: movzbl %cl, %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %b1 = trunc i64 %v to i8 %v.shift = lshr i64 %v, 32 @@ -2117,11 +2846,16 @@ ; Legal to forward and fold (TODO) define i64 @load_forwarding(i64* %p) { -; CHECK-LABEL: load_forwarding: -; CHECK: # %bb.0: -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: orq (%rdi), %rax -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: load_forwarding: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movq (%rdi), %rax +; CHECK-NOX-NEXT: orq (%rdi), %rax +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: load_forwarding: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %v2 = load atomic i64, i64* %p unordered, align 8 %ret = or i64 %v, %v2 @@ -2130,11 +2864,17 @@ ; Legal to forward (TODO) define i64 @store_forward(i64* %p, i64 %v) { -; CHECK-LABEL: store_forward: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %rsi, (%rdi) -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: store_forward: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movq %rsi, (%rdi) +; CHECK-NOX-NEXT: movq (%rdi), %rax +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: store_forward: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: movq %rsi, (%rdi) +; CHECK-EX-NEXT: retq store atomic i64 %v, i64* %p unordered, align 8 %ret = load atomic i64, i64* %p unordered, align 8 ret i64 %ret @@ -2154,11 +2894,16 @@ ; Legal to kill (TODO) define void @dead_store(i64* %p, i64 %v) { -; CHECK-LABEL: dead_store: -; CHECK: # %bb.0: -; CHECK-NEXT: movq $0, (%rdi) -; CHECK-NEXT: movq %rsi, (%rdi) -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: dead_store: 
+; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movq $0, (%rdi) +; CHECK-NOX-NEXT: movq %rsi, (%rdi) +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: dead_store: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, (%rdi) +; CHECK-EX-NEXT: retq store atomic i64 0, i64* %p unordered, align 8 store atomic i64 %v, i64* %p unordered, align 8 ret void @@ -2227,18 +2972,31 @@ ; CHECK-O3-NEXT: movq %rdi, %rax ; CHECK-O3-NEXT: addq {{.*}}(%rip), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: fold_constant: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rdi, %rax +; CHECK-EX-NEXT: addq {{.*}}(%rip), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 %ret = add i64 %v, %arg ret i64 %ret } define i64 @fold_constant_clobber(i64* %p, i64 %arg) { -; CHECK-LABEL: fold_constant_clobber: -; CHECK: # %bb.0: -; CHECK-NEXT: movq {{.*}}(%rip), %rax -; CHECK-NEXT: movq $5, (%rdi) -; CHECK-NEXT: addq %rsi, %rax -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: fold_constant_clobber: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movq {{.*}}(%rip), %rax +; CHECK-NOX-NEXT: movq $5, (%rdi) +; CHECK-NOX-NEXT: addq %rsi, %rax +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: fold_constant_clobber: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: movq $5, (%rdi) +; CHECK-EX-NEXT: addq {{.*}}(%rip), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 store i64 5, i64* %p %ret = add i64 %v, %arg @@ -2246,12 +3004,19 @@ } define i64 @fold_constant_fence(i64 %arg) { -; CHECK-LABEL: fold_constant_fence: -; CHECK: # %bb.0: -; CHECK-NEXT: movq {{.*}}(%rip), %rax -; CHECK-NEXT: mfence -; CHECK-NEXT: addq %rdi, %rax -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: fold_constant_fence: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movq {{.*}}(%rip), %rax +; CHECK-NOX-NEXT: mfence +; CHECK-NOX-NEXT: addq %rdi, %rax +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: fold_constant_fence: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rdi, %rax +; CHECK-EX-NEXT: mfence +; CHECK-EX-NEXT: addq {{.*}}(%rip), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 fence seq_cst %ret = add i64 %v, %arg @@ -2259,12 +3024,19 @@ } define i64 @fold_invariant_clobber(i64* dereferenceable(8) %p, i64 %arg) { -; CHECK-LABEL: fold_invariant_clobber: -; CHECK: # %bb.0: -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: movq $5, (%rdi) -; CHECK-NEXT: addq %rsi, %rax -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: fold_invariant_clobber: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movq (%rdi), %rax +; CHECK-NOX-NEXT: movq $5, (%rdi) +; CHECK-NOX-NEXT: addq %rsi, %rax +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: fold_invariant_clobber: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: movq $5, (%rdi) +; CHECK-EX-NEXT: addq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{} store i64 5, i64* %p %ret = add i64 %v, %arg @@ -2273,12 +3045,19 @@ define i64 @fold_invariant_fence(i64* dereferenceable(8) %p, i64 %arg) { -; CHECK-LABEL: fold_invariant_fence: -; CHECK: # %bb.0: -; CHECK-NEXT: movq (%rdi), %rax -; CHECK-NEXT: mfence -; CHECK-NEXT: addq %rsi, %rax -; CHECK-NEXT: retq +; CHECK-NOX-LABEL: fold_invariant_fence: +; CHECK-NOX: # %bb.0: +; CHECK-NOX-NEXT: movq (%rdi), %rax +; CHECK-NOX-NEXT: mfence +; CHECK-NOX-NEXT: addq %rsi, %rax +; CHECK-NOX-NEXT: retq +; +; CHECK-EX-LABEL: fold_invariant_fence: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: mfence +; CHECK-EX-NEXT: addq (%rdi), %rax 
+; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{} fence seq_cst %ret = add i64 %v, %arg