Index: include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- include/llvm/CodeGen/SelectionDAGNodes.h
+++ include/llvm/CodeGen/SelectionDAGNodes.h
@@ -2187,8 +2187,6 @@
       : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
     LSBaseSDNodeBits.AddressingMode = AM;
     assert(getAddressingMode() == AM && "Value truncated");
-    assert((!MMO->isAtomic() || MMO->isVolatile()) &&
-           "use an AtomicSDNode instead for non-volatile atomics");
   }
 
   const SDValue &getOffset() const {
Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -3678,6 +3678,25 @@
     return MachineMemOperand::MONone;
   }
 
+  /// Should SelectionDAG lower an atomic store of the given kind as a normal
+  /// StoreSDNode (as opposed to an AtomicSDNode)?  NOTE: The intention is to
+  /// eventually migrate all targets to using StoreSDNodes, but porting is
+  /// being done one target at a time.
+  virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
+    assert(SI.isAtomic());
+    return false;
+  }
+
+  /// Should SelectionDAG lower an atomic load of the given kind as a normal
+  /// LoadSDNode (as opposed to an AtomicSDNode)?  NOTE: The intention is to
+  /// eventually migrate all targets to using LoadSDNodes, but porting is
+  /// being done one target at a time.
+  virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
+    assert(LI.isAtomic());
+    return false;
+  }
+
+
   /// This callback is invoked by the type legalizer to legalize nodes with an
   /// illegal operand type but legal result types.  It replaces the
   /// LowerOperation callback in the type Legalizer.  The reason we can not do
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4652,9 +4652,26 @@
                             AAMDNodes(), nullptr, SSID, Order);
 
   InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
-  SDValue L =
-      DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
-                    getValue(I.getPointerOperand()), MMO);
+
+  SDValue Ptr = getValue(I.getPointerOperand());
+
+  if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
+    // TODO: Once this is better exercised by tests, it should be merged with
+    // the normal path for loads to prevent future divergence.
+    SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
+    if (MemVT != VT)
+      L = DAG.getPtrExtOrTrunc(L, dl, VT);
+
+    setValue(&I, L);
+    if (!I.isUnordered()) {
+      SDValue OutChain = L.getValue(1);
+      DAG.setRoot(OutChain);
+    }
+    return;
+  }
+
+  SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
+                            Ptr, MMO);
 
   SDValue OutChain = L.getValue(1);
   if (MemVT != VT)
@@ -4693,9 +4710,17 @@
   SDValue Val = getValue(I.getValueOperand());
   if (Val.getValueType() != MemVT)
     Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
+  SDValue Ptr = getValue(I.getPointerOperand());
 
+  if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
+    // TODO: Once this is better exercised by tests, it should be merged with
+    // the normal path for stores to prevent future divergence.
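+    // Note: the MachineMemOperand built above for this access still records
+    // the atomic ordering and synchronization scope, and the LSBaseSDNode
+    // assertion that rejected atomic MMOs is removed at the top of this
+    // patch, so the plain StoreSDNode produced here still describes an
+    // atomic store.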
+    SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
+    DAG.setRoot(S);
+    return;
+  }
 
   SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
-                                   getValue(I.getPointerOperand()), Val, MMO);
+                                   Ptr, Val, MMO);
 
   DAG.setRoot(OutChain);
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -1386,6 +1386,9 @@
     LoadInst *
     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
 
+    bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
+    bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
+
     bool needsCmpXchgNb(Type *MemType) const;
 
     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -78,6 +78,13 @@
                  "SHIFT, LEA, etc."),
     cl::Hidden);
 
+static cl::opt<bool> ExperimentalUnorderedISEL(
+    "x86-experimental-unordered-atomic-isel", cl::init(false),
+    cl::desc("Use LoadSDNode and StoreSDNode instead of "
+             "AtomicSDNode for unordered atomic loads and "
+             "stores respectively."),
+    cl::Hidden);
+
 /// Call this when the user attempts to do something unsupported, like
 /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
 /// report_fatal_error, so calling code should attempt to recover without
@@ -26022,6 +26029,18 @@
   return Loaded;
 }
 
+bool X86TargetLowering::lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
+  if (!SI.isUnordered())
+    return false;
+  return ExperimentalUnorderedISEL;
+}
+bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
+  if (!LI.isUnordered())
+    return false;
+  return ExperimentalUnorderedISEL;
+}
+
+
 /// Emit a locked operation on a stack location which does not change any
 /// memory location, but does involve a lock prefix.
Location is chosen to be /// a) very likely accessed only by a single thread to minimize cache traffic, Index: test/CodeGen/X86/atomic-unordered.ll =================================================================== --- test/CodeGen/X86/atomic-unordered.ll +++ test/CodeGen/X86/atomic-unordered.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefix=CHECK-O0 %s ; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefix=CHECK-O3 %s +; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel | FileCheck --check-prefix=CHECK-EX %s define i8 @load_i8(i8* %ptr) { ; CHECK-O0-LABEL: load_i8: @@ -12,6 +13,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movb (%rdi), %al ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_i8: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movb (%rdi), %al +; CHECK-EX-NEXT: retq %v = load atomic i8, i8* %ptr unordered, align 1 ret i8 %v } @@ -27,6 +33,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movb %sil, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: store_i8: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movb %sil, (%rdi) +; CHECK-EX-NEXT: retq store atomic i8 %v, i8* %ptr unordered, align 1 ret void } @@ -41,6 +52,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movzwl (%rdi), %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_i16: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movzwl (%rdi), %eax +; CHECK-EX-NEXT: retq %v = load atomic i16, i16* %ptr unordered, align 2 ret i16 %v } @@ -57,6 +73,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movw %si, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: store_i16: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movw %si, (%rdi) +; CHECK-EX-NEXT: retq store atomic i16 %v, i16* %ptr unordered, align 2 ret void } @@ -71,6 +92,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movl (%rdi), %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_i32: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl (%rdi), %eax +; CHECK-EX-NEXT: retq %v = load atomic i32, i32* %ptr unordered, align 4 ret i32 %v } @@ -85,6 +111,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movl %esi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: store_i32: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl %esi, (%rdi) +; CHECK-EX-NEXT: retq store atomic i32 %v, i32* %ptr unordered, align 4 ret void } @@ -99,6 +130,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_i64: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %ptr unordered, align 8 ret i64 %v } @@ -113,6 +149,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: store_i64: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, (%rdi) +; CHECK-EX-NEXT: retq store atomic i64 %v, i64* %ptr unordered, align 8 ret void } @@ -136,6 +177,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: orq $7, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: narrow_writeback_or: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: orq $7, (%rdi) +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %ptr unordered, align 8 %v.new = or i64 %v, 7 store atomic i64 %v.new, i64* %ptr unordered, align 8 @@ -158,6 +204,12 @@ ; CHECK-O3-NEXT: movl $4294967040, %eax # imm = 0xFFFFFF00 ; CHECK-O3-NEXT: andq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: 
narrow_writeback_and: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl $4294967040, %eax # imm = 0xFFFFFF00 +; CHECK-EX-NEXT: andq %rax, (%rdi) +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %ptr unordered, align 8 %v.new = and i64 %v, 4294967040 ;; 0xFFFF_FF00 store atomic i64 %v.new, i64* %ptr unordered, align 8 @@ -177,6 +229,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: xorq $7, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: narrow_writeback_xor: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: xorq $7, (%rdi) +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %ptr unordered, align 8 %v.new = xor i64 %v, 7 store atomic i64 %v.new, i64* %ptr unordered, align 8 @@ -207,6 +264,12 @@ ; CHECK-O3-NEXT: movl %esi, (%rdi) ; CHECK-O3-NEXT: movl %edx, 4(%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: widen_store: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl %esi, (%rdi) +; CHECK-EX-NEXT: movl %edx, 4(%rdi) +; CHECK-EX-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v1, i32* %p0 unordered, align 8 store atomic i32 %v2, i32* %p1 unordered, align 4 @@ -228,6 +291,12 @@ ; CHECK-O3-NEXT: movl %esi, (%rdi) ; CHECK-O3-NEXT: movl %edx, 4(%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: widen_store_unaligned: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl %esi, (%rdi) +; CHECK-EX-NEXT: movl %edx, 4(%rdi) +; CHECK-EX-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v1, i32* %p0 unordered, align 4 store atomic i32 %v2, i32* %p1 unordered, align 4 @@ -247,6 +316,12 @@ ; CHECK-O3-NEXT: movl %esi, (%rdi) ; CHECK-O3-NEXT: movl %esi, 4(%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: widen_broadcast: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl %esi, (%rdi) +; CHECK-EX-NEXT: movl %esi, 4(%rdi) +; CHECK-EX-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v, i32* %p0 unordered, align 8 store atomic i32 %v, i32* %p1 unordered, align 4 @@ -266,6 +341,12 @@ ; CHECK-O3-NEXT: movl %esi, (%rdi) ; CHECK-O3-NEXT: movl %esi, 4(%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: widen_broadcast_unaligned: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl %esi, (%rdi) +; CHECK-EX-NEXT: movl %esi, 4(%rdi) +; CHECK-EX-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v, i32* %p0 unordered, align 4 store atomic i32 %v, i32* %p1 unordered, align 4 @@ -302,6 +383,20 @@ ; CHECK-O3-NEXT: popq %rbx ; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_i128: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: pushq %rbx +; CHECK-EX-NEXT: .cfi_def_cfa_offset 16 +; CHECK-EX-NEXT: .cfi_offset %rbx, -16 +; CHECK-EX-NEXT: xorl %eax, %eax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: xorl %ecx, %ecx +; CHECK-EX-NEXT: xorl %ebx, %ebx +; CHECK-EX-NEXT: lock cmpxchg16b (%rdi) +; CHECK-EX-NEXT: popq %rbx +; CHECK-EX-NEXT: .cfi_def_cfa_offset 8 +; CHECK-EX-NEXT: retq %v = load atomic i128, i128* %ptr unordered, align 16 ret i128 %v } @@ -355,6 +450,25 @@ ; CHECK-O3-NEXT: popq %rbx ; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: store_i128: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: pushq %rbx +; CHECK-EX-NEXT: .cfi_def_cfa_offset 16 +; CHECK-EX-NEXT: .cfi_offset %rbx, -16 +; CHECK-EX-NEXT: movq %rdx, %rcx +; CHECK-EX-NEXT: movq %rsi, %rbx +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq 8(%rdi), %rdx +; CHECK-EX-NEXT: .p2align 4, 0x90 +; CHECK-EX-NEXT: .LBB16_1: # %atomicrmw.start +; CHECK-EX-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-EX-NEXT: lock cmpxchg16b (%rdi) +; CHECK-EX-NEXT: jne .LBB16_1 
+; CHECK-EX-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-EX-NEXT: popq %rbx +; CHECK-EX-NEXT: .cfi_def_cfa_offset 8 +; CHECK-EX-NEXT: retq store atomic i128 %v, i128* %ptr unordered, align 16 ret void } @@ -408,6 +522,28 @@ ; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O3-NEXT: vzeroupper ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_i256: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: pushq %rbx +; CHECK-EX-NEXT: .cfi_def_cfa_offset 16 +; CHECK-EX-NEXT: subq $32, %rsp +; CHECK-EX-NEXT: .cfi_def_cfa_offset 48 +; CHECK-EX-NEXT: .cfi_offset %rbx, -16 +; CHECK-EX-NEXT: movq %rdi, %rbx +; CHECK-EX-NEXT: movq %rsp, %rdx +; CHECK-EX-NEXT: movl $32, %edi +; CHECK-EX-NEXT: xorl %ecx, %ecx +; CHECK-EX-NEXT: callq __atomic_load +; CHECK-EX-NEXT: vmovups (%rsp), %ymm0 +; CHECK-EX-NEXT: vmovups %ymm0, (%rbx) +; CHECK-EX-NEXT: movq %rbx, %rax +; CHECK-EX-NEXT: addq $32, %rsp +; CHECK-EX-NEXT: .cfi_def_cfa_offset 16 +; CHECK-EX-NEXT: popq %rbx +; CHECK-EX-NEXT: .cfi_def_cfa_offset 8 +; CHECK-EX-NEXT: vzeroupper +; CHECK-EX-NEXT: retq %v = load atomic i256, i256* %ptr unordered, align 16 ret i256 %v } @@ -451,6 +587,24 @@ ; CHECK-O3-NEXT: addq $40, %rsp ; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: store_i256: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: subq $40, %rsp +; CHECK-EX-NEXT: .cfi_def_cfa_offset 48 +; CHECK-EX-NEXT: movq %rdi, %rax +; CHECK-EX-NEXT: movq %r8, {{[0-9]+}}(%rsp) +; CHECK-EX-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; CHECK-EX-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; CHECK-EX-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; CHECK-EX-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; CHECK-EX-NEXT: movl $32, %edi +; CHECK-EX-NEXT: movq %rax, %rsi +; CHECK-EX-NEXT: xorl %ecx, %ecx +; CHECK-EX-NEXT: callq __atomic_store +; CHECK-EX-NEXT: addq $40, %rsp +; CHECK-EX-NEXT: .cfi_def_cfa_offset 8 +; CHECK-EX-NEXT: retq store atomic i256 %v, i256* %ptr unordered, align 16 ret void } @@ -472,6 +626,12 @@ ; CHECK-O3-NEXT: movl %eax, (%rdi) ; CHECK-O3-NEXT: movl %ecx, 4(%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: vec_store: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: vmovss %xmm0, (%rdi) +; CHECK-EX-NEXT: vextractps $1, %xmm0, 4(%rdi) +; CHECK-EX-NEXT: retq %v1 = extractelement <2 x i32> %vec, i32 0 %v2 = extractelement <2 x i32> %vec, i32 1 %p1 = getelementptr i32, i32* %p0, i64 1 @@ -497,6 +657,12 @@ ; CHECK-O3-NEXT: movl %eax, (%rdi) ; CHECK-O3-NEXT: movl %ecx, 4(%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: vec_store_unaligned: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: vmovss %xmm0, (%rdi) +; CHECK-EX-NEXT: vextractps $1, %xmm0, 4(%rdi) +; CHECK-EX-NEXT: retq %v1 = extractelement <2 x i32> %vec, i32 0 %v2 = extractelement <2 x i32> %vec, i32 1 %p1 = getelementptr i32, i32* %p0, i64 1 @@ -523,6 +689,12 @@ ; CHECK-O3-NEXT: movl %eax, (%rdi) ; CHECK-O3-NEXT: movl %eax, 4(%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: widen_broadcast2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: vmovss %xmm0, (%rdi) +; CHECK-EX-NEXT: vmovss %xmm0, 4(%rdi) +; CHECK-EX-NEXT: retq %v1 = extractelement <2 x i32> %vec, i32 0 %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v1, i32* %p0 unordered, align 8 @@ -545,6 +717,12 @@ ; CHECK-O3-NEXT: movl %eax, (%rdi) ; CHECK-O3-NEXT: movl %eax, 4(%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: widen_broadcast2_unaligned: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: vmovss %xmm0, (%rdi) +; CHECK-EX-NEXT: vmovss %xmm0, 4(%rdi) +; CHECK-EX-NEXT: retq %v1 = extractelement <2 x i32> %vec, i32 0 %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v1, i32* %p0 unordered, 
align 4 @@ -565,6 +743,11 @@ ; CHECK-O3-NEXT: movl $0, (%rdi) ; CHECK-O3-NEXT: movl $0, 4(%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: widen_zero_init: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq $0, (%rdi) +; CHECK-EX-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 0, i32* %p0 unordered, align 8 store atomic i32 0, i32* %p1 unordered, align 4 @@ -584,6 +767,11 @@ ; CHECK-O3-NEXT: movl $0, (%rdi) ; CHECK-O3-NEXT: movl $0, 4(%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: widen_zero_init_unaligned: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq $0, (%rdi) +; CHECK-EX-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 0, i32* %p0 unordered, align 4 store atomic i32 0, i32* %p1 unordered, align 4 @@ -606,6 +794,12 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: addq $15, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_add1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: addq $15, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = add i64 %v, 15 ret i64 %ret @@ -623,6 +817,12 @@ ; CHECK-O3-NEXT: movq %rsi, %rax ; CHECK-O3-NEXT: addq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_add2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: addq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = add i64 %v, %v2 ret i64 %ret @@ -640,6 +840,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: addq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_add3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: addq (%rsi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = add i64 %v, %v2 @@ -659,6 +865,12 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: addq $-15, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_sub1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: addq $-15, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = sub i64 %v, 15 ret i64 %ret @@ -676,6 +888,12 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: subq %rsi, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_sub2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: subq %rsi, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = sub i64 %v, %v2 ret i64 %ret @@ -693,6 +911,12 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: subq (%rsi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_sub3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: subq (%rsi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = sub i64 %v, %v2 @@ -712,6 +936,13 @@ ; CHECK-O3-NEXT: leaq (%rax,%rax,4), %rax ; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_mul1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,4), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,2), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = mul i64 %v, 15 ret i64 %ret @@ -729,6 +960,12 @@ ; CHECK-O3-NEXT: movq %rsi, %rax ; CHECK-O3-NEXT: imulq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_mul2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; 
CHECK-EX-NEXT: imulq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = mul i64 %v, %v2 ret i64 %ret @@ -746,6 +983,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: imulq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_mul3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: imulq (%rsi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = mul i64 %v, %v2 @@ -774,6 +1017,19 @@ ; CHECK-O3-NEXT: sarq $3, %rdx ; CHECK-O3-NEXT: addq %rdx, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_sdiv1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rcx +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: imulq %rdx +; CHECK-EX-NEXT: addq %rcx, %rdx +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: shrq $63, %rax +; CHECK-EX-NEXT: sarq $3, %rdx +; CHECK-EX-NEXT: addq %rdx, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = sdiv i64 %v, 15 ret i64 %ret @@ -805,6 +1061,24 @@ ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_sdiv2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB35_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: cqto +; CHECK-EX-NEXT: idivq %rsi +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB35_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = sdiv i64 %v, %v2 ret i64 %ret @@ -836,6 +1110,25 @@ ; CHECK-O3-NEXT: divl %ecx ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_sdiv3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq (%rsi), %rcx +; CHECK-EX-NEXT: movq %rax, %rdx +; CHECK-EX-NEXT: orq %rcx, %rdx +; CHECK-EX-NEXT: shrq $32, %rdx +; CHECK-EX-NEXT: je .LBB36_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: cqto +; CHECK-EX-NEXT: idivq %rcx +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB36_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %ecx +; CHECK-EX-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = sdiv i64 %v, %v2 @@ -861,6 +1154,14 @@ ; CHECK-O3-NEXT: movq %rdx, %rax ; CHECK-O3-NEXT: shrq $3, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_udiv1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 +; CHECK-EX-NEXT: mulq (%rdi) +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: shrq $3, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = udiv i64 %v, 15 ret i64 %ret @@ -892,6 +1193,24 @@ ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_udiv2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; 
CHECK-EX-NEXT: je .LBB38_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divq %rsi +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB38_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = udiv i64 %v, %v2 ret i64 %ret @@ -924,6 +1243,25 @@ ; CHECK-O3-NEXT: divl %ecx ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_udiv3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq (%rsi), %rcx +; CHECK-EX-NEXT: movq %rax, %rdx +; CHECK-EX-NEXT: orq %rcx, %rdx +; CHECK-EX-NEXT: shrq $32, %rdx +; CHECK-EX-NEXT: je .LBB39_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divq %rcx +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB39_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %ecx +; CHECK-EX-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = udiv i64 %v, %v2 @@ -957,6 +1295,23 @@ ; CHECK-O3-NEXT: subq %rax, %rcx ; CHECK-O3-NEXT: movq %rcx, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_srem1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rcx +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: imulq %rdx +; CHECK-EX-NEXT: addq %rcx, %rdx +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: shrq $63, %rax +; CHECK-EX-NEXT: sarq $3, %rdx +; CHECK-EX-NEXT: addq %rax, %rdx +; CHECK-EX-NEXT: leaq (%rdx,%rdx,4), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,2), %rax +; CHECK-EX-NEXT: subq %rax, %rcx +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = srem i64 %v, 15 ret i64 %ret @@ -990,6 +1345,25 @@ ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: movl %edx, %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_srem2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB41_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: cqto +; CHECK-EX-NEXT: idivq %rsi +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB41_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: movl %edx, %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = srem i64 %v, %v2 ret i64 %ret @@ -1023,6 +1397,26 @@ ; CHECK-O3-NEXT: divl %ecx ; CHECK-O3-NEXT: movl %edx, %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_srem3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq (%rsi), %rcx +; CHECK-EX-NEXT: movq %rax, %rdx +; CHECK-EX-NEXT: orq %rcx, %rdx +; CHECK-EX-NEXT: shrq $32, %rdx +; CHECK-EX-NEXT: je .LBB42_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: cqto +; CHECK-EX-NEXT: idivq %rcx +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB42_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %ecx +; CHECK-EX-NEXT: movl %edx, %eax +; CHECK-EX-NEXT: 
retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = srem i64 %v, %v2 @@ -1053,6 +1447,19 @@ ; CHECK-O3-NEXT: subq %rax, %rcx ; CHECK-O3-NEXT: movq %rcx, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_urem1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rcx +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: mulq %rdx +; CHECK-EX-NEXT: shrq $3, %rdx +; CHECK-EX-NEXT: leaq (%rdx,%rdx,4), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,2), %rax +; CHECK-EX-NEXT: subq %rax, %rcx +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = urem i64 %v, 15 ret i64 %ret @@ -1087,6 +1494,25 @@ ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: movl %edx, %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_urem2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB44_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divq %rsi +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB44_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: movl %edx, %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = urem i64 %v, %v2 ret i64 %ret @@ -1121,6 +1547,26 @@ ; CHECK-O3-NEXT: divl %ecx ; CHECK-O3-NEXT: movl %edx, %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_urem3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq (%rsi), %rcx +; CHECK-EX-NEXT: movq %rax, %rdx +; CHECK-EX-NEXT: orq %rcx, %rdx +; CHECK-EX-NEXT: shrq $32, %rdx +; CHECK-EX-NEXT: je .LBB45_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divq %rcx +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB45_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %ecx +; CHECK-EX-NEXT: movl %edx, %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = urem i64 %v, %v2 @@ -1140,6 +1586,12 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: shlq $15, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_shl1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: shlq $15, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = shl i64 %v, 15 ret i64 %ret @@ -1158,6 +1610,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_shl2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: shlxq %rsi, (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = shl i64 %v, %v2 ret i64 %ret @@ -1177,6 +1634,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: shlxq %rax, (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_shl3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movb (%rsi), %al +; CHECK-EX-NEXT: shlxq %rax, (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = shl i64 %v, %v2 @@ -1196,6 +1659,12 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: shrq $15, %rax 
; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_lshr1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: shrq $15, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = lshr i64 %v, 15 ret i64 %ret @@ -1214,6 +1683,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_lshr2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: shrxq %rsi, (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = lshr i64 %v, %v2 ret i64 %ret @@ -1233,6 +1707,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: shrxq %rax, (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_lshr3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movb (%rsi), %al +; CHECK-EX-NEXT: shrxq %rax, (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = lshr i64 %v, %v2 @@ -1252,6 +1732,12 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: sarq $15, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_ashr1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: sarq $15, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = ashr i64 %v, 15 ret i64 %ret @@ -1270,6 +1756,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_ashr2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: sarxq %rsi, (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = ashr i64 %v, %v2 ret i64 %ret @@ -1289,6 +1780,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: sarxq %rax, (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_ashr3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movb (%rsi), %al +; CHECK-EX-NEXT: sarxq %rax, (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = ashr i64 %v, %v2 @@ -1308,6 +1805,12 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: andl $15, %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_and1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: andl $15, %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = and i64 %v, 15 ret i64 %ret @@ -1325,6 +1828,12 @@ ; CHECK-O3-NEXT: movq %rsi, %rax ; CHECK-O3-NEXT: andq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_and2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: andq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = and i64 %v, %v2 ret i64 %ret @@ -1342,6 +1851,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: andq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_and3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: andq (%rsi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = and i64 %v, %v2 @@ -1361,6 +1876,12 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: orq $15, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_or1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: orq $15, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = or i64 %v, 15 ret i64 %ret @@ -1378,6 +1899,12 @@ ; CHECK-O3-NEXT: movq 
%rsi, %rax ; CHECK-O3-NEXT: orq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_or2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: orq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = or i64 %v, %v2 ret i64 %ret @@ -1395,6 +1922,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: orq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_or3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: orq (%rsi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = or i64 %v, %v2 @@ -1414,6 +1947,12 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: xorq $15, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_xor1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: xorq $15, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = xor i64 %v, 15 ret i64 %ret @@ -1431,6 +1970,12 @@ ; CHECK-O3-NEXT: movq %rsi, %rax ; CHECK-O3-NEXT: xorq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_xor2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: xorq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = xor i64 %v, %v2 ret i64 %ret @@ -1448,6 +1993,12 @@ ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: xorq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_xor3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: xorq (%rsi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = xor i64 %v, %v2 @@ -1469,6 +2020,12 @@ ; CHECK-O3-NEXT: cmpq $15, (%rdi) ; CHECK-O3-NEXT: sete %al ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_icmp1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: cmpq $15, (%rdi) +; CHECK-EX-NEXT: sete %al +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = icmp eq i64 %v, 15 ret i1 %ret @@ -1489,6 +2046,12 @@ ; CHECK-O3-NEXT: cmpq %rsi, (%rdi) ; CHECK-O3-NEXT: sete %al ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_icmp2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: cmpq %rsi, (%rdi) +; CHECK-EX-NEXT: sete %al +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = icmp eq i64 %v, %v2 ret i1 %ret @@ -1511,6 +2074,13 @@ ; CHECK-O3-NEXT: cmpq %rax, (%rdi) ; CHECK-O3-NEXT: sete %al ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_fold_icmp3: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: cmpq (%rsi), %rax +; CHECK-EX-NEXT: sete %al +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = icmp eq i64 %v, %v2 @@ -1536,6 +2106,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: addq $15, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_add1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: addq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = add i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1555,6 +2130,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: addq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_add2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: addq %rsi, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = add i64 %prev, %v store atomic i64 %val, i64* %p 
unordered, align 8 @@ -1574,6 +2154,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: addq $-15, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_sub1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: addq $-15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = sub i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1593,6 +2178,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: subq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_sub2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: subq %rsi, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = sub i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1616,6 +2206,14 @@ ; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_mul1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,4), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,2), %rax +; CHECK-EX-NEXT: movq %rax, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = mul i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1636,6 +2234,12 @@ ; CHECK-O3-NEXT: imulq (%rdi), %rsi ; CHECK-O3-NEXT: movq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_mul2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: imulq (%rdi), %rsi +; CHECK-EX-NEXT: movq %rsi, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = mul i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1672,6 +2276,20 @@ ; CHECK-O3-NEXT: addq %rax, %rdx ; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_sdiv1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rcx +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: imulq %rdx +; CHECK-EX-NEXT: addq %rcx, %rdx +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: shrq $63, %rax +; CHECK-EX-NEXT: sarq $3, %rdx +; CHECK-EX-NEXT: addq %rax, %rdx +; CHECK-EX-NEXT: movq %rdx, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = sdiv i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1707,6 +2325,26 @@ ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_sdiv2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB74_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: cqto +; CHECK-EX-NEXT: idivq %rsi +; CHECK-EX-NEXT: movq %rax, (%rdi) +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB74_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-EX-NEXT: movq %rax, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = sdiv i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1732,6 +2370,14 @@ ; CHECK-O3-NEXT: shrq $3, %rdx ; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_udiv1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 +; CHECK-EX-NEXT: mulq (%rdi) +; CHECK-EX-NEXT: shrq $3, %rdx +; CHECK-EX-NEXT: movq %rdx, (%rdi) +; 
CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = udiv i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1768,6 +2414,26 @@ ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_udiv2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB76_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divq %rsi +; CHECK-EX-NEXT: movq %rax, (%rdi) +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB76_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: # kill: def $eax killed $eax def $rax +; CHECK-EX-NEXT: movq %rax, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = udiv i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1810,6 +2476,23 @@ ; CHECK-O3-NEXT: subq %rax, %rcx ; CHECK-O3-NEXT: movq %rcx, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_srem1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rcx +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: imulq %rdx +; CHECK-EX-NEXT: addq %rcx, %rdx +; CHECK-EX-NEXT: movq %rdx, %rax +; CHECK-EX-NEXT: shrq $63, %rax +; CHECK-EX-NEXT: sarq $3, %rdx +; CHECK-EX-NEXT: addq %rax, %rdx +; CHECK-EX-NEXT: leaq (%rdx,%rdx,4), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,2), %rax +; CHECK-EX-NEXT: subq %rax, %rcx +; CHECK-EX-NEXT: movq %rcx, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = srem i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1845,6 +2528,26 @@ ; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_srem2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB78_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: cqto +; CHECK-EX-NEXT: idivq %rsi +; CHECK-EX-NEXT: movq %rdx, (%rdi) +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB78_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: # kill: def $edx killed $edx def $rdx +; CHECK-EX-NEXT: movq %rdx, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = srem i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1879,6 +2582,19 @@ ; CHECK-O3-NEXT: subq %rax, %rcx ; CHECK-O3-NEXT: movq %rcx, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_urem1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rcx +; CHECK-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-EX-NEXT: movq %rcx, %rax +; CHECK-EX-NEXT: mulq %rdx +; CHECK-EX-NEXT: shrq $3, %rdx +; CHECK-EX-NEXT: leaq (%rdx,%rdx,4), %rax +; CHECK-EX-NEXT: leaq (%rax,%rax,2), %rax +; CHECK-EX-NEXT: subq %rax, %rcx +; CHECK-EX-NEXT: movq %rcx, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = urem i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1915,6 +2631,26 @@ ; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-O3-NEXT: 
movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_urem2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: orq %rsi, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; CHECK-EX-NEXT: je .LBB80_1 +; CHECK-EX-NEXT: # %bb.2: +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divq %rsi +; CHECK-EX-NEXT: movq %rdx, (%rdi) +; CHECK-EX-NEXT: retq +; CHECK-EX-NEXT: .LBB80_1: +; CHECK-EX-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-EX-NEXT: xorl %edx, %edx +; CHECK-EX-NEXT: divl %esi +; CHECK-EX-NEXT: # kill: def $edx killed $edx def $rdx +; CHECK-EX-NEXT: movq %rdx, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = urem i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1936,6 +2672,11 @@ ; CHECK-O3-NEXT: shlq $15, %rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_shl1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: shlq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = shl i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -1959,6 +2700,13 @@ ; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_shl2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rcx +; CHECK-EX-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-EX-NEXT: shlq %cl, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = shl i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -1980,6 +2728,11 @@ ; CHECK-O3-NEXT: shrq $15, %rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_lshr1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: shrq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = lshr i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -2003,6 +2756,13 @@ ; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_lshr2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rcx +; CHECK-EX-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-EX-NEXT: shrq %cl, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = lshr i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -2024,6 +2784,11 @@ ; CHECK-O3-NEXT: sarq $15, %rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_ashr1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: sarq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = ashr i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -2047,6 +2812,13 @@ ; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_ashr2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rcx +; CHECK-EX-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-EX-NEXT: sarq %cl, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = ashr i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -2068,6 +2840,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: andq $15, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_and1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: andq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = and i64 %prev, 15 store 
atomic i64 %val, i64* %p unordered, align 8 @@ -2087,6 +2864,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: andq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_and2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: andq %rsi, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = and i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -2106,6 +2888,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: orq $15, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_or1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: orq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = or i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -2125,6 +2912,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: orq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_or2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: orq %rsi, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = or i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -2144,6 +2936,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: xorq $15, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_xor1: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: xorq $15, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = xor i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 @@ -2163,6 +2960,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: xorq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: rmw_fold_xor2: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: xorq %rsi, (%rdi) +; CHECK-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = xor i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 @@ -2185,6 +2987,11 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: fold_trunc: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl (%rdi), %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = trunc i64 %v to i32 ret i32 %ret @@ -2205,6 +3012,12 @@ ; CHECK-O3-NEXT: addl %esi, %eax ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: fold_trunc_add: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl %esi, %eax +; CHECK-EX-NEXT: addl (%rdi), %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %trunc = trunc i64 %v to i32 %ret = add i32 %trunc, %v2 @@ -2226,6 +3039,12 @@ ; CHECK-O3-NEXT: andl %esi, %eax ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: fold_trunc_and: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl %esi, %eax +; CHECK-EX-NEXT: andl (%rdi), %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %trunc = trunc i64 %v to i32 %ret = and i32 %trunc, %v2 @@ -2247,6 +3066,12 @@ ; CHECK-O3-NEXT: orl %esi, %eax ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: fold_trunc_or: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movl %esi, %eax +; CHECK-EX-NEXT: orl (%rdi), %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %trunc = trunc i64 %v to i32 %ret = or i32 %trunc, %v2 @@ -2274,6 +3099,15 @@ ; CHECK-O3-NEXT: orl %eax, %ecx ; CHECK-O3-NEXT: movzbl %cl, %eax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: split_load: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, %rcx +; CHECK-EX-NEXT: shrq $32, %rcx +; 
CHECK-EX-NEXT: orl %eax, %ecx +; CHECK-EX-NEXT: movzbl %cl, %eax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %b1 = trunc i64 %v to i8 %v.shift = lshr i64 %v, 32 @@ -2299,6 +3133,11 @@ ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: constant_folding: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 ret i64 %v } @@ -2316,6 +3155,11 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: orq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: load_forwarding: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %v2 = load atomic i64, i64* %p unordered, align 8 %ret = or i64 %v, %v2 @@ -2335,6 +3179,12 @@ ; CHECK-O3-NEXT: movq %rsi, (%rdi) ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: store_forward: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: movq %rsi, (%rdi) +; CHECK-EX-NEXT: retq store atomic i64 %v, i64* %p unordered, align 8 %ret = load atomic i64, i64* %p unordered, align 8 ret i64 %ret @@ -2353,6 +3203,12 @@ ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: dead_writeback: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: movq %rax, (%rdi) +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 store atomic i64 %v, i64* %p unordered, align 8 ret void @@ -2371,6 +3227,11 @@ ; CHECK-O3-NEXT: movq $0, (%rdi) ; CHECK-O3-NEXT: movq %rsi, (%rdi) ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: dead_store: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, (%rdi) +; CHECK-EX-NEXT: retq store atomic i64 0, i64* %p unordered, align 8 store atomic i64 %v, i64* %p unordered, align 8 ret void @@ -2397,6 +3258,13 @@ ; CHECK-O3-NEXT: mfence ; CHECK-O3-NEXT: addq $15, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: nofold_fence: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: mfence +; CHECK-EX-NEXT: addq $15, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 fence seq_cst %ret = add i64 %v, 15 @@ -2417,6 +3285,13 @@ ; CHECK-O3-NEXT: #MEMBARRIER ; CHECK-O3-NEXT: addq $15, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: nofold_fence_acquire: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: #MEMBARRIER +; CHECK-EX-NEXT: addq $15, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 fence acquire %ret = add i64 %v, 15 @@ -2438,6 +3313,13 @@ ; CHECK-O3-NEXT: #MEMBARRIER ; CHECK-O3-NEXT: addq $15, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: nofold_stfence: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq (%rdi), %rax +; CHECK-EX-NEXT: #MEMBARRIER +; CHECK-EX-NEXT: addq $15, %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 fence syncscope("singlethread") seq_cst %ret = add i64 %v, 15 @@ -2460,6 +3342,12 @@ ; CHECK-O3-NEXT: movq %rdi, %rax ; CHECK-O3-NEXT: addq {{.*}}(%rip), %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: fold_constant: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rdi, %rax +; CHECK-EX-NEXT: addq {{.*}}(%rip), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 %ret = add i64 %v, %arg ret i64 %ret @@ -2479,6 +3367,13 @@ ; CHECK-O3-NEXT: movq $5, (%rdi) ; CHECK-O3-NEXT: addq %rsi, %rax ; CHECK-O3-NEXT: retq +; +; 
CHECK-EX-LABEL: fold_constant_clobber: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: movq $5, (%rdi) +; CHECK-EX-NEXT: addq {{.*}}(%rip), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 store i64 5, i64* %p %ret = add i64 %v, %arg @@ -2499,6 +3394,13 @@ ; CHECK-O3-NEXT: mfence ; CHECK-O3-NEXT: addq %rdi, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: fold_constant_fence: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rdi, %rax +; CHECK-EX-NEXT: mfence +; CHECK-EX-NEXT: addq {{.*}}(%rip), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 fence seq_cst %ret = add i64 %v, %arg @@ -2519,6 +3421,13 @@ ; CHECK-O3-NEXT: movq $5, (%rdi) ; CHECK-O3-NEXT: addq %rsi, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: fold_invariant_clobber: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: movq $5, (%rdi) +; CHECK-EX-NEXT: addq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{} store i64 5, i64* %p %ret = add i64 %v, %arg @@ -2540,6 +3449,13 @@ ; CHECK-O3-NEXT: mfence ; CHECK-O3-NEXT: addq %rsi, %rax ; CHECK-O3-NEXT: retq +; +; CHECK-EX-LABEL: fold_invariant_fence: +; CHECK-EX: # %bb.0: +; CHECK-EX-NEXT: movq %rsi, %rax +; CHECK-EX-NEXT: mfence +; CHECK-EX-NEXT: addq (%rdi), %rax +; CHECK-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{} fence seq_cst %ret = add i64 %v, %arg