diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -550,6 +550,7 @@ -------------------- - Support ``-mindirect-branch-cs-prefix`` for call and jmp to indirect thunk. - Fix 32-bit ``__fastcall`` and ``__vectorcall`` ABI mismatch with MSVC. +- Add support for ``RAO-INT`` instructions. DWARF Support in Clang ---------------------- diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4654,6 +4654,8 @@ def mno_prfchw : Flag<["-"], "mno-prfchw">, Group; def mptwrite : Flag<["-"], "mptwrite">, Group; def mno_ptwrite : Flag<["-"], "mno-ptwrite">, Group; +def mraoint : Flag<["-"], "mraoint">, Group; +def mno_raoint : Flag<["-"], "mno-raoint">, Group; def mrdpid : Flag<["-"], "mrdpid">, Group; def mno_rdpid : Flag<["-"], "mno-rdpid">, Group; def mrdpru : Flag<["-"], "mrdpru">, Group; diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -135,6 +135,7 @@ bool HasPTWRITE = false; bool HasINVPCID = false; bool HasENQCMD = false; + bool HasRAOINT = false; bool HasKL = false; // For key locker bool HasWIDEKL = false; // For wide key locker bool HasHRESET = false; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -330,6 +330,8 @@ HasAMXINT8 = true; } else if (Feature == "+amx-tile") { HasAMXTILE = true; + } else if (Feature == "+raoint") { + HasRAOINT = true; } else if (Feature == "+avxvnni") { HasAVXVNNI = true; } else if (Feature == "+serialize") { @@ -774,6 +776,8 @@ Builder.defineMacro("__AMXINT8__"); if (HasAMXBF16) Builder.defineMacro("__AMXBF16__"); + if (HasRAOINT) + Builder.defineMacro("__RAOINT__"); if (HasAVXVNNI) Builder.defineMacro("__AVXVNNI__"); if 
(HasSERIALIZE) @@ -932,6 +936,7 @@ .Case("prefetchwt1", true) .Case("prfchw", true) .Case("ptwrite", true) + .Case("raoint", true) .Case("rdpid", true) .Case("rdpru", true) .Case("rdrnd", true) @@ -1028,6 +1033,7 @@ .Case("prefetchwt1", HasPREFETCHWT1) .Case("prfchw", HasPRFCHW) .Case("ptwrite", HasPTWRITE) + .Case("raoint", HasRAOINT) .Case("rdpid", HasRDPID) .Case("rdpru", HasRDPRU) .Case("rdrnd", HasRDRND) diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h --- a/clang/lib/Headers/cpuid.h +++ b/clang/lib/Headers/cpuid.h @@ -200,6 +200,7 @@ #define bit_AMXINT8 0x02000000 /* Features in %eax for leaf 7 sub-leaf 1 */ +#define bit_RAOINT 0x00000008 #define bit_AVXVNNI 0x00000010 #define bit_AVX512BF16 0x00000020 #define bit_HRESET 0x00400000 diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -305,6 +305,11 @@ // AVX512FP16: "-target-feature" "+avx512fp16" // NO-AVX512FP16: "-target-feature" "-avx512fp16" +// RUN: %clang --target=i386 -march=i386 -mraoint %s -### 2>&1 | FileCheck -check-prefix=RAOINT %s +// RUN: %clang --target=i386 -march=i386 -mno-raoint %s -### 2>&1 | FileCheck -check-prefix=NO-RAOINT %s +// RAOINT: "-target-feature" "+raoint" +// NO-RAOINT: "-target-feature" "-raoint" + // RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s // RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s // CRC32: "-target-feature" "+crc32" diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -581,6 +581,14 @@ // AVX512FP16NOAVX512DQ-NOT: #define __AVX512DQ__ 1 // AVX512FP16NOAVX512DQ-NOT: #define __AVX512FP16__ 1 +// RUN: %clang -target i386-unknown-linux-gnu -march=atom 
-mraoint -x c -E -dM -o - %s | FileCheck -check-prefix=RAOINT %s + +// RAOINT: #define __RAOINT__ 1 + +// RUN: %clang -target i386-unknown-linux-gnu -march=atom -mno-raoint -x c -E -dM -o - %s | FileCheck -check-prefix=NO-RAOINT %s + +// NO-RAOINT-NOT: #define __RAOINT__ 1 + // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s // CRC32: #define __CRC32__ 1 diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -200,6 +200,7 @@ X86_FEATURE (XSAVEOPT, "xsaveopt") X86_FEATURE (XSAVES, "xsaves") X86_FEATURE (HRESET, "hreset") +X86_FEATURE (RAOINT, "raoint") X86_FEATURE (AVX512FP16, "avx512fp16") X86_FEATURE (AVXVNNI, "avxvnni") // These features aren't really CPU features, but the frontend can set them. diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1805,6 +1805,7 @@ Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave; bool HasLeaf7Subleaf1 = MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + Features["raoint"] = HasLeaf7Subleaf1 && ((EAX >> 3) & 1); Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave; Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1); diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -581,6 +581,7 @@ constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE; constexpr FeatureBitset ImpliedFeaturesHRESET = {}; +static constexpr FeatureBitset ImpliedFeaturesRAOINT = FeatureSSE2; static constexpr FeatureBitset ImpliedFeaturesAVX512FP16 = FeatureAVX512BW | 
FeatureAVX512DQ | FeatureAVX512VL; // Key Locker Features diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -254,6 +254,9 @@ def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true", "Support AMX-BF16 instructions", [FeatureAMXTILE]>; +def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true", + "Support RAO-INT instructions", + [FeatureSSE2]>; def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true", "Invalidate Process-Context Identifier">; def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true", diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -793,6 +793,13 @@ LBTC, LBTR, + /// RAO arithmetic instructions. + /// OUTCHAIN = RADD(INCHAIN, PTR, RHS) + RADD, + ROR, + RXOR, + RAND, + // Load, scalar_to_vector, and zero extend. VZEXT_LOAD, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31796,6 +31796,52 @@ return N; } + // We can lower add/sub/or/xor/and into RAO-INT instructions when the result + // is unused. + // TODO: We can manually widen i8/i16 to i32 here to use RAO-INT instruction. 
+ if (Subtarget.hasRAOINT() && + (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()))) { + if (Opc == ISD::ATOMIC_LOAD_SUB) { + RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS); + return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS, + AN->getMemOperand()); + } + unsigned NewOpc = 0; + switch (N->getOpcode()) { + case ISD::ATOMIC_LOAD_ADD: + NewOpc = X86ISD::RADD; + break; + case ISD::ATOMIC_LOAD_OR: + NewOpc = X86ISD::ROR; + break; + case ISD::ATOMIC_LOAD_XOR: + NewOpc = X86ISD::RXOR; + break; + case ISD::ATOMIC_LOAD_AND: + NewOpc = X86ISD::RAND; + break; + default: + llvm_unreachable("Unexpected ATOMIC_LOAD_ opcode"); + } + + // RAO-INT instructions are weakly-ordered. We need to insert an MFENCE for + // orderings stronger than monotonic. + // FIXME: Do we just need LFENCE for acquire? + // FIXME: Do we need a trailing fence? + if (isStrongerThanMonotonic(AN->getSuccessOrdering())) { + assert(Subtarget.hasMFence() && "MFENCE is required"); + Chain = DAG.getNode(X86ISD::MFENCE, DL, MVT::Other, Chain); + } + + MachineMemOperand *MMO = cast(N)->getMemOperand(); + SDValue RAO = DAG.getMemIntrinsicNode(NewOpc, DL, DAG.getVTList(MVT::Other), + {Chain, LHS, RHS}, VT, MMO); + + // NOTE: The getUNDEF is needed to give something for the unused result 0. + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), DAG.getUNDEF(VT), + RAO); + } + // Specialized lowering for the canonical form of an idemptotent atomicrmw. 
// The core idea here is that since the memory location isn't actually // changing, all we need is a lowering for the *ordering* impacts of the @@ -33709,6 +33755,10 @@ NODE_NAME_CASE(LBTS) NODE_NAME_CASE(LBTC) NODE_NAME_CASE(LBTR) + NODE_NAME_CASE(RADD) + NODE_NAME_CASE(ROR) + NODE_NAME_CASE(RXOR) + NODE_NAME_CASE(RAND) NODE_NAME_CASE(VZEXT_MOVL) NODE_NAME_CASE(VZEXT_LOAD) NODE_NAME_CASE(VEXTRACT_STORE) diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -980,6 +980,7 @@ def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">; def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">; def HasKL : Predicate<"Subtarget->hasKL()">; +def HasRAOINT : Predicate<"Subtarget->hasRAOINT()">; def HasWIDEKL : Predicate<"Subtarget->hasWIDEKL()">; def HasHRESET : Predicate<"Subtarget->hasHRESET()">; def HasSERIALIZE : Predicate<"Subtarget->hasSERIALIZE()">; @@ -3167,6 +3168,9 @@ // AMX instructions include "X86InstrAMX.td" +// RAO-INT instructions +include "X86InstrRAOINT.td" + // System instructions. include "X86InstrSystem.td" diff --git a/llvm/lib/Target/X86/X86InstrRAOINT.td b/llvm/lib/Target/X86/X86InstrRAOINT.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/X86/X86InstrRAOINT.td @@ -0,0 +1,45 @@ +//===---- X86InstrRAOINT.td -------------------------------*- tablegen -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the instructions that make up the Intel RAO-INT +// instruction set. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// RAO-INT instructions + +def SDTRAOBinaryArith : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; + +def X86rao_add : SDNode<"X86ISD::RADD", SDTRAOBinaryArith, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def X86rao_or : SDNode<"X86ISD::ROR", SDTRAOBinaryArith, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def X86rao_xor : SDNode<"X86ISD::RXOR", SDTRAOBinaryArith, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def X86rao_and : SDNode<"X86ISD::RAND", SDTRAOBinaryArith, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +multiclass RAOINT_BASE { + let Predicates = [HasRAOINT] in + def 32mr : I<0xfc, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + !strconcat("a", OpcodeStr, "{l}\t{$src, $dst|$dst, $src}"), + [(!cast("X86rao_" # OpcodeStr) addr:$dst, GR32:$src)]>, + Sched<[WriteALURMW]>; + + let Predicates = [HasRAOINT, In64BitMode] in + def 64mr : I<0xfc, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + !strconcat("a", OpcodeStr, "{q}\t{$src, $dst|$dst, $src}"), + [(!cast("X86rao_" # OpcodeStr) addr:$dst, GR64:$src)]>, + Sched<[WriteALURMW]>, REX_W; +} + +defm AADD : RAOINT_BASE<"add">, T8PS; +defm AAND : RAOINT_BASE<"and">, T8PD; +defm AOR : RAOINT_BASE<"or" >, T8XD; +defm AXOR : RAOINT_BASE<"xor">, T8XS; diff --git a/llvm/test/CodeGen/X86/atomic-instructions-32.ll b/llvm/test/CodeGen/X86/atomic-instructions-32.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/atomic-instructions-32.ll @@ -0,0 +1,575 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86-NO-RAOINT +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=i686-unknown-unknown -mattr=+raoint | FileCheck %s --check-prefixes=X86-RAO-INT +; RUN: llc < %s -O0 
-verify-machineinstrs -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64-NO-RAOINT +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+raoint | FileCheck %s --check-prefixes=X64-RAO-INT + +define i32 @atomic_add32(i32* nocapture %p, i32 %val) nounwind ssp { +; X86-NO-RAOINT-LABEL: atomic_add32: +; X86-NO-RAOINT: # %bb.0: +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NO-RAOINT-NEXT: lock addl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: lock addl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: lock addl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: lock addl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: lock addl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: lock xaddl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: retl +; +; X86-RAO-INT-LABEL: atomic_add32: +; X86-RAO-INT: # %bb.0: +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-RAO-INT-NEXT: aaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: lock xaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: retl +; +; X64-NO-RAOINT-LABEL: atomic_add32: +; X64-NO-RAOINT: # %bb.0: +; X64-NO-RAOINT-NEXT: movl %esi, %eax +; X64-NO-RAOINT-NEXT: lock addl %eax, (%rdi) +; X64-NO-RAOINT-NEXT: lock addl %eax, (%rdi) +; X64-NO-RAOINT-NEXT: lock addl %eax, (%rdi) +; X64-NO-RAOINT-NEXT: lock addl %eax, (%rdi) +; X64-NO-RAOINT-NEXT: lock addl %eax, (%rdi) +; X64-NO-RAOINT-NEXT: lock xaddl %eax, (%rdi) +; X64-NO-RAOINT-NEXT: retq +; +; X64-RAO-INT-LABEL: atomic_add32: +; X64-RAO-INT: # %bb.0: +; X64-RAO-INT-NEXT: movl %esi, %eax +; X64-RAO-INT-NEXT: aaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aaddl 
%eax, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: lock xaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: retq + %1 = atomicrmw add i32* %p, i32 %val monotonic + %2 = atomicrmw add i32* %p, i32 %val acquire + %3 = atomicrmw add i32* %p, i32 %val release + %4 = atomicrmw add i32* %p, i32 %val acq_rel + %5 = atomicrmw add i32* %p, i32 %val seq_cst + + %6 = atomicrmw add i32* %p, i32 %val seq_cst + ret i32 %6 +} + +define i32 @atomic_or32(i32* nocapture %p, i32 %val) nounwind ssp { +; X86-NO-RAOINT-LABEL: atomic_or32: +; X86-NO-RAOINT: # %bb.0: +; X86-NO-RAOINT-NEXT: pushl %esi +; X86-NO-RAOINT-NEXT: subl $16, %esp +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NO-RAOINT-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NO-RAOINT-NEXT: lock orl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: lock orl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: lock orl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: lock orl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: lock orl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: movl (%eax), %eax +; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NO-RAOINT-NEXT: .LBB1_1: # %atomicrmw.start +; X86-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NO-RAOINT-NEXT: movl %eax, %edx +; X86-NO-RAOINT-NEXT: orl %esi, %edx +; X86-NO-RAOINT-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NO-RAOINT-NEXT: sete %cl +; X86-NO-RAOINT-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NO-RAOINT-NEXT: testb $1, %cl +; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X86-NO-RAOINT-NEXT: jne .LBB1_2 +; X86-NO-RAOINT-NEXT: jmp .LBB1_1 +; X86-NO-RAOINT-NEXT: .LBB1_2: # %atomicrmw.end +; X86-NO-RAOINT-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NO-RAOINT-NEXT: addl $16, %esp +; X86-NO-RAOINT-NEXT: popl %esi +; X86-NO-RAOINT-NEXT: retl +; +; X86-RAO-INT-LABEL: atomic_or32: +; X86-RAO-INT: # %bb.0: +; X86-RAO-INT-NEXT: pushl %esi +; X86-RAO-INT-NEXT: subl $16, %esp +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-RAO-INT-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-RAO-INT-NEXT: aorl %ecx, (%eax) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aorl %ecx, (%eax) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aorl %ecx, (%eax) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aorl %ecx, (%eax) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aorl %ecx, (%eax) +; X86-RAO-INT-NEXT: movl (%eax), %eax +; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-RAO-INT-NEXT: .LBB1_1: # %atomicrmw.start +; X86-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-RAO-INT-NEXT: movl %eax, %edx +; X86-RAO-INT-NEXT: orl %esi, %edx +; X86-RAO-INT-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-RAO-INT-NEXT: sete %cl +; X86-RAO-INT-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-RAO-INT-NEXT: testb $1, %cl +; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-RAO-INT-NEXT: jne .LBB1_2 +; X86-RAO-INT-NEXT: jmp .LBB1_1 +; X86-RAO-INT-NEXT: .LBB1_2: # %atomicrmw.end +; X86-RAO-INT-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-RAO-INT-NEXT: addl $16, %esp +; X86-RAO-INT-NEXT: popl %esi +; X86-RAO-INT-NEXT: retl +; +; X64-NO-RAOINT-LABEL: 
atomic_or32: +; X64-NO-RAOINT: # %bb.0: +; X64-NO-RAOINT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NO-RAOINT-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock orl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: movl (%rdi), %eax +; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NO-RAOINT-NEXT: .LBB1_1: # %atomicrmw.start +; X64-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload +; X64-NO-RAOINT-NEXT: movl %eax, %edx +; X64-NO-RAOINT-NEXT: orl %esi, %edx +; X64-NO-RAOINT-NEXT: lock cmpxchgl %edx, (%rcx) +; X64-NO-RAOINT-NEXT: sete %cl +; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NO-RAOINT-NEXT: testb $1, %cl +; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NO-RAOINT-NEXT: jne .LBB1_2 +; X64-NO-RAOINT-NEXT: jmp .LBB1_1 +; X64-NO-RAOINT-NEXT: .LBB1_2: # %atomicrmw.end +; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-NO-RAOINT-NEXT: retq +; +; X64-RAO-INT-LABEL: atomic_or32: +; X64-RAO-INT: # %bb.0: +; X64-RAO-INT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-RAO-INT-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-RAO-INT-NEXT: aorl %esi, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aorl %esi, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aorl %esi, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aorl %esi, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aorl %esi, (%rdi) +; X64-RAO-INT-NEXT: movl (%rdi), %eax +; 
X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-RAO-INT-NEXT: .LBB1_1: # %atomicrmw.start +; X64-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload +; X64-RAO-INT-NEXT: movl %eax, %edx +; X64-RAO-INT-NEXT: orl %esi, %edx +; X64-RAO-INT-NEXT: lock cmpxchgl %edx, (%rcx) +; X64-RAO-INT-NEXT: sete %cl +; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-RAO-INT-NEXT: testb $1, %cl +; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-RAO-INT-NEXT: jne .LBB1_2 +; X64-RAO-INT-NEXT: jmp .LBB1_1 +; X64-RAO-INT-NEXT: .LBB1_2: # %atomicrmw.end +; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-RAO-INT-NEXT: retq + %1 = atomicrmw or i32* %p, i32 %val monotonic + %2 = atomicrmw or i32* %p, i32 %val acquire + %3 = atomicrmw or i32* %p, i32 %val release + %4 = atomicrmw or i32* %p, i32 %val acq_rel + %5 = atomicrmw or i32* %p, i32 %val seq_cst + + %6 = atomicrmw or i32* %p, i32 %val seq_cst + ret i32 %6 +} + +define i32 @atomic_xor32(i32* nocapture %p, i32 %val) nounwind ssp { +; X86-NO-RAOINT-LABEL: atomic_xor32: +; X86-NO-RAOINT: # %bb.0: +; X86-NO-RAOINT-NEXT: pushl %esi +; X86-NO-RAOINT-NEXT: subl $16, %esp +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NO-RAOINT-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: lock xorl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: movl (%eax), %eax +; X86-NO-RAOINT-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NO-RAOINT-NEXT: .LBB2_1: # %atomicrmw.start +; X86-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NO-RAOINT-NEXT: movl %eax, %edx +; X86-NO-RAOINT-NEXT: xorl %esi, %edx +; X86-NO-RAOINT-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NO-RAOINT-NEXT: sete %cl +; X86-NO-RAOINT-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NO-RAOINT-NEXT: testb $1, %cl +; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NO-RAOINT-NEXT: jne .LBB2_2 +; X86-NO-RAOINT-NEXT: jmp .LBB2_1 +; X86-NO-RAOINT-NEXT: .LBB2_2: # %atomicrmw.end +; X86-NO-RAOINT-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NO-RAOINT-NEXT: addl $16, %esp +; X86-NO-RAOINT-NEXT: popl %esi +; X86-NO-RAOINT-NEXT: retl +; +; X86-RAO-INT-LABEL: atomic_xor32: +; X86-RAO-INT: # %bb.0: +; X86-RAO-INT-NEXT: pushl %esi +; X86-RAO-INT-NEXT: subl $16, %esp +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-RAO-INT-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-RAO-INT-NEXT: axorl %ecx, (%eax) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: axorl %ecx, (%eax) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: axorl %ecx, (%eax) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: axorl %ecx, (%eax) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: axorl %ecx, (%eax) +; X86-RAO-INT-NEXT: movl (%eax), %eax +; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-RAO-INT-NEXT: .LBB2_1: # %atomicrmw.start +; X86-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-RAO-INT-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-RAO-INT-NEXT: movl %eax, %edx +; X86-RAO-INT-NEXT: xorl %esi, %edx +; X86-RAO-INT-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-RAO-INT-NEXT: sete %cl +; X86-RAO-INT-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-RAO-INT-NEXT: testb $1, %cl +; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-RAO-INT-NEXT: jne .LBB2_2 +; X86-RAO-INT-NEXT: jmp .LBB2_1 +; X86-RAO-INT-NEXT: .LBB2_2: # %atomicrmw.end +; X86-RAO-INT-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-RAO-INT-NEXT: addl $16, %esp +; X86-RAO-INT-NEXT: popl %esi +; X86-RAO-INT-NEXT: retl +; +; X64-NO-RAOINT-LABEL: atomic_xor32: +; X64-NO-RAOINT: # %bb.0: +; X64-NO-RAOINT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NO-RAOINT-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock xorl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: movl (%rdi), %eax +; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NO-RAOINT-NEXT: .LBB2_1: # %atomicrmw.start +; X64-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload +; X64-NO-RAOINT-NEXT: movl %eax, %edx +; X64-NO-RAOINT-NEXT: xorl %esi, %edx +; X64-NO-RAOINT-NEXT: lock cmpxchgl %edx, (%rcx) +; X64-NO-RAOINT-NEXT: sete %cl +; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NO-RAOINT-NEXT: testb $1, %cl +; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NO-RAOINT-NEXT: jne .LBB2_2 +; X64-NO-RAOINT-NEXT: jmp 
.LBB2_1 +; X64-NO-RAOINT-NEXT: .LBB2_2: # %atomicrmw.end +; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-NO-RAOINT-NEXT: retq +; +; X64-RAO-INT-LABEL: atomic_xor32: +; X64-RAO-INT: # %bb.0: +; X64-RAO-INT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-RAO-INT-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-RAO-INT-NEXT: axorl %esi, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: axorl %esi, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: axorl %esi, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: axorl %esi, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: axorl %esi, (%rdi) +; X64-RAO-INT-NEXT: movl (%rdi), %eax +; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-RAO-INT-NEXT: .LBB2_1: # %atomicrmw.start +; X64-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload +; X64-RAO-INT-NEXT: movl %eax, %edx +; X64-RAO-INT-NEXT: xorl %esi, %edx +; X64-RAO-INT-NEXT: lock cmpxchgl %edx, (%rcx) +; X64-RAO-INT-NEXT: sete %cl +; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-RAO-INT-NEXT: testb $1, %cl +; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-RAO-INT-NEXT: jne .LBB2_2 +; X64-RAO-INT-NEXT: jmp .LBB2_1 +; X64-RAO-INT-NEXT: .LBB2_2: # %atomicrmw.end +; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-RAO-INT-NEXT: retq + %1 = atomicrmw xor i32* %p, i32 %val monotonic + %2 = atomicrmw xor i32* %p, i32 %val acquire + %3 = atomicrmw xor i32* %p, i32 %val release + %4 = atomicrmw xor i32* %p, i32 %val acq_rel + %5 = atomicrmw xor i32* %p, i32 %val seq_cst + + %6 = atomicrmw xor i32* %p, i32 %val seq_cst + ret i32 %6 +} + +define i32 @atomic_and32(i32* 
nocapture %p, i32 %val) nounwind ssp { +; X86-NO-RAOINT-LABEL: atomic_and32: +; X86-NO-RAOINT: # %bb.0: +; X86-NO-RAOINT-NEXT: pushl %esi +; X86-NO-RAOINT-NEXT: subl $16, %esp +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NO-RAOINT-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NO-RAOINT-NEXT: lock andl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: lock andl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: lock andl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: lock andl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: lock andl %ecx, (%eax) +; X86-NO-RAOINT-NEXT: movl (%eax), %eax +; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NO-RAOINT-NEXT: .LBB3_1: # %atomicrmw.start +; X86-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NO-RAOINT-NEXT: movl %eax, %edx +; X86-NO-RAOINT-NEXT: andl %esi, %edx +; X86-NO-RAOINT-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-NO-RAOINT-NEXT: sete %cl +; X86-NO-RAOINT-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NO-RAOINT-NEXT: testb $1, %cl +; X86-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NO-RAOINT-NEXT: jne .LBB3_2 +; X86-NO-RAOINT-NEXT: jmp .LBB3_1 +; X86-NO-RAOINT-NEXT: .LBB3_2: # %atomicrmw.end +; X86-NO-RAOINT-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NO-RAOINT-NEXT: addl $16, %esp +; X86-NO-RAOINT-NEXT: popl %esi +; X86-NO-RAOINT-NEXT: retl +; +; X86-RAO-INT-LABEL: atomic_and32: +; X86-RAO-INT: # %bb.0: +; X86-RAO-INT-NEXT: pushl %esi +; X86-RAO-INT-NEXT: subl $16, %esp +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-RAO-INT-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-RAO-INT-NEXT: movl 
{{[0-9]+}}(%esp), %eax +; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-RAO-INT-NEXT: aandl %ecx, (%eax) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aandl %ecx, (%eax) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aandl %ecx, (%eax) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aandl %ecx, (%eax) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aandl %ecx, (%eax) +; X86-RAO-INT-NEXT: movl (%eax), %eax +; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-RAO-INT-NEXT: .LBB3_1: # %atomicrmw.start +; X86-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-RAO-INT-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-RAO-INT-NEXT: movl %eax, %edx +; X86-RAO-INT-NEXT: andl %esi, %edx +; X86-RAO-INT-NEXT: lock cmpxchgl %edx, (%ecx) +; X86-RAO-INT-NEXT: sete %cl +; X86-RAO-INT-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-RAO-INT-NEXT: testb $1, %cl +; X86-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-RAO-INT-NEXT: jne .LBB3_2 +; X86-RAO-INT-NEXT: jmp .LBB3_1 +; X86-RAO-INT-NEXT: .LBB3_2: # %atomicrmw.end +; X86-RAO-INT-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-RAO-INT-NEXT: addl $16, %esp +; X86-RAO-INT-NEXT: popl %esi +; X86-RAO-INT-NEXT: retl +; +; X64-NO-RAOINT-LABEL: atomic_and32: +; X64-NO-RAOINT: # %bb.0: +; X64-NO-RAOINT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NO-RAOINT-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: lock andl %esi, (%rdi) +; X64-NO-RAOINT-NEXT: movl (%rdi), %eax +; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; 
X64-NO-RAOINT-NEXT: .LBB3_1: # %atomicrmw.start +; X64-NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload +; X64-NO-RAOINT-NEXT: movl %eax, %edx +; X64-NO-RAOINT-NEXT: andl %esi, %edx +; X64-NO-RAOINT-NEXT: lock cmpxchgl %edx, (%rcx) +; X64-NO-RAOINT-NEXT: sete %cl +; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NO-RAOINT-NEXT: testb $1, %cl +; X64-NO-RAOINT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NO-RAOINT-NEXT: jne .LBB3_2 +; X64-NO-RAOINT-NEXT: jmp .LBB3_1 +; X64-NO-RAOINT-NEXT: .LBB3_2: # %atomicrmw.end +; X64-NO-RAOINT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-NO-RAOINT-NEXT: retq +; +; X64-RAO-INT-LABEL: atomic_and32: +; X64-RAO-INT: # %bb.0: +; X64-RAO-INT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-RAO-INT-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-RAO-INT-NEXT: aandl %esi, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aandl %esi, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aandl %esi, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aandl %esi, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aandl %esi, (%rdi) +; X64-RAO-INT-NEXT: movl (%rdi), %eax +; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-RAO-INT-NEXT: .LBB3_1: # %atomicrmw.start +; X64-RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload +; X64-RAO-INT-NEXT: movl %eax, %edx +; X64-RAO-INT-NEXT: andl %esi, %edx +; X64-RAO-INT-NEXT: lock cmpxchgl %edx, (%rcx) +; X64-RAO-INT-NEXT: 
sete %cl +; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-RAO-INT-NEXT: testb $1, %cl +; X64-RAO-INT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-RAO-INT-NEXT: jne .LBB3_2 +; X64-RAO-INT-NEXT: jmp .LBB3_1 +; X64-RAO-INT-NEXT: .LBB3_2: # %atomicrmw.end +; X64-RAO-INT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; X64-RAO-INT-NEXT: retq + %1 = atomicrmw and i32* %p, i32 %val monotonic + %2 = atomicrmw and i32* %p, i32 %val acquire + %3 = atomicrmw and i32* %p, i32 %val release + %4 = atomicrmw and i32* %p, i32 %val acq_rel + %5 = atomicrmw and i32* %p, i32 %val seq_cst + + %6 = atomicrmw and i32* %p, i32 %val seq_cst + ret i32 %6 +} + +define i32 @atomic_sub32(i32* nocapture %p, i32 %val) nounwind ssp { +; X86-NO-RAOINT-LABEL: atomic_sub32: +; X86-NO-RAOINT: # %bb.0: +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NO-RAOINT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: lock subl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: negl %eax +; X86-NO-RAOINT-NEXT: lock xaddl %eax, (%ecx) +; X86-NO-RAOINT-NEXT: retl +; +; X86-RAO-INT-LABEL: atomic_sub32: +; X86-RAO-INT: # %bb.0: +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-RAO-INT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-RAO-INT-NEXT: negl %eax +; X86-RAO-INT-NEXT: aaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: mfence +; X86-RAO-INT-NEXT: aaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: lock xaddl %eax, (%ecx) +; X86-RAO-INT-NEXT: retl +; +; X64-NO-RAOINT-LABEL: atomic_sub32: +; X64-NO-RAOINT: # %bb.0: +; X64-NO-RAOINT-NEXT: movl %esi, %eax +; X64-NO-RAOINT-NEXT: lock subl 
%eax, (%rdi) +; X64-NO-RAOINT-NEXT: lock subl %eax, (%rdi) +; X64-NO-RAOINT-NEXT: lock subl %eax, (%rdi) +; X64-NO-RAOINT-NEXT: lock subl %eax, (%rdi) +; X64-NO-RAOINT-NEXT: lock subl %eax, (%rdi) +; X64-NO-RAOINT-NEXT: negl %eax +; X64-NO-RAOINT-NEXT: lock xaddl %eax, (%rdi) +; X64-NO-RAOINT-NEXT: retq +; +; X64-RAO-INT-LABEL: atomic_sub32: +; X64-RAO-INT: # %bb.0: +; X64-RAO-INT-NEXT: movl %esi, %eax +; X64-RAO-INT-NEXT: negl %eax +; X64-RAO-INT-NEXT: aaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: mfence +; X64-RAO-INT-NEXT: aaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: lock xaddl %eax, (%rdi) +; X64-RAO-INT-NEXT: retq + %1 = atomicrmw sub i32* %p, i32 %val monotonic + %2 = atomicrmw sub i32* %p, i32 %val acquire + %3 = atomicrmw sub i32* %p, i32 %val release + %4 = atomicrmw sub i32* %p, i32 %val acq_rel + %5 = atomicrmw sub i32* %p, i32 %val seq_cst + + %6 = atomicrmw sub i32* %p, i32 %val seq_cst + ret i32 %6 +} diff --git a/llvm/test/CodeGen/X86/atomic-instructions-64.ll b/llvm/test/CodeGen/X86/atomic-instructions-64.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/atomic-instructions-64.ll @@ -0,0 +1,293 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+raoint | FileCheck %s --check-prefixes=RAO-INT +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=NO-RAOINT + +define i64 @atomic_add64(i64* nocapture %p, i64 %val) nounwind ssp { +; RAO-INT-LABEL: atomic_add64: +; RAO-INT: # %bb.0: +; RAO-INT-NEXT: movq %rsi, %rax +; RAO-INT-NEXT: aaddq %rax, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aaddq %rax, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aaddq %rax, (%rdi) +; RAO-INT-NEXT: 
mfence +; RAO-INT-NEXT: aaddq %rax, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aaddq %rax, (%rdi) +; RAO-INT-NEXT: lock xaddq %rax, (%rdi) +; RAO-INT-NEXT: retq +; +; NO-RAOINT-LABEL: atomic_add64: +; NO-RAOINT: # %bb.0: +; NO-RAOINT-NEXT: movq %rsi, %rax +; NO-RAOINT-NEXT: lock addq %rax, (%rdi) +; NO-RAOINT-NEXT: lock addq %rax, (%rdi) +; NO-RAOINT-NEXT: lock addq %rax, (%rdi) +; NO-RAOINT-NEXT: lock addq %rax, (%rdi) +; NO-RAOINT-NEXT: lock addq %rax, (%rdi) +; NO-RAOINT-NEXT: lock xaddq %rax, (%rdi) +; NO-RAOINT-NEXT: retq + %1 = atomicrmw add i64* %p, i64 %val monotonic + %2 = atomicrmw add i64* %p, i64 %val acquire + %3 = atomicrmw add i64* %p, i64 %val release + %4 = atomicrmw add i64* %p, i64 %val acq_rel + %5 = atomicrmw add i64* %p, i64 %val seq_cst + + %6 = atomicrmw add i64* %p, i64 %val seq_cst + ret i64 %6 +} + +define i64 @atomic_or64(i64* nocapture %p, i64 %val) nounwind ssp { +; RAO-INT-LABEL: atomic_or64: +; RAO-INT: # %bb.0: +; RAO-INT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: aorq %rsi, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aorq %rsi, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aorq %rsi, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aorq %rsi, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aorq %rsi, (%rdi) +; RAO-INT-NEXT: movq (%rdi), %rax +; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: .LBB1_1: # %atomicrmw.start +; RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; RAO-INT-NEXT: movq %rax, %rdx +; RAO-INT-NEXT: orq %rsi, %rdx +; RAO-INT-NEXT: lock cmpxchgq %rdx, (%rcx) +; RAO-INT-NEXT: sete %cl +; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: 
testb $1, %cl +; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: jne .LBB1_2 +; RAO-INT-NEXT: jmp .LBB1_1 +; RAO-INT-NEXT: .LBB1_2: # %atomicrmw.end +; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; RAO-INT-NEXT: retq +; +; NO-RAOINT-LABEL: atomic_or64: +; NO-RAOINT: # %bb.0: +; NO-RAOINT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: lock orq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock orq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock orq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock orq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock orq %rsi, (%rdi) +; NO-RAOINT-NEXT: movq (%rdi), %rax +; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: .LBB1_1: # %atomicrmw.start +; NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; NO-RAOINT-NEXT: movq %rax, %rdx +; NO-RAOINT-NEXT: orq %rsi, %rdx +; NO-RAOINT-NEXT: lock cmpxchgq %rdx, (%rcx) +; NO-RAOINT-NEXT: sete %cl +; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: testb $1, %cl +; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: jne .LBB1_2 +; NO-RAOINT-NEXT: jmp .LBB1_1 +; NO-RAOINT-NEXT: .LBB1_2: # %atomicrmw.end +; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; NO-RAOINT-NEXT: retq + %1 = atomicrmw or i64* %p, i64 %val monotonic + %2 = atomicrmw or i64* %p, i64 %val acquire + %3 = atomicrmw or i64* %p, i64 %val release + %4 = atomicrmw or i64* %p, i64 %val acq_rel + %5 = atomicrmw or i64* %p, i64 %val seq_cst + + %6 = atomicrmw or i64* %p, i64 %val seq_cst + ret i64 %6 +} + +define i64 @atomic_xor64(i64* nocapture %p, i64 %val) nounwind ssp { +; 
RAO-INT-LABEL: atomic_xor64: +; RAO-INT: # %bb.0: +; RAO-INT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: axorq %rsi, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: axorq %rsi, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: axorq %rsi, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: axorq %rsi, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: axorq %rsi, (%rdi) +; RAO-INT-NEXT: movq (%rdi), %rax +; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: .LBB2_1: # %atomicrmw.start +; RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; RAO-INT-NEXT: movq %rax, %rdx +; RAO-INT-NEXT: xorq %rsi, %rdx +; RAO-INT-NEXT: lock cmpxchgq %rdx, (%rcx) +; RAO-INT-NEXT: sete %cl +; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: testb $1, %cl +; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: jne .LBB2_2 +; RAO-INT-NEXT: jmp .LBB2_1 +; RAO-INT-NEXT: .LBB2_2: # %atomicrmw.end +; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; RAO-INT-NEXT: retq +; +; NO-RAOINT-LABEL: atomic_xor64: +; NO-RAOINT: # %bb.0: +; NO-RAOINT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock xorq %rsi, (%rdi) +; NO-RAOINT-NEXT: movq (%rdi), %rax +; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: .LBB2_1: # %atomicrmw.start +; NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; 
NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; NO-RAOINT-NEXT: movq %rax, %rdx +; NO-RAOINT-NEXT: xorq %rsi, %rdx +; NO-RAOINT-NEXT: lock cmpxchgq %rdx, (%rcx) +; NO-RAOINT-NEXT: sete %cl +; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: testb $1, %cl +; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: jne .LBB2_2 +; NO-RAOINT-NEXT: jmp .LBB2_1 +; NO-RAOINT-NEXT: .LBB2_2: # %atomicrmw.end +; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; NO-RAOINT-NEXT: retq + %1 = atomicrmw xor i64* %p, i64 %val monotonic + %2 = atomicrmw xor i64* %p, i64 %val acquire + %3 = atomicrmw xor i64* %p, i64 %val release + %4 = atomicrmw xor i64* %p, i64 %val acq_rel + %5 = atomicrmw xor i64* %p, i64 %val seq_cst + + %6 = atomicrmw xor i64* %p, i64 %val seq_cst + ret i64 %6 +} + +define i64 @atomic_and64(i64* nocapture %p, i64 %val) nounwind ssp { +; RAO-INT-LABEL: atomic_and64: +; RAO-INT: # %bb.0: +; RAO-INT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: aandq %rsi, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aandq %rsi, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aandq %rsi, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aandq %rsi, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aandq %rsi, (%rdi) +; RAO-INT-NEXT: movq (%rdi), %rax +; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: .LBB3_1: # %atomicrmw.start +; RAO-INT-NEXT: # =>This Inner Loop Header: Depth=1 +; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; RAO-INT-NEXT: movq 
%rax, %rdx +; RAO-INT-NEXT: andq %rsi, %rdx +; RAO-INT-NEXT: lock cmpxchgq %rdx, (%rcx) +; RAO-INT-NEXT: sete %cl +; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: testb $1, %cl +; RAO-INT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; RAO-INT-NEXT: jne .LBB3_2 +; RAO-INT-NEXT: jmp .LBB3_1 +; RAO-INT-NEXT: .LBB3_2: # %atomicrmw.end +; RAO-INT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; RAO-INT-NEXT: retq +; +; NO-RAOINT-LABEL: atomic_and64: +; NO-RAOINT: # %bb.0: +; NO-RAOINT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: lock andq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock andq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock andq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock andq %rsi, (%rdi) +; NO-RAOINT-NEXT: lock andq %rsi, (%rdi) +; NO-RAOINT-NEXT: movq (%rdi), %rax +; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: .LBB3_1: # %atomicrmw.start +; NO-RAOINT-NEXT: # =>This Inner Loop Header: Depth=1 +; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; NO-RAOINT-NEXT: movq %rax, %rdx +; NO-RAOINT-NEXT: andq %rsi, %rdx +; NO-RAOINT-NEXT: lock cmpxchgq %rdx, (%rcx) +; NO-RAOINT-NEXT: sete %cl +; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: testb $1, %cl +; NO-RAOINT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NO-RAOINT-NEXT: jne .LBB3_2 +; NO-RAOINT-NEXT: jmp .LBB3_1 +; NO-RAOINT-NEXT: .LBB3_2: # %atomicrmw.end +; NO-RAOINT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; NO-RAOINT-NEXT: retq + %1 = atomicrmw and i64* %p, i64 %val monotonic + %2 = atomicrmw and i64* %p, i64 %val acquire + %3 = atomicrmw and i64* %p, i64 %val release + %4 = atomicrmw and i64* %p, 
i64 %val acq_rel + %5 = atomicrmw and i64* %p, i64 %val seq_cst + + %6 = atomicrmw and i64* %p, i64 %val seq_cst + ret i64 %6 +} + +define i64 @atomic_sub64(i64* nocapture %p, i64 %val) nounwind ssp { +; RAO-INT-LABEL: atomic_sub64: +; RAO-INT: # %bb.0: +; RAO-INT-NEXT: movq %rsi, %rax +; RAO-INT-NEXT: negq %rax +; RAO-INT-NEXT: aaddq %rax, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aaddq %rax, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aaddq %rax, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aaddq %rax, (%rdi) +; RAO-INT-NEXT: mfence +; RAO-INT-NEXT: aaddq %rax, (%rdi) +; RAO-INT-NEXT: lock xaddq %rax, (%rdi) +; RAO-INT-NEXT: retq +; +; NO-RAOINT-LABEL: atomic_sub64: +; NO-RAOINT: # %bb.0: +; NO-RAOINT-NEXT: movq %rsi, %rax +; NO-RAOINT-NEXT: lock subq %rax, (%rdi) +; NO-RAOINT-NEXT: lock subq %rax, (%rdi) +; NO-RAOINT-NEXT: lock subq %rax, (%rdi) +; NO-RAOINT-NEXT: lock subq %rax, (%rdi) +; NO-RAOINT-NEXT: lock subq %rax, (%rdi) +; NO-RAOINT-NEXT: negq %rax +; NO-RAOINT-NEXT: lock xaddq %rax, (%rdi) +; NO-RAOINT-NEXT: retq + %1 = atomicrmw sub i64* %p, i64 %val monotonic + %2 = atomicrmw sub i64* %p, i64 %val acquire + %3 = atomicrmw sub i64* %p, i64 %val release + %4 = atomicrmw sub i64* %p, i64 %val acq_rel + %5 = atomicrmw sub i64* %p, i64 %val seq_cst + + %6 = atomicrmw sub i64* %p, i64 %val seq_cst + ret i64 %6 +} diff --git a/llvm/test/MC/Disassembler/X86/rao-int.txt b/llvm/test/MC/Disassembler/X86/rao-int.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/rao-int.txt @@ -0,0 +1,98 @@ +# RUN: llvm-mc --disassemble %s -triple=i686 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=i686 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: aaddl %ebx, 268435456(%esp,%esi,8) +# INTEL: aadd dword ptr [esp + 8*esi + 268435456], ebx +0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10 + +# ATT: aaddl %ebx, 291(%edi,%eax,4) +# INTEL: aadd dword ptr [edi + 4*eax + 291], 
ebx +0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00 + +# ATT: aaddl %ebx, (%eax) +# INTEL: aadd dword ptr [eax], ebx +0x0f,0x38,0xfc,0x18 + +# ATT: aaddl %ebx, -512(,%ebp,2) +# INTEL: aadd dword ptr [2*ebp - 512], ebx +0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff + +# ATT: aaddl %ebx, 2032(%ecx) +# INTEL: aadd dword ptr [ecx + 2032], ebx +0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00 + +# ATT: aaddl %ebx, -2048(%edx) +# INTEL: aadd dword ptr [edx - 2048], ebx +0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff + +# ATT: aandl %ebx, 268435456(%esp,%esi,8) +# INTEL: aand dword ptr [esp + 8*esi + 268435456], ebx +0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10 + +# ATT: aandl %ebx, 291(%edi,%eax,4) +# INTEL: aand dword ptr [edi + 4*eax + 291], ebx +0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00 + +# ATT: aandl %ebx, (%eax) +# INTEL: aand dword ptr [eax], ebx +0x66,0x0f,0x38,0xfc,0x18 + +# ATT: aandl %ebx, -512(,%ebp,2) +# INTEL: aand dword ptr [2*ebp - 512], ebx +0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff + +# ATT: aandl %ebx, 2032(%ecx) +# INTEL: aand dword ptr [ecx + 2032], ebx +0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00 + +# ATT: aandl %ebx, -2048(%edx) +# INTEL: aand dword ptr [edx - 2048], ebx +0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff + +# ATT: aorl %ebx, 268435456(%esp,%esi,8) +# INTEL: aor dword ptr [esp + 8*esi + 268435456], ebx +0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10 + +# ATT: aorl %ebx, 291(%edi,%eax,4) +# INTEL: aor dword ptr [edi + 4*eax + 291], ebx +0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00 + +# ATT: aorl %ebx, (%eax) +# INTEL: aor dword ptr [eax], ebx +0xf2,0x0f,0x38,0xfc,0x18 + +# ATT: aorl %ebx, -512(,%ebp,2) +# INTEL: aor dword ptr [2*ebp - 512], ebx +0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff + +# ATT: aorl %ebx, 2032(%ecx) +# INTEL: aor dword ptr [ecx + 2032], ebx +0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00 + +# ATT: aorl %ebx, -2048(%edx) +# INTEL: aor dword ptr [edx - 2048], ebx 
+0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff + +# ATT: axorl %ebx, 268435456(%esp,%esi,8) +# INTEL: axor dword ptr [esp + 8*esi + 268435456], ebx +0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10 + +# ATT: axorl %ebx, 291(%edi,%eax,4) +# INTEL: axor dword ptr [edi + 4*eax + 291], ebx +0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00 + +# ATT: axorl %ebx, (%eax) +# INTEL: axor dword ptr [eax], ebx +0xf3,0x0f,0x38,0xfc,0x18 + +# ATT: axorl %ebx, -512(,%ebp,2) +# INTEL: axor dword ptr [2*ebp - 512], ebx +0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff + +# ATT: axorl %ebx, 2032(%ecx) +# INTEL: axor dword ptr [ecx + 2032], ebx +0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00 + +# ATT: axorl %ebx, -2048(%edx) +# INTEL: axor dword ptr [edx - 2048], ebx +0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff diff --git a/llvm/test/MC/Disassembler/X86/x86-64-rao-int.txt b/llvm/test/MC/Disassembler/X86/x86-64-rao-int.txt new file mode 100644 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/x86-64-rao-int.txt @@ -0,0 +1,194 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: aaddq %r9, 268435456(%rbp,%r14,8) +# INTEL: aadd qword ptr [rbp + 8*r14 + 268435456], r9 +0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10 + +# ATT: aaddq %r9, 291(%r8,%rax,4) +# INTEL: aadd qword ptr [r8 + 4*rax + 291], r9 +0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00 + +# ATT: aaddq %r9, (%rip) +# INTEL: aadd qword ptr [rip], r9 +0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00 + +# ATT: aaddq %r9, -512(,%rbp,2) +# INTEL: aadd qword ptr [2*rbp - 512], r9 +0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff + +# ATT: aaddq %r9, 2032(%rcx) +# INTEL: aadd qword ptr [rcx + 2032], r9 +0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00 + +# ATT: aaddq %r9, -2048(%rdx) +# INTEL: aadd qword ptr [rdx - 2048], r9 
+0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff + +# ATT: aaddl %ebx, 268435456(%esp,%esi,8) +# INTEL: aadd dword ptr [esp + 8*esi + 268435456], ebx +0x67,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10 + +# ATT: aaddl %ebx, 291(%edi,%eax,4) +# INTEL: aadd dword ptr [edi + 4*eax + 291], ebx +0x67,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00 + +# ATT: aaddl %ebx, (%eax) +# INTEL: aadd dword ptr [eax], ebx +0x67,0x0f,0x38,0xfc,0x18 + +# ATT: aaddl %ebx, -512(,%ebp,2) +# INTEL: aadd dword ptr [2*ebp - 512], ebx +0x67,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff + +# ATT: aaddl %ebx, 2032(%ecx) +# INTEL: aadd dword ptr [ecx + 2032], ebx +0x67,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00 + +# ATT: aaddl %ebx, -2048(%edx) +# INTEL: aadd dword ptr [edx - 2048], ebx +0x67,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff + +# ATT: aandq %r9, 268435456(%rbp,%r14,8) +# INTEL: aand qword ptr [rbp + 8*r14 + 268435456], r9 +0x66,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10 + +# ATT: aandq %r9, 291(%r8,%rax,4) +# INTEL: aand qword ptr [r8 + 4*rax + 291], r9 +0x66,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00 + +# ATT: aandq %r9, (%rip) +# INTEL: aand qword ptr [rip], r9 +0x66,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00 + +# ATT: aandq %r9, -512(,%rbp,2) +# INTEL: aand qword ptr [2*rbp - 512], r9 +0x66,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff + +# ATT: aandq %r9, 2032(%rcx) +# INTEL: aand qword ptr [rcx + 2032], r9 +0x66,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00 + +# ATT: aandq %r9, -2048(%rdx) +# INTEL: aand qword ptr [rdx - 2048], r9 +0x66,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff + +# ATT: aandl %ebx, 268435456(%esp,%esi,8) +# INTEL: aand dword ptr [esp + 8*esi + 268435456], ebx +0x67,0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10 + +# ATT: aandl %ebx, 291(%edi,%eax,4) +# INTEL: aand dword ptr [edi + 4*eax + 291], ebx +0x67,0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00 + +# ATT: aandl %ebx, (%eax) +# INTEL: aand dword ptr [eax], ebx +0x67,0x66,0x0f,0x38,0xfc,0x18 + 
+# ATT: aandl %ebx, -512(,%ebp,2) +# INTEL: aand dword ptr [2*ebp - 512], ebx +0x67,0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff + +# ATT: aandl %ebx, 2032(%ecx) +# INTEL: aand dword ptr [ecx + 2032], ebx +0x67,0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00 + +# ATT: aandl %ebx, -2048(%edx) +# INTEL: aand dword ptr [edx - 2048], ebx +0x67,0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff + +# ATT: aorq %r9, 268435456(%rbp,%r14,8) +# INTEL: aor qword ptr [rbp + 8*r14 + 268435456], r9 +0xf2,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10 + +# ATT: aorq %r9, 291(%r8,%rax,4) +# INTEL: aor qword ptr [r8 + 4*rax + 291], r9 +0xf2,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00 + +# ATT: aorq %r9, (%rip) +# INTEL: aor qword ptr [rip], r9 +0xf2,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00 + +# ATT: aorq %r9, -512(,%rbp,2) +# INTEL: aor qword ptr [2*rbp - 512], r9 +0xf2,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff + +# ATT: aorq %r9, 2032(%rcx) +# INTEL: aor qword ptr [rcx + 2032], r9 +0xf2,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00 + +# ATT: aorq %r9, -2048(%rdx) +# INTEL: aor qword ptr [rdx - 2048], r9 +0xf2,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff + +# ATT: aorl %ebx, 268435456(%esp,%esi,8) +# INTEL: aor dword ptr [esp + 8*esi + 268435456], ebx +0x67,0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10 + +# ATT: aorl %ebx, 291(%edi,%eax,4) +# INTEL: aor dword ptr [edi + 4*eax + 291], ebx +0x67,0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00 + +# ATT: aorl %ebx, (%eax) +# INTEL: aor dword ptr [eax], ebx +0x67,0xf2,0x0f,0x38,0xfc,0x18 + +# ATT: aorl %ebx, -512(,%ebp,2) +# INTEL: aor dword ptr [2*ebp - 512], ebx +0x67,0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff + +# ATT: aorl %ebx, 2032(%ecx) +# INTEL: aor dword ptr [ecx + 2032], ebx +0x67,0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00 + +# ATT: aorl %ebx, -2048(%edx) +# INTEL: aor dword ptr [edx - 2048], ebx +0x67,0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff + +# ATT: axorq %r9, 268435456(%rbp,%r14,8) +# 
INTEL: axor qword ptr [rbp + 8*r14 + 268435456], r9 +0xf3,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10 + +# ATT: axorq %r9, 291(%r8,%rax,4) +# INTEL: axor qword ptr [r8 + 4*rax + 291], r9 +0xf3,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00 + +# ATT: axorq %r9, (%rip) +# INTEL: axor qword ptr [rip], r9 +0xf3,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00 + +# ATT: axorq %r9, -512(,%rbp,2) +# INTEL: axor qword ptr [2*rbp - 512], r9 +0xf3,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff + +# ATT: axorq %r9, 2032(%rcx) +# INTEL: axor qword ptr [rcx + 2032], r9 +0xf3,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00 + +# ATT: axorq %r9, -2048(%rdx) +# INTEL: axor qword ptr [rdx - 2048], r9 +0xf3,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff + +# ATT: axorl %ebx, 268435456(%esp,%esi,8) +# INTEL: axor dword ptr [esp + 8*esi + 268435456], ebx +0x67,0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10 + +# ATT: axorl %ebx, 291(%edi,%eax,4) +# INTEL: axor dword ptr [edi + 4*eax + 291], ebx +0x67,0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00 + +# ATT: axorl %ebx, (%eax) +# INTEL: axor dword ptr [eax], ebx +0x67,0xf3,0x0f,0x38,0xfc,0x18 + +# ATT: axorl %ebx, -512(,%ebp,2) +# INTEL: axor dword ptr [2*ebp - 512], ebx +0x67,0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff + +# ATT: axorl %ebx, 2032(%ecx) +# INTEL: axor dword ptr [ecx + 2032], ebx +0x67,0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00 + +# ATT: axorl %ebx, -2048(%edx) +# INTEL: axor dword ptr [edx - 2048], ebx +0x67,0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff diff --git a/llvm/test/MC/X86/rao-int-att.s b/llvm/test/MC/X86/rao-int-att.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/rao-int-att.s @@ -0,0 +1,97 @@ +// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: aaddl %ebx, 268435456(%esp,%esi,8) +// CHECK: encoding: [0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + aaddl %ebx, 268435456(%esp,%esi,8) + +// CHECK: aaddl %ebx, 291(%edi,%eax,4) +// CHECK: encoding: 
[0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + aaddl %ebx, 291(%edi,%eax,4) + +// CHECK: aaddl %ebx, (%eax) +// CHECK: encoding: [0x0f,0x38,0xfc,0x18] + aaddl %ebx, (%eax) + +// CHECK: aaddl %ebx, -512(,%ebp,2) +// CHECK: encoding: [0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + aaddl %ebx, -512(,%ebp,2) + +// CHECK: aaddl %ebx, 2032(%ecx) +// CHECK: encoding: [0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + aaddl %ebx, 2032(%ecx) + +// CHECK: aaddl %ebx, -2048(%edx) +// CHECK: encoding: [0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + aaddl %ebx, -2048(%edx) + +// CHECK: aandl %ebx, 268435456(%esp,%esi,8) +// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + aandl %ebx, 268435456(%esp,%esi,8) + +// CHECK: aandl %ebx, 291(%edi,%eax,4) +// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + aandl %ebx, 291(%edi,%eax,4) + +// CHECK: aandl %ebx, (%eax) +// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x18] + aandl %ebx, (%eax) + +// CHECK: aandl %ebx, -512(,%ebp,2) +// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + aandl %ebx, -512(,%ebp,2) + +// CHECK: aandl %ebx, 2032(%ecx) +// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + aandl %ebx, 2032(%ecx) + +// CHECK: aandl %ebx, -2048(%edx) +// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + aandl %ebx, -2048(%edx) + +// CHECK: aorl %ebx, 268435456(%esp,%esi,8) +// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + aorl %ebx, 268435456(%esp,%esi,8) + +// CHECK: aorl %ebx, 291(%edi,%eax,4) +// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + aorl %ebx, 291(%edi,%eax,4) + +// CHECK: aorl %ebx, (%eax) +// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x18] + aorl %ebx, (%eax) + +// CHECK: aorl %ebx, -512(,%ebp,2) +// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + aorl %ebx, -512(,%ebp,2) + +// CHECK: aorl %ebx, 2032(%ecx) +// CHECK: encoding: 
[0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + aorl %ebx, 2032(%ecx) + +// CHECK: aorl %ebx, -2048(%edx) +// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + aorl %ebx, -2048(%edx) + +// CHECK: axorl %ebx, 268435456(%esp,%esi,8) +// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + axorl %ebx, 268435456(%esp,%esi,8) + +// CHECK: axorl %ebx, 291(%edi,%eax,4) +// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + axorl %ebx, 291(%edi,%eax,4) + +// CHECK: axorl %ebx, (%eax) +// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x18] + axorl %ebx, (%eax) + +// CHECK: axorl %ebx, -512(,%ebp,2) +// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + axorl %ebx, -512(,%ebp,2) + +// CHECK: axorl %ebx, 2032(%ecx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + axorl %ebx, 2032(%ecx) + +// CHECK: axorl %ebx, -2048(%edx) +// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + axorl %ebx, -2048(%edx) diff --git a/llvm/test/MC/X86/rao-int-intel.s b/llvm/test/MC/X86/rao-int-intel.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/rao-int-intel.s @@ -0,0 +1,97 @@ +// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: aadd dword ptr [esp + 8*esi + 268435456], ebx +// CHECK: encoding: [0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + aadd dword ptr [esp + 8*esi + 268435456], ebx + +// CHECK: aadd dword ptr [edi + 4*eax + 291], ebx +// CHECK: encoding: [0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + aadd dword ptr [edi + 4*eax + 291], ebx + +// CHECK: aadd dword ptr [eax], ebx +// CHECK: encoding: [0x0f,0x38,0xfc,0x18] + aadd dword ptr [eax], ebx + +// CHECK: aadd dword ptr [2*ebp - 512], ebx +// CHECK: encoding: [0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + aadd dword ptr [2*ebp - 512], ebx + +// CHECK: aadd dword ptr [ecx + 2032], ebx +// CHECK: encoding: 
[0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + aadd dword ptr [ecx + 2032], ebx + +// CHECK: aadd dword ptr [edx - 2048], ebx +// CHECK: encoding: [0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + aadd dword ptr [edx - 2048], ebx + +// CHECK: aand dword ptr [esp + 8*esi + 268435456], ebx +// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + aand dword ptr [esp + 8*esi + 268435456], ebx + +// CHECK: aand dword ptr [edi + 4*eax + 291], ebx +// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + aand dword ptr [edi + 4*eax + 291], ebx + +// CHECK: aand dword ptr [eax], ebx +// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x18] + aand dword ptr [eax], ebx + +// CHECK: aand dword ptr [2*ebp - 512], ebx +// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + aand dword ptr [2*ebp - 512], ebx + +// CHECK: aand dword ptr [ecx + 2032], ebx +// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + aand dword ptr [ecx + 2032], ebx + +// CHECK: aand dword ptr [edx - 2048], ebx +// CHECK: encoding: [0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + aand dword ptr [edx - 2048], ebx + +// CHECK: aor dword ptr [esp + 8*esi + 268435456], ebx +// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + aor dword ptr [esp + 8*esi + 268435456], ebx + +// CHECK: aor dword ptr [edi + 4*eax + 291], ebx +// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + aor dword ptr [edi + 4*eax + 291], ebx + +// CHECK: aor dword ptr [eax], ebx +// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x18] + aor dword ptr [eax], ebx + +// CHECK: aor dword ptr [2*ebp - 512], ebx +// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + aor dword ptr [2*ebp - 512], ebx + +// CHECK: aor dword ptr [ecx + 2032], ebx +// CHECK: encoding: [0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + aor dword ptr [ecx + 2032], ebx + +// CHECK: aor dword ptr [edx - 2048], ebx +// CHECK: encoding: 
[0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + aor dword ptr [edx - 2048], ebx + +// CHECK: axor dword ptr [esp + 8*esi + 268435456], ebx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + axor dword ptr [esp + 8*esi + 268435456], ebx + +// CHECK: axor dword ptr [edi + 4*eax + 291], ebx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + axor dword ptr [edi + 4*eax + 291], ebx + +// CHECK: axor dword ptr [eax], ebx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x18] + axor dword ptr [eax], ebx + +// CHECK: axor dword ptr [2*ebp - 512], ebx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + axor dword ptr [2*ebp - 512], ebx + +// CHECK: axor dword ptr [ecx + 2032], ebx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + axor dword ptr [ecx + 2032], ebx + +// CHECK: axor dword ptr [edx - 2048], ebx +// CHECK: encoding: [0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + axor dword ptr [edx - 2048], ebx diff --git a/llvm/test/MC/X86/x86-64-rao-int-att.s b/llvm/test/MC/X86/x86-64-rao-int-att.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/x86-64-rao-int-att.s @@ -0,0 +1,193 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: aaddq %r9, 268435456(%rbp,%r14,8) +// CHECK: encoding: [0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10] + aaddq %r9, 268435456(%rbp,%r14,8) + +// CHECK: aaddq %r9, 291(%r8,%rax,4) +// CHECK: encoding: [0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00] + aaddq %r9, 291(%r8,%rax,4) + +// CHECK: aaddq %r9, (%rip) +// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00] + aaddq %r9, (%rip) + +// CHECK: aaddq %r9, -512(,%rbp,2) +// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff] + aaddq %r9, -512(,%rbp,2) + +// CHECK: aaddq %r9, 2032(%rcx) +// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00] + aaddq %r9, 2032(%rcx) + +// CHECK: aaddq %r9, -2048(%rdx) +// CHECK: 
encoding: [0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff] + aaddq %r9, -2048(%rdx) + +// CHECK: aaddl %ebx, 268435456(%esp,%esi,8) +// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + aaddl %ebx, 268435456(%esp,%esi,8) + +// CHECK: aaddl %ebx, 291(%edi,%eax,4) +// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + aaddl %ebx, 291(%edi,%eax,4) + +// CHECK: aaddl %ebx, (%eax) +// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x18] + aaddl %ebx, (%eax) + +// CHECK: aaddl %ebx, -512(,%ebp,2) +// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + aaddl %ebx, -512(,%ebp,2) + +// CHECK: aaddl %ebx, 2032(%ecx) +// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + aaddl %ebx, 2032(%ecx) + +// CHECK: aaddl %ebx, -2048(%edx) +// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + aaddl %ebx, -2048(%edx) + +// CHECK: aandq %r9, 268435456(%rbp,%r14,8) +// CHECK: encoding: [0x66,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10] + aandq %r9, 268435456(%rbp,%r14,8) + +// CHECK: aandq %r9, 291(%r8,%rax,4) +// CHECK: encoding: [0x66,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00] + aandq %r9, 291(%r8,%rax,4) + +// CHECK: aandq %r9, (%rip) +// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00] + aandq %r9, (%rip) + +// CHECK: aandq %r9, -512(,%rbp,2) +// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff] + aandq %r9, -512(,%rbp,2) + +// CHECK: aandq %r9, 2032(%rcx) +// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00] + aandq %r9, 2032(%rcx) + +// CHECK: aandq %r9, -2048(%rdx) +// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff] + aandq %r9, -2048(%rdx) + +// CHECK: aandl %ebx, 268435456(%esp,%esi,8) +// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + aandl %ebx, 268435456(%esp,%esi,8) + +// CHECK: aandl %ebx, 291(%edi,%eax,4) +// CHECK: encoding: 
[0x67,0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + aandl %ebx, 291(%edi,%eax,4) + +// CHECK: aandl %ebx, (%eax) +// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x18] + aandl %ebx, (%eax) + +// CHECK: aandl %ebx, -512(,%ebp,2) +// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + aandl %ebx, -512(,%ebp,2) + +// CHECK: aandl %ebx, 2032(%ecx) +// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + aandl %ebx, 2032(%ecx) + +// CHECK: aandl %ebx, -2048(%edx) +// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + aandl %ebx, -2048(%edx) + +// CHECK: aorq %r9, 268435456(%rbp,%r14,8) +// CHECK: encoding: [0xf2,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10] + aorq %r9, 268435456(%rbp,%r14,8) + +// CHECK: aorq %r9, 291(%r8,%rax,4) +// CHECK: encoding: [0xf2,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00] + aorq %r9, 291(%r8,%rax,4) + +// CHECK: aorq %r9, (%rip) +// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00] + aorq %r9, (%rip) + +// CHECK: aorq %r9, -512(,%rbp,2) +// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff] + aorq %r9, -512(,%rbp,2) + +// CHECK: aorq %r9, 2032(%rcx) +// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00] + aorq %r9, 2032(%rcx) + +// CHECK: aorq %r9, -2048(%rdx) +// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff] + aorq %r9, -2048(%rdx) + +// CHECK: aorl %ebx, 268435456(%esp,%esi,8) +// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + aorl %ebx, 268435456(%esp,%esi,8) + +// CHECK: aorl %ebx, 291(%edi,%eax,4) +// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + aorl %ebx, 291(%edi,%eax,4) + +// CHECK: aorl %ebx, (%eax) +// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x18] + aorl %ebx, (%eax) + +// CHECK: aorl %ebx, -512(,%ebp,2) +// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + aorl %ebx, -512(,%ebp,2) + 
+// CHECK: aorl %ebx, 2032(%ecx) +// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + aorl %ebx, 2032(%ecx) + +// CHECK: aorl %ebx, -2048(%edx) +// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + aorl %ebx, -2048(%edx) + +// CHECK: axorq %r9, 268435456(%rbp,%r14,8) +// CHECK: encoding: [0xf3,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10] + axorq %r9, 268435456(%rbp,%r14,8) + +// CHECK: axorq %r9, 291(%r8,%rax,4) +// CHECK: encoding: [0xf3,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00] + axorq %r9, 291(%r8,%rax,4) + +// CHECK: axorq %r9, (%rip) +// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00] + axorq %r9, (%rip) + +// CHECK: axorq %r9, -512(,%rbp,2) +// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff] + axorq %r9, -512(,%rbp,2) + +// CHECK: axorq %r9, 2032(%rcx) +// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00] + axorq %r9, 2032(%rcx) + +// CHECK: axorq %r9, -2048(%rdx) +// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff] + axorq %r9, -2048(%rdx) + +// CHECK: axorl %ebx, 268435456(%esp,%esi,8) +// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + axorl %ebx, 268435456(%esp,%esi,8) + +// CHECK: axorl %ebx, 291(%edi,%eax,4) +// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + axorl %ebx, 291(%edi,%eax,4) + +// CHECK: axorl %ebx, (%eax) +// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x18] + axorl %ebx, (%eax) + +// CHECK: axorl %ebx, -512(,%ebp,2) +// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + axorl %ebx, -512(,%ebp,2) + +// CHECK: axorl %ebx, 2032(%ecx) +// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + axorl %ebx, 2032(%ecx) + +// CHECK: axorl %ebx, -2048(%edx) +// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + axorl %ebx, -2048(%edx) diff --git a/llvm/test/MC/X86/x86-64-rao-int-intel.s 
b/llvm/test/MC/X86/x86-64-rao-int-intel.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/X86/x86-64-rao-int-intel.s @@ -0,0 +1,193 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: aadd qword ptr [rbp + 8*r14 + 268435456], r9 +// CHECK: encoding: [0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10] + aadd qword ptr [rbp + 8*r14 + 268435456], r9 + +// CHECK: aadd qword ptr [r8 + 4*rax + 291], r9 +// CHECK: encoding: [0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00] + aadd qword ptr [r8 + 4*rax + 291], r9 + +// CHECK: aadd qword ptr [rip], r9 +// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00] + aadd qword ptr [rip], r9 + +// CHECK: aadd qword ptr [2*rbp - 512], r9 +// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff] + aadd qword ptr [2*rbp - 512], r9 + +// CHECK: aadd qword ptr [rcx + 2032], r9 +// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00] + aadd qword ptr [rcx + 2032], r9 + +// CHECK: aadd qword ptr [rdx - 2048], r9 +// CHECK: encoding: [0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff] + aadd qword ptr [rdx - 2048], r9 + +// CHECK: aadd dword ptr [esp + 8*esi + 268435456], ebx +// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + aadd dword ptr [esp + 8*esi + 268435456], ebx + +// CHECK: aadd dword ptr [edi + 4*eax + 291], ebx +// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + aadd dword ptr [edi + 4*eax + 291], ebx + +// CHECK: aadd dword ptr [eax], ebx +// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x18] + aadd dword ptr [eax], ebx + +// CHECK: aadd dword ptr [2*ebp - 512], ebx +// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + aadd dword ptr [2*ebp - 512], ebx + +// CHECK: aadd dword ptr [ecx + 2032], ebx +// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + aadd dword ptr [ecx + 2032], ebx + +// CHECK: aadd dword ptr [edx - 2048], 
ebx +// CHECK: encoding: [0x67,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + aadd dword ptr [edx - 2048], ebx + +// CHECK: aand qword ptr [rbp + 8*r14 + 268435456], r9 +// CHECK: encoding: [0x66,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10] + aand qword ptr [rbp + 8*r14 + 268435456], r9 + +// CHECK: aand qword ptr [r8 + 4*rax + 291], r9 +// CHECK: encoding: [0x66,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00] + aand qword ptr [r8 + 4*rax + 291], r9 + +// CHECK: aand qword ptr [rip], r9 +// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00] + aand qword ptr [rip], r9 + +// CHECK: aand qword ptr [2*rbp - 512], r9 +// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff] + aand qword ptr [2*rbp - 512], r9 + +// CHECK: aand qword ptr [rcx + 2032], r9 +// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00] + aand qword ptr [rcx + 2032], r9 + +// CHECK: aand qword ptr [rdx - 2048], r9 +// CHECK: encoding: [0x66,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff] + aand qword ptr [rdx - 2048], r9 + +// CHECK: aand dword ptr [esp + 8*esi + 268435456], ebx +// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + aand dword ptr [esp + 8*esi + 268435456], ebx + +// CHECK: aand dword ptr [edi + 4*eax + 291], ebx +// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + aand dword ptr [edi + 4*eax + 291], ebx + +// CHECK: aand dword ptr [eax], ebx +// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x18] + aand dword ptr [eax], ebx + +// CHECK: aand dword ptr [2*ebp - 512], ebx +// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + aand dword ptr [2*ebp - 512], ebx + +// CHECK: aand dword ptr [ecx + 2032], ebx +// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + aand dword ptr [ecx + 2032], ebx + +// CHECK: aand dword ptr [edx - 2048], ebx +// CHECK: encoding: [0x67,0x66,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + aand dword ptr [edx - 
2048], ebx + +// CHECK: aor qword ptr [rbp + 8*r14 + 268435456], r9 +// CHECK: encoding: [0xf2,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10] + aor qword ptr [rbp + 8*r14 + 268435456], r9 + +// CHECK: aor qword ptr [r8 + 4*rax + 291], r9 +// CHECK: encoding: [0xf2,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00] + aor qword ptr [r8 + 4*rax + 291], r9 + +// CHECK: aor qword ptr [rip], r9 +// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00] + aor qword ptr [rip], r9 + +// CHECK: aor qword ptr [2*rbp - 512], r9 +// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff] + aor qword ptr [2*rbp - 512], r9 + +// CHECK: aor qword ptr [rcx + 2032], r9 +// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00] + aor qword ptr [rcx + 2032], r9 + +// CHECK: aor qword ptr [rdx - 2048], r9 +// CHECK: encoding: [0xf2,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff] + aor qword ptr [rdx - 2048], r9 + +// CHECK: aor dword ptr [esp + 8*esi + 268435456], ebx +// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + aor dword ptr [esp + 8*esi + 268435456], ebx + +// CHECK: aor dword ptr [edi + 4*eax + 291], ebx +// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + aor dword ptr [edi + 4*eax + 291], ebx + +// CHECK: aor dword ptr [eax], ebx +// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x18] + aor dword ptr [eax], ebx + +// CHECK: aor dword ptr [2*ebp - 512], ebx +// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + aor dword ptr [2*ebp - 512], ebx + +// CHECK: aor dword ptr [ecx + 2032], ebx +// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + aor dword ptr [ecx + 2032], ebx + +// CHECK: aor dword ptr [edx - 2048], ebx +// CHECK: encoding: [0x67,0xf2,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + aor dword ptr [edx - 2048], ebx + +// CHECK: axor qword ptr [rbp + 8*r14 + 268435456], r9 +// CHECK: encoding: 
[0xf3,0x4e,0x0f,0x38,0xfc,0x8c,0xf5,0x00,0x00,0x00,0x10] + axor qword ptr [rbp + 8*r14 + 268435456], r9 + +// CHECK: axor qword ptr [r8 + 4*rax + 291], r9 +// CHECK: encoding: [0xf3,0x4d,0x0f,0x38,0xfc,0x8c,0x80,0x23,0x01,0x00,0x00] + axor qword ptr [r8 + 4*rax + 291], r9 + +// CHECK: axor qword ptr [rip], r9 +// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x0d,0x00,0x00,0x00,0x00] + axor qword ptr [rip], r9 + +// CHECK: axor qword ptr [2*rbp - 512], r9 +// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x0c,0x6d,0x00,0xfe,0xff,0xff] + axor qword ptr [2*rbp - 512], r9 + +// CHECK: axor qword ptr [rcx + 2032], r9 +// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x89,0xf0,0x07,0x00,0x00] + axor qword ptr [rcx + 2032], r9 + +// CHECK: axor qword ptr [rdx - 2048], r9 +// CHECK: encoding: [0xf3,0x4c,0x0f,0x38,0xfc,0x8a,0x00,0xf8,0xff,0xff] + axor qword ptr [rdx - 2048], r9 + +// CHECK: axor dword ptr [esp + 8*esi + 268435456], ebx +// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9c,0xf4,0x00,0x00,0x00,0x10] + axor dword ptr [esp + 8*esi + 268435456], ebx + +// CHECK: axor dword ptr [edi + 4*eax + 291], ebx +// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9c,0x87,0x23,0x01,0x00,0x00] + axor dword ptr [edi + 4*eax + 291], ebx + +// CHECK: axor dword ptr [eax], ebx +// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x18] + axor dword ptr [eax], ebx + +// CHECK: axor dword ptr [2*ebp - 512], ebx +// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x1c,0x6d,0x00,0xfe,0xff,0xff] + axor dword ptr [2*ebp - 512], ebx + +// CHECK: axor dword ptr [ecx + 2032], ebx +// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x99,0xf0,0x07,0x00,0x00] + axor dword ptr [ecx + 2032], ebx + +// CHECK: axor dword ptr [edx - 2048], ebx +// CHECK: encoding: [0x67,0xf3,0x0f,0x38,0xfc,0x9a,0x00,0xf8,0xff,0xff] + axor dword ptr [edx - 2048], ebx