Index: lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h =================================================================== --- lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -152,6 +152,10 @@ case WebAssembly::ATOMIC_RMW8_U_XCHG_I32_S: case WebAssembly::ATOMIC_RMW8_U_XCHG_I64: case WebAssembly::ATOMIC_RMW8_U_XCHG_I64_S: + case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32: + case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32_S: + case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64: + case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64_S: return 0; case WebAssembly::LOAD16_S_I32: case WebAssembly::LOAD16_S_I32_S: @@ -197,6 +201,10 @@ case WebAssembly::ATOMIC_RMW16_U_XCHG_I32_S: case WebAssembly::ATOMIC_RMW16_U_XCHG_I64: case WebAssembly::ATOMIC_RMW16_U_XCHG_I64_S: + case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32: + case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32_S: + case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64: + case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64_S: return 1; case WebAssembly::LOAD_I32: case WebAssembly::LOAD_I32_S: @@ -244,6 +252,10 @@ case WebAssembly::ATOMIC_RMW_XCHG_I32_S: case WebAssembly::ATOMIC_RMW32_U_XCHG_I64: case WebAssembly::ATOMIC_RMW32_U_XCHG_I64_S: + case WebAssembly::ATOMIC_RMW_CMPXCHG_I32: + case WebAssembly::ATOMIC_RMW_CMPXCHG_I32_S: + case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64: + case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64_S: return 2; case WebAssembly::LOAD_I64: case WebAssembly::LOAD_I64_S: @@ -269,6 +281,8 @@ case WebAssembly::ATOMIC_RMW_XOR_I64_S: case WebAssembly::ATOMIC_RMW_XCHG_I64: case WebAssembly::ATOMIC_RMW_XCHG_I64_S: + case WebAssembly::ATOMIC_RMW_CMPXCHG_I64: + case WebAssembly::ATOMIC_RMW_CMPXCHG_I64_S: return 3; default: llvm_unreachable("Only loads and stores have p2align values"); Index: lib/Target/WebAssembly/WebAssemblyInstrAtomics.td =================================================================== --- 
lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -655,3 +655,246 @@ ATOMIC_RMW8_U_XCHG_I32, ATOMIC_RMW16_U_XCHG_I32, ATOMIC_RMW8_U_XCHG_I64, ATOMIC_RMW16_U_XCHG_I64, ATOMIC_RMW32_U_XCHG_I64>; } // Predicates = [HasAtomics] + +//===----------------------------------------------------------------------===// +// Atomic ternary read-modify-writes +//===----------------------------------------------------------------------===// + +// TODO LLVM IR's cmpxchg instruction returns a pair of {loaded value, +// success flag}. When we use a success flag or both values, we can't make use +// of truncate/extend versions of instructions for now, which is suboptimal. Add +// selection rules for those cases too. + +let Defs = [ARGUMENTS] in { + +multiclass WebAssemblyTerRMW { + defm "" : I<(outs rc:$dst), + (ins P2Align:$p2align, offset32_op:$off, I32:$addr, rc:$exp, + rc:$new), + (outs), (ins P2Align:$p2align, offset32_op:$off), [], + !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}, $exp, $new"), + !strconcat(Name, "\t${off}, ${p2align}"), Opcode>; +} + +defm ATOMIC_RMW_CMPXCHG_I32 : + WebAssemblyTerRMW; +defm ATOMIC_RMW_CMPXCHG_I64 : + WebAssemblyTerRMW; +defm ATOMIC_RMW8_U_CMPXCHG_I32 : + WebAssemblyTerRMW; +defm ATOMIC_RMW16_U_CMPXCHG_I32 : + WebAssemblyTerRMW; +defm ATOMIC_RMW8_U_CMPXCHG_I64 : + WebAssemblyTerRMW; +defm ATOMIC_RMW16_U_CMPXCHG_I64 : + WebAssemblyTerRMW; +defm ATOMIC_RMW32_U_CMPXCHG_I64 : + WebAssemblyTerRMW; +} + +// Select ternary RMWs with no constant offset. +class TerRMWPatNoOffset : + Pat<(ty (kind I32:$addr, ty:$exp, ty:$new)), + (inst 0, 0, I32:$addr, ty:$exp, ty:$new)>; + +// Select ternary RMWs with a constant offset. 
+ +// Pattern with address + immediate offset +class TerRMWPatImmOff : + Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)), + (inst 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>; + +class TerRMWPatGlobalAddr : + Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)), + ty:$exp, ty:$new)), + (inst 0, tglobaladdr:$off, I32:$addr, ty:$exp, ty:$new)>; + +class TerRMWPatExternalSym : + Pat<(ty (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)), + ty:$exp, ty:$new)), + (inst 0, texternalsym:$off, I32:$addr, ty:$exp, ty:$new)>; + +// Select ternary RMWs with just a constant offset. +class TerRMWPatOffsetOnly : + Pat<(ty (kind imm:$off, ty:$exp, ty:$new)), + (inst 0, imm:$off, (CONST_I32 0), ty:$exp, ty:$new)>; + +class TerRMWPatGlobalAddrOffOnly : + Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)), + (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, ty:$new)>; + +class TerRMWPatExternSymOffOnly : + Pat<(ty (kind (WebAssemblywrapper texternalsym:$off), ty:$exp, ty:$new)), + (inst 0, texternalsym:$off, (CONST_I32 0), ty:$exp, ty:$new)>; + +// Patterns for various addressing modes. +multiclass TerRMWPattern { + def : TerRMWPatNoOffset; + def : TerRMWPatNoOffset; + + def : TerRMWPatImmOff; + def : TerRMWPatImmOff; + def : TerRMWPatImmOff; + def : TerRMWPatImmOff; + + def : TerRMWPatGlobalAddr; + def : TerRMWPatGlobalAddr; + + def : TerRMWPatExternalSym; + def : TerRMWPatExternalSym; + + def : TerRMWPatOffsetOnly; + def : TerRMWPatOffsetOnly; + + def : TerRMWPatGlobalAddrOffOnly; + def : TerRMWPatGlobalAddrOffOnly; + + def : TerRMWPatExternSymOffOnly; + def : TerRMWPatExternSymOffOnly; +} + +let Predicates = [HasAtomics] in { +defm : TerRMWPattern; +} // Predicates = [HasAtomics] + +// Truncating & zero-extending ternary RMW patterns. +// DAG legalization & optimization before instruction selection may introduce +// additional nodes such as anyext or assertzext depending on operand types. 
+class zext_ter_rmw_8_32 : + PatFrag<(ops node:$addr, node:$exp, node:$new), + (and (i32 (kind node:$addr, node:$exp, node:$new)), 255)>; +class zext_ter_rmw_16_32 : + PatFrag<(ops node:$addr, node:$exp, node:$new), + (and (i32 (kind node:$addr, node:$exp, node:$new)), 65535)>; +class zext_ter_rmw_8_64 : + PatFrag<(ops node:$addr, node:$exp, node:$new), + (zext (i32 (assertzext (i32 (kind node:$addr, + (i32 (trunc (i64 node:$exp))), + (i32 (trunc (i64 node:$new))))))))>; +class zext_ter_rmw_16_64 : zext_ter_rmw_8_64; +class zext_ter_rmw_32_64 : + PatFrag<(ops node:$addr, node:$exp, node:$new), + (zext (i32 (kind node:$addr, + (i32 (trunc (i64 node:$exp))), + (i32 (trunc (i64 node:$new))))))>; + +// Truncating & sign-extending ternary RMW patterns. +// We match subword RMWs (for 32-bit) and anyext RMWs (for 64-bit) and select a +// zext RMW; the next instruction will be sext_inreg which is selected by +// itself. +class sext_ter_rmw_8_32 : + PatFrag<(ops node:$addr, node:$exp, node:$new), + (kind node:$addr, node:$exp, node:$new)>; +class sext_ter_rmw_16_32 : sext_ter_rmw_8_32; +class sext_ter_rmw_8_64 : + PatFrag<(ops node:$addr, node:$exp, node:$new), + (anyext (i32 (assertzext (i32 + (kind node:$addr, + (i32 (trunc (i64 node:$exp))), + (i32 (trunc (i64 node:$new))))))))>; +class sext_ter_rmw_16_64 : sext_ter_rmw_8_64; +// 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_s/i32 + +// Patterns for various addressing modes for truncating-extending ternary RMWs. 
+multiclass TerRMWTruncExtPattern< + PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64, + NI inst8_32, NI inst16_32, NI inst8_64, NI inst16_64, NI inst32_64> { + // Truncating-extending ternary RMWs with no constant offset + def : TerRMWPatNoOffset, inst8_32>; + def : TerRMWPatNoOffset, inst16_32>; + def : TerRMWPatNoOffset, inst8_64>; + def : TerRMWPatNoOffset, inst16_64>; + def : TerRMWPatNoOffset, inst32_64>; + + def : TerRMWPatNoOffset, inst8_32>; + def : TerRMWPatNoOffset, inst16_32>; + def : TerRMWPatNoOffset, inst8_64>; + def : TerRMWPatNoOffset, inst16_64>; + + // Truncating-extending ternary RMWs with a constant offset + def : TerRMWPatImmOff, regPlusImm, inst8_32>; + def : TerRMWPatImmOff, regPlusImm, inst16_32>; + def : TerRMWPatImmOff, regPlusImm, inst8_64>; + def : TerRMWPatImmOff, regPlusImm, inst16_64>; + def : TerRMWPatImmOff, regPlusImm, inst32_64>; + def : TerRMWPatImmOff, or_is_add, inst8_32>; + def : TerRMWPatImmOff, or_is_add, inst16_32>; + def : TerRMWPatImmOff, or_is_add, inst8_64>; + def : TerRMWPatImmOff, or_is_add, inst16_64>; + def : TerRMWPatImmOff, or_is_add, inst32_64>; + + def : TerRMWPatImmOff, regPlusImm, inst8_32>; + def : TerRMWPatImmOff, regPlusImm, inst16_32>; + def : TerRMWPatImmOff, regPlusImm, inst8_64>; + def : TerRMWPatImmOff, regPlusImm, inst16_64>; + def : TerRMWPatImmOff, or_is_add, inst8_32>; + def : TerRMWPatImmOff, or_is_add, inst16_32>; + def : TerRMWPatImmOff, or_is_add, inst8_64>; + def : TerRMWPatImmOff, or_is_add, inst16_64>; + + def : TerRMWPatGlobalAddr, inst8_32>; + def : TerRMWPatGlobalAddr, inst16_32>; + def : TerRMWPatGlobalAddr, inst8_64>; + def : TerRMWPatGlobalAddr, inst16_64>; + def : TerRMWPatGlobalAddr, inst32_64>; + + def : TerRMWPatGlobalAddr, inst8_32>; + def : TerRMWPatGlobalAddr, inst16_32>; + def : TerRMWPatGlobalAddr, inst8_64>; + def : TerRMWPatGlobalAddr, inst16_64>; + + def : TerRMWPatExternalSym, inst8_32>; + def : TerRMWPatExternalSym, inst16_32>; + def : TerRMWPatExternalSym, 
inst8_64>; + def : TerRMWPatExternalSym, inst16_64>; + def : TerRMWPatExternalSym, inst32_64>; + + def : TerRMWPatExternalSym, inst8_32>; + def : TerRMWPatExternalSym, inst16_32>; + def : TerRMWPatExternalSym, inst8_64>; + def : TerRMWPatExternalSym, inst16_64>; + + // Truncating-extending ternary RMWs with just a constant offset + def : TerRMWPatOffsetOnly, inst8_32>; + def : TerRMWPatOffsetOnly, inst16_32>; + def : TerRMWPatOffsetOnly, inst8_64>; + def : TerRMWPatOffsetOnly, inst16_64>; + def : TerRMWPatOffsetOnly, inst32_64>; + + def : TerRMWPatOffsetOnly, inst8_32>; + def : TerRMWPatOffsetOnly, inst16_32>; + def : TerRMWPatOffsetOnly, inst8_64>; + def : TerRMWPatOffsetOnly, inst16_64>; + + def : TerRMWPatGlobalAddrOffOnly, inst8_32>; + def : TerRMWPatGlobalAddrOffOnly, inst16_32>; + def : TerRMWPatGlobalAddrOffOnly, inst8_64>; + def : TerRMWPatGlobalAddrOffOnly, inst16_64>; + def : TerRMWPatGlobalAddrOffOnly, inst32_64>; + + def : TerRMWPatGlobalAddrOffOnly, inst8_32>; + def : TerRMWPatGlobalAddrOffOnly, inst16_32>; + def : TerRMWPatGlobalAddrOffOnly, inst8_64>; + def : TerRMWPatGlobalAddrOffOnly, inst16_64>; + + def : TerRMWPatExternSymOffOnly, inst8_32>; + def : TerRMWPatExternSymOffOnly, inst16_32>; + def : TerRMWPatExternSymOffOnly, inst8_64>; + def : TerRMWPatExternSymOffOnly, inst16_64>; + def : TerRMWPatExternSymOffOnly, inst32_64>; + + def : TerRMWPatExternSymOffOnly, inst8_32>; + def : TerRMWPatExternSymOffOnly, inst16_32>; + def : TerRMWPatExternSymOffOnly, inst8_64>; + def : TerRMWPatExternSymOffOnly, inst16_64>; +} + +let Predicates = [HasAtomics] in { +defm : TerRMWTruncExtPattern< + atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64, + ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32, + ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64, + ATOMIC_RMW32_U_CMPXCHG_I64>; +} // Predicates = [HasAtomics] Index: lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp 
=================================================================== --- lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -119,6 +119,8 @@ case WebAssembly::ATOMIC_RMW8_U_XOR_I64: case WebAssembly::ATOMIC_RMW8_U_XCHG_I32: case WebAssembly::ATOMIC_RMW8_U_XCHG_I64: + case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I32: + case WebAssembly::ATOMIC_RMW8_U_CMPXCHG_I64: case WebAssembly::ATOMIC_RMW16_U_ADD_I32: case WebAssembly::ATOMIC_RMW16_U_ADD_I64: case WebAssembly::ATOMIC_RMW16_U_SUB_I32: @@ -131,6 +133,8 @@ case WebAssembly::ATOMIC_RMW16_U_XOR_I64: case WebAssembly::ATOMIC_RMW16_U_XCHG_I32: case WebAssembly::ATOMIC_RMW16_U_XCHG_I64: + case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I32: + case WebAssembly::ATOMIC_RMW16_U_CMPXCHG_I64: case WebAssembly::ATOMIC_RMW_ADD_I32: case WebAssembly::ATOMIC_RMW32_U_ADD_I64: case WebAssembly::ATOMIC_RMW_SUB_I32: @@ -143,12 +147,15 @@ case WebAssembly::ATOMIC_RMW32_U_XOR_I64: case WebAssembly::ATOMIC_RMW_XCHG_I32: case WebAssembly::ATOMIC_RMW32_U_XCHG_I64: + case WebAssembly::ATOMIC_RMW_CMPXCHG_I32: + case WebAssembly::ATOMIC_RMW32_U_CMPXCHG_I64: case WebAssembly::ATOMIC_RMW_ADD_I64: case WebAssembly::ATOMIC_RMW_SUB_I64: case WebAssembly::ATOMIC_RMW_AND_I64: case WebAssembly::ATOMIC_RMW_OR_I64: case WebAssembly::ATOMIC_RMW_XOR_I64: case WebAssembly::ATOMIC_RMW_XCHG_I64: + case WebAssembly::ATOMIC_RMW_CMPXCHG_I64: RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo); break; case WebAssembly::STORE_I32: Index: test/CodeGen/WebAssembly/atomic-mem-consistency.ll =================================================================== --- test/CodeGen/WebAssembly/atomic-mem-consistency.ll +++ test/CodeGen/WebAssembly/atomic-mem-consistency.ll @@ -139,3 +139,109 @@ %old = atomicrmw add i32* %p, i32 %v seq_cst ret i32 %old } + +; Ternary RMW instruction: cmpxchg +; The success and failure ordering arguments specify how this cmpxchg +; synchronizes with other atomic operations. 
Both ordering parameters must be at +; least monotonic, the ordering constraint on failure must be no stronger than +; that on success, and the failure ordering cannot be either release or acq_rel. + +; CHECK-LABEL: cmpxchg_i32_monotonic_monotonic: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_i32_monotonic_monotonic(i32* %p, i32 %exp, i32 %new) { + %pair = cmpxchg i32* %p, i32 %exp, i32 %new monotonic monotonic + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; CHECK-LABEL: cmpxchg_i32_acquire_monotonic: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_i32_acquire_monotonic(i32* %p, i32 %exp, i32 %new) { + %pair = cmpxchg i32* %p, i32 %exp, i32 %new acquire monotonic + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; CHECK-LABEL: cmpxchg_i32_release_monotonic: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_i32_release_monotonic(i32* %p, i32 %exp, i32 %new) { + %pair = cmpxchg i32* %p, i32 %exp, i32 %new release monotonic + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; CHECK-LABEL: cmpxchg_i32_acq_rel_monotonic: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_i32_acq_rel_monotonic(i32* %p, i32 %exp, i32 %new) { + %pair = cmpxchg i32* %p, i32 %exp, i32 %new acq_rel monotonic + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; CHECK-LABEL: cmpxchg_i32_seq_cst_monotonic: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_i32_seq_cst_monotonic(i32* %p, i32 %exp, i32 %new) { + 
%pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst monotonic + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; CHECK-LABEL: cmpxchg_i32_acquire_acquire: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_i32_acquire_acquire(i32* %p, i32 %exp, i32 %new) { + %pair = cmpxchg i32* %p, i32 %exp, i32 %new acquire acquire + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; CHECK-LABEL: cmpxchg_i32_release_acquire: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_i32_release_acquire(i32* %p, i32 %exp, i32 %new) { + %pair = cmpxchg i32* %p, i32 %exp, i32 %new release acquire + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; CHECK-LABEL: cmpxchg_i32_acq_rel_acquire: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_i32_acq_rel_acquire(i32* %p, i32 %exp, i32 %new) { + %pair = cmpxchg i32* %p, i32 %exp, i32 %new acq_rel acquire + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; CHECK-LABEL: cmpxchg_i32_seq_cst_acquire: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_i32_seq_cst_acquire(i32* %p, i32 %exp, i32 %new) { + %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst acquire + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; CHECK-LABEL: cmpxchg_i32_seq_cst_seq_cst: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_i32_seq_cst_seq_cst(i32* %p, i32 %exp, i32 %new) { + %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 
%old +} Index: test/CodeGen/WebAssembly/atomic-rmw.ll =================================================================== --- test/CodeGen/WebAssembly/atomic-rmw.ll +++ test/CodeGen/WebAssembly/atomic-rmw.ll @@ -64,6 +64,27 @@ ret i32 %old } +; CHECK-LABEL: cmpxchg_i32_loaded_value: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_i32_loaded_value(i32* %p, i32 %exp, i32 %new) { + %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; CHECK-LABEL: cmpxchg_i32_success: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: i32.eq $push1=, $pop0, $1{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i1 @cmpxchg_i32_success(i32* %p, i32 %exp, i32 %new) { + %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst seq_cst + %succ = extractvalue { i32, i1 } %pair, 1 + ret i1 %succ +} + ;===---------------------------------------------------------------------------- ; Atomic read-modify-writes: 64-bit ;===---------------------------------------------------------------------------- @@ -122,6 +143,27 @@ ret i64 %old } +; CHECK-LABEL: cmpxchg_i64_loaded_value: +; CHECK-NEXT: .param i32, i64, i64{{$}} +; CHECK: i64.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @cmpxchg_i64_loaded_value(i64* %p, i64 %exp, i64 %new) { + %pair = cmpxchg i64* %p, i64 %exp, i64 %new seq_cst seq_cst + %old = extractvalue { i64, i1 } %pair, 0 + ret i64 %old +} + +; CHECK-LABEL: cmpxchg_i64_success: +; CHECK-NEXT: .param i32, i64, i64{{$}} +; CHECK: i64.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: i64.eq $push1=, $pop0, $1{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i1 @cmpxchg_i64_success(i64* %p, i64 %exp, i64 %new) { + %pair = cmpxchg i64* %p, i64 %exp, i64 %new seq_cst seq_cst + %succ = extractvalue { i64, 
i1 } %pair, 1 + ret i1 %succ +} + ;===---------------------------------------------------------------------------- ; Atomic truncating & sign-extending RMWs ;===---------------------------------------------------------------------------- @@ -510,6 +552,81 @@ ret i64 %e } +; cmpxchg + +; CHECK-LABEL: cmpxchg_sext_i8_i32: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @cmpxchg_sext_i8_i32(i8* %p, i32 %exp, i32 %new) { + %exp_t = trunc i32 %exp to i8 + %new_t = trunc i32 %new to i8 + %pair = cmpxchg i8* %p, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %e = sext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: cmpxchg_sext_i16_i32: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: i32.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @cmpxchg_sext_i16_i32(i16* %p, i32 %exp, i32 %new) { + %exp_t = trunc i32 %exp to i16 + %new_t = trunc i32 %new to i16 + %pair = cmpxchg i16* %p, i16 %exp_t, i16 %new_t seq_cst seq_cst + %old = extractvalue { i16, i1 } %pair, 0 + %e = sext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: cmpxchg_sext_i8_i64: +; CHECK-NEXT: .param i32, i64, i64{{$}} +; CHECK: i64.atomic.rmw8_u.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: i64.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @cmpxchg_sext_i8_i64(i8* %p, i64 %exp, i64 %new) { + %exp_t = trunc i64 %exp to i8 + %new_t = trunc i64 %new to i8 + %pair = cmpxchg i8* %p, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %e = sext i8 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: cmpxchg_sext_i16_i64: +; CHECK-NEXT: .param i32, i64, i64{{$}} +; CHECK: i64.atomic.rmw16_u.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: i64.extend16_s $push1=, $pop0{{$}} +; 
CHECK-NEXT: return $pop1{{$}} +define i64 @cmpxchg_sext_i16_i64(i16* %p, i64 %exp, i64 %new) { + %exp_t = trunc i64 %exp to i16 + %new_t = trunc i64 %new to i16 + %pair = cmpxchg i16* %p, i16 %exp_t, i16 %new_t seq_cst seq_cst + %old = extractvalue { i16, i1 } %pair, 0 + %e = sext i16 %old to i64 + ret i64 %e +} + +; 32->64 sext rmw gets selected as i32.atomic.rmw.cmpxchg, i64.extend_s/i32 +; CHECK-LABEL: cmpxchg_sext_i32_i64: +; CHECK-NEXT: .param i32, i64, i64{{$}} +; CHECK: i32.wrap/i64 $push1=, $1{{$}} +; CHECK-NEXT: i32.wrap/i64 $push0=, $2{{$}} +; CHECK-NEXT: i32.atomic.rmw.cmpxchg $push2=, 0($0), $pop1, $pop0{{$}} +; CHECK-NEXT: i64.extend_s/i32 $push3=, $pop2{{$}} +; CHECK-NEXT: return $pop3{{$}} +define i64 @cmpxchg_sext_i32_i64(i32* %p, i64 %exp, i64 %new) { + %exp_t = trunc i64 %exp to i32 + %new_t = trunc i64 %new to i32 + %pair = cmpxchg i32* %p, i32 %exp_t, i32 %new_t seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + %e = sext i32 %old to i64 + ret i64 %e +} + ;===---------------------------------------------------------------------------- ; Atomic truncating & zero-extending RMWs ;===---------------------------------------------------------------------------- @@ -855,3 +972,70 @@ %e = zext i32 %old to i64 ret i64 %e } + +; cmpxchg + +; CHECK-LABEL: cmpxchg_zext_i8_i32: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw8_u.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_zext_i8_i32(i8* %p, i32 %exp, i32 %new) { + %exp_t = trunc i32 %exp to i8 + %new_t = trunc i32 %new to i8 + %pair = cmpxchg i8* %p, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %e = zext i8 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: cmpxchg_zext_i16_i32: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw16_u.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_zext_i16_i32(i16* %p, i32 %exp, i32 %new) { + %exp_t = trunc i32 %exp 
to i16 + %new_t = trunc i32 %new to i16 + %pair = cmpxchg i16* %p, i16 %exp_t, i16 %new_t seq_cst seq_cst + %old = extractvalue { i16, i1 } %pair, 0 + %e = zext i16 %old to i32 + ret i32 %e +} + +; CHECK-LABEL: cmpxchg_zext_i8_i64: +; CHECK-NEXT: .param i32, i64, i64{{$}} +; CHECK: i64.atomic.rmw8_u.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @cmpxchg_zext_i8_i64(i8* %p, i64 %exp, i64 %new) { + %exp_t = trunc i64 %exp to i8 + %new_t = trunc i64 %new to i8 + %pair = cmpxchg i8* %p, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %e = zext i8 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: cmpxchg_zext_i16_i64: +; CHECK-NEXT: .param i32, i64, i64{{$}} +; CHECK: i64.atomic.rmw16_u.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @cmpxchg_zext_i16_i64(i16* %p, i64 %exp, i64 %new) { + %exp_t = trunc i64 %exp to i16 + %new_t = trunc i64 %new to i16 + %pair = cmpxchg i16* %p, i16 %exp_t, i16 %new_t seq_cst seq_cst + %old = extractvalue { i16, i1 } %pair, 0 + %e = zext i16 %old to i64 + ret i64 %e +} + +; CHECK-LABEL: cmpxchg_zext_i32_i64: +; CHECK-NEXT: .param i32, i64, i64{{$}} +; CHECK: i64.atomic.rmw32_u.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @cmpxchg_zext_i32_i64(i32* %p, i64 %exp, i64 %new) { + %exp_t = trunc i64 %exp to i32 + %new_t = trunc i64 %new to i32 + %pair = cmpxchg i32* %p, i32 %exp_t, i32 %new_t seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + %e = zext i32 %old to i64 + ret i64 %e +} Index: test/CodeGen/WebAssembly/offset-atomics.ll =================================================================== --- test/CodeGen/WebAssembly/offset-atomics.ll +++ test/CodeGen/WebAssembly/offset-atomics.ll @@ -1070,3 +1070,458 @@ %old = atomicrmw add i8* %p, i8 %t seq_cst ret i8 %old } + +;===---------------------------------------------------------------------------- +; Atomic ternary read-modify-writes: 32-bit 
+;===---------------------------------------------------------------------------- + +; Basic RMW. + +; CHECK-LABEL: cmpxchg_i32_no_offset: +; CHECK-NEXT: .param i32, i32, i32{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @cmpxchg_i32_no_offset(i32* %p, i32 %exp, i32 %new) { + %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; With an nuw add, we can fold an offset. + +; CHECK-LABEL: cmpxchg_i32_with_folded_offset: +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}} +define i32 @cmpxchg_i32_with_folded_offset(i32* %p, i32 %exp, i32 %new) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; With an inbounds gep, we can fold an offset. + +; CHECK-LABEL: cmpxchg_i32_with_folded_gep_offset: +; CHECK: i32.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}} +define i32 @cmpxchg_i32_with_folded_gep_offset(i32* %p, i32 %exp, i32 %new) { + %s = getelementptr inbounds i32, i32* %p, i32 6 + %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; We can't fold a negative offset though, even with an inbounds gep. + +; CHECK-LABEL: cmpxchg_i32_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}} +define i32 @cmpxchg_i32_with_unfolded_gep_negative_offset(i32* %p, i32 %exp, i32 %new) { + %s = getelementptr inbounds i32, i32* %p, i32 -6 + %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; Without nuw, and even with nsw, we can't fold an offset. 
+ +; CHECK-LABEL: cmpxchg_i32_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}} +define i32 @cmpxchg_i32_with_unfolded_offset(i32* %p, i32 %exp, i32 %new) { + %q = ptrtoint i32* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; Without inbounds, we can't fold a gep offset. + +; CHECK-LABEL: cmpxchg_i32_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}} +define i32 @cmpxchg_i32_with_unfolded_gep_offset(i32* %p, i32 %exp, i32 %new) { + %s = getelementptr i32, i32* %p, i32 6 + %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; When loading from a fixed address, materialize a zero. + +; CHECK-LABEL: cmpxchg_i32_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push1=, 42($pop0), $0, $1{{$}} +define i32 @cmpxchg_i32_from_numeric_address(i32 %exp, i32 %new) { + %s = inttoptr i32 42 to i32* + %pair = cmpxchg i32* %s, i32 %exp, i32 %new seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +; CHECK-LABEL: cmpxchg_i32_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.rmw.cmpxchg $push1=, gv($pop0), $0, $1{{$}} +define i32 @cmpxchg_i32_from_global_address(i32 %exp, i32 %new) { + %pair = cmpxchg i32* @gv, i32 %exp, i32 %new seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + ret i32 %old +} + +;===---------------------------------------------------------------------------- +; Atomic ternary read-modify-writes: 64-bit +;===---------------------------------------------------------------------------- + +; Basic RMW. 
+ +; CHECK-LABEL: cmpxchg_i64_no_offset: +; CHECK-NEXT: .param i32, i64, i64{{$}} +; CHECK: i64.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @cmpxchg_i64_no_offset(i64* %p, i64 %exp, i64 %new) { + %pair = cmpxchg i64* %p, i64 %exp, i64 %new seq_cst seq_cst + %old = extractvalue { i64, i1 } %pair, 0 + ret i64 %old +} + +; With an nuw add, we can fold an offset. + +; CHECK-LABEL: cmpxchg_i64_with_folded_offset: +; CHECK: i64.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}} +define i64 @cmpxchg_i64_with_folded_offset(i64* %p, i64 %exp, i64 %new) { + %q = ptrtoint i64* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i64* + %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst + %old = extractvalue { i64, i1 } %pair, 0 + ret i64 %old +} + +; With an inbounds gep, we can fold an offset. + +; CHECK-LABEL: cmpxchg_i64_with_folded_gep_offset: +; CHECK: i64.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}} +define i64 @cmpxchg_i64_with_folded_gep_offset(i64* %p, i64 %exp, i64 %new) { + %s = getelementptr inbounds i64, i64* %p, i32 3 + %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst + %old = extractvalue { i64, i1 } %pair, 0 + ret i64 %old +} + +; We can't fold a negative offset though, even with an inbounds gep. + +; CHECK-LABEL: cmpxchg_i64_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}} +define i64 @cmpxchg_i64_with_unfolded_gep_negative_offset(i64* %p, i64 %exp, i64 %new) { + %s = getelementptr inbounds i64, i64* %p, i32 -3 + %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst + %old = extractvalue { i64, i1 } %pair, 0 + ret i64 %old +} + +; Without nuw, and even with nsw, we can't fold an offset. 
+ +; CHECK-LABEL: cmpxchg_i64_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}} +define i64 @cmpxchg_i64_with_unfolded_offset(i64* %p, i64 %exp, i64 %new) { + %q = ptrtoint i64* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i64* + %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst + %old = extractvalue { i64, i1 } %pair, 0 + ret i64 %old +} + +; Without inbounds, we can't fold a gep offset. + +; CHECK-LABEL: cmpxchg_i64_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}} +define i64 @cmpxchg_i64_with_unfolded_gep_offset(i64* %p, i64 %exp, i64 %new) { + %s = getelementptr i64, i64* %p, i32 3 + %pair = cmpxchg i64* %s, i64 %exp, i64 %new seq_cst seq_cst + %old = extractvalue { i64, i1 } %pair, 0 + ret i64 %old +} + +;===---------------------------------------------------------------------------- +; Atomic truncating & sign-extending ternary RMWs +;===---------------------------------------------------------------------------- + +; Fold an offset into a sign-extending rmw. 
+ +; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_offset: +; CHECK: i32.atomic.rmw8_u.cmpxchg $push0=, 24($0), $1, $2{{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0 +define i32 @cmpxchg_i8_i32_s_with_folded_offset(i8* %p, i32 %exp, i32 %new) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i8* + %exp_t = trunc i32 %exp to i8 + %new_t = trunc i32 %new to i8 + %pair = cmpxchg i8* %s, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %u = sext i8 %old to i32 + ret i32 %u +} + +; 32->64 sext rmw gets selected as i32.atomic.rmw.cmpxchg, i64.extend_s/i32 +; CHECK-LABEL: cmpxchg_i32_i64_s_with_folded_offset: +; CHECK: i32.wrap/i64 $push1=, $1 +; CHECK-NEXT: i32.wrap/i64 $push0=, $2 +; CHECK-NEXT: i32.atomic.rmw.cmpxchg $push2=, 24($0), $pop1, $pop0{{$}} +; CHECK-NEXT: i64.extend_s/i32 $push3=, $pop2{{$}} +define i64 @cmpxchg_i32_i64_s_with_folded_offset(i32* %p, i64 %exp, i64 %new) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %exp_t = trunc i64 %exp to i32 + %new_t = trunc i64 %new to i32 + %pair = cmpxchg i32* %s, i32 %exp_t, i32 %new_t seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + %u = sext i32 %old to i64 + ret i64 %u +} + +; Fold a gep offset into a sign-extending rmw.
+ +; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_gep_offset: +; CHECK: i32.atomic.rmw8_u.cmpxchg $push0=, 24($0), $1, $2{{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0 +define i32 @cmpxchg_i8_i32_s_with_folded_gep_offset(i8* %p, i32 %exp, i32 %new) { + %s = getelementptr inbounds i8, i8* %p, i32 24 + %exp_t = trunc i32 %exp to i8 + %new_t = trunc i32 %new to i8 + %pair = cmpxchg i8* %s, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %u = sext i8 %old to i32 + ret i32 %u +} + +; The gep index counts i16 elements, so index 24 folds to an offset of 48. + +; CHECK-LABEL: cmpxchg_i16_i32_s_with_folded_gep_offset: +; CHECK: i32.atomic.rmw16_u.cmpxchg $push0=, 48($0), $1, $2{{$}} +; CHECK-NEXT: i32.extend16_s $push1=, $pop0 +define i32 @cmpxchg_i16_i32_s_with_folded_gep_offset(i16* %p, i32 %exp, i32 %new) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %exp_t = trunc i32 %exp to i16 + %new_t = trunc i32 %new to i16 + %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst + %old = extractvalue { i16, i1 } %pair, 0 + %u = sext i16 %old to i32 + ret i32 %u +} + +; CHECK-LABEL: cmpxchg_i16_i64_s_with_folded_gep_offset: +; CHECK: i64.atomic.rmw16_u.cmpxchg $push0=, 48($0), $1, $2{{$}} +; CHECK-NEXT: i64.extend16_s $push1=, $pop0 +define i64 @cmpxchg_i16_i64_s_with_folded_gep_offset(i16* %p, i64 %exp, i64 %new) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %exp_t = trunc i64 %exp to i16 + %new_t = trunc i64 %new to i16 + %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst + %old = extractvalue { i16, i1 } %pair, 0 + %u = sext i16 %old to i64 + ret i64 %u +} + +; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as +; an 'add' if the or'ed bits are known to be zero.
+ +; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_or_offset: +; CHECK: i32.atomic.rmw8_u.cmpxchg $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}} +; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} +define i32 @cmpxchg_i8_i32_s_with_folded_or_offset(i32 %x, i32 %exp, i32 %new) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %exp_t = trunc i32 %exp to i8 + %new_t = trunc i32 %new to i8 + %pair = cmpxchg i8* %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %conv = sext i8 %old to i32 + ret i32 %conv +} + +; CHECK-LABEL: cmpxchg_i8_i64_s_with_folded_or_offset: +; CHECK: i64.atomic.rmw8_u.cmpxchg $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}} +; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} +define i64 @cmpxchg_i8_i64_s_with_folded_or_offset(i32 %x, i64 %exp, i64 %new) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %exp_t = trunc i64 %exp to i8 + %new_t = trunc i64 %new to i8 + %pair = cmpxchg i8* %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %conv = sext i8 %old to i64 + ret i64 %conv +} + +; When accessing a fixed address, materialize a zero.
+ +; CHECK-LABEL: cmpxchg_i16_i32_s_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.rmw16_u.cmpxchg $push1=, 42($pop0), $0, $1{{$}} +; CHECK-NEXT: i32.extend16_s $push2=, $pop1 +define i32 @cmpxchg_i16_i32_s_from_numeric_address(i32 %exp, i32 %new) { + %s = inttoptr i32 42 to i16* + %exp_t = trunc i32 %exp to i16 + %new_t = trunc i32 %new to i16 + %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst + %old = extractvalue { i16, i1 } %pair, 0 + %u = sext i16 %old to i32 + ret i32 %u +} + +; A global address is also placed in the offset field, over a zero base. + +; CHECK-LABEL: cmpxchg_i8_i32_s_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.rmw8_u.cmpxchg $push1=, gv8($pop0), $0, $1{{$}} +; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}} +define i32 @cmpxchg_i8_i32_s_from_global_address(i32 %exp, i32 %new) { + %exp_t = trunc i32 %exp to i8 + %new_t = trunc i32 %new to i8 + %pair = cmpxchg i8* @gv8, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %u = sext i8 %old to i32 + ret i32 %u +} + +;===---------------------------------------------------------------------------- +; Atomic truncating & zero-extending ternary RMWs +;===---------------------------------------------------------------------------- + +; Fold an offset into a zero-extending rmw.
+ +; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_offset: +; CHECK: i32.atomic.rmw8_u.cmpxchg $push0=, 24($0), $1, $2{{$}} +define i32 @cmpxchg_i8_i32_z_with_folded_offset(i8* %p, i32 %exp, i32 %new) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i8* + %exp_t = trunc i32 %exp to i8 + %new_t = trunc i32 %new to i8 + %pair = cmpxchg i8* %s, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %u = zext i8 %old to i32 + ret i32 %u +} + +; CHECK-LABEL: cmpxchg_i32_i64_z_with_folded_offset: +; CHECK: i64.atomic.rmw32_u.cmpxchg $push0=, 24($0), $1, $2{{$}} +define i64 @cmpxchg_i32_i64_z_with_folded_offset(i32* %p, i64 %exp, i64 %new) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %exp_t = trunc i64 %exp to i32 + %new_t = trunc i64 %new to i32 + %pair = cmpxchg i32* %s, i32 %exp_t, i32 %new_t seq_cst seq_cst + %old = extractvalue { i32, i1 } %pair, 0 + %u = zext i32 %old to i64 + ret i64 %u +} + +; Fold a gep offset into a zero-extending rmw.
+ +; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_gep_offset: +; CHECK: i32.atomic.rmw8_u.cmpxchg $push0=, 24($0), $1, $2{{$}} +define i32 @cmpxchg_i8_i32_z_with_folded_gep_offset(i8* %p, i32 %exp, i32 %new) { + %s = getelementptr inbounds i8, i8* %p, i32 24 + %exp_t = trunc i32 %exp to i8 + %new_t = trunc i32 %new to i8 + %pair = cmpxchg i8* %s, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %u = zext i8 %old to i32 + ret i32 %u +} + +; The gep index counts i16 elements, so index 24 folds to an offset of 48. + +; CHECK-LABEL: cmpxchg_i16_i32_z_with_folded_gep_offset: +; CHECK: i32.atomic.rmw16_u.cmpxchg $push0=, 48($0), $1, $2{{$}} +define i32 @cmpxchg_i16_i32_z_with_folded_gep_offset(i16* %p, i32 %exp, i32 %new) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %exp_t = trunc i32 %exp to i16 + %new_t = trunc i32 %new to i16 + %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst + %old = extractvalue { i16, i1 } %pair, 0 + %u = zext i16 %old to i32 + ret i32 %u +} + +; CHECK-LABEL: cmpxchg_i16_i64_z_with_folded_gep_offset: +; CHECK: i64.atomic.rmw16_u.cmpxchg $push0=, 48($0), $1, $2{{$}} +define i64 @cmpxchg_i16_i64_z_with_folded_gep_offset(i16* %p, i64 %exp, i64 %new) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %exp_t = trunc i64 %exp to i16 + %new_t = trunc i64 %new to i16 + %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst + %old = extractvalue { i16, i1 } %pair, 0 + %u = zext i16 %old to i64 + ret i64 %u +} + +; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as +; an 'add' if the or'ed bits are known to be zero.
+ +; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_or_offset: +; CHECK: i32.atomic.rmw8_u.cmpxchg $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}} +define i32 @cmpxchg_i8_i32_z_with_folded_or_offset(i32 %x, i32 %exp, i32 %new) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %exp_t = trunc i32 %exp to i8 + %new_t = trunc i32 %new to i8 + %pair = cmpxchg i8* %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %conv = zext i8 %old to i32 + ret i32 %conv +} + +; CHECK-LABEL: cmpxchg_i8_i64_z_with_folded_or_offset: +; CHECK: i64.atomic.rmw8_u.cmpxchg $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}} +define i64 @cmpxchg_i8_i64_z_with_folded_or_offset(i32 %x, i64 %exp, i64 %new) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %exp_t = trunc i64 %exp to i8 + %new_t = trunc i64 %new to i8 + %pair = cmpxchg i8* %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %conv = zext i8 %old to i64 + ret i64 %conv +} + +; When accessing a fixed address, materialize a zero.
+ +; CHECK-LABEL: cmpxchg_i16_i32_z_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.rmw16_u.cmpxchg $push1=, 42($pop0), $0, $1{{$}} +define i32 @cmpxchg_i16_i32_z_from_numeric_address(i32 %exp, i32 %new) { + %s = inttoptr i32 42 to i16* + %exp_t = trunc i32 %exp to i16 + %new_t = trunc i32 %new to i16 + %pair = cmpxchg i16* %s, i16 %exp_t, i16 %new_t seq_cst seq_cst + %old = extractvalue { i16, i1 } %pair, 0 + %u = zext i16 %old to i32 + ret i32 %u +} + +; A global address is also placed in the offset field, over a zero base. + +; CHECK-LABEL: cmpxchg_i8_i32_z_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.rmw8_u.cmpxchg $push1=, gv8($pop0), $0, $1{{$}} +define i32 @cmpxchg_i8_i32_z_from_global_address(i32 %exp, i32 %new) { + %exp_t = trunc i32 %exp to i8 + %new_t = trunc i32 %new to i8 + %pair = cmpxchg i8* @gv8, i8 %exp_t, i8 %new_t seq_cst seq_cst + %old = extractvalue { i8, i1 } %pair, 0 + %u = zext i8 %old to i32 + ret i32 %u +}