Index: lib/Target/WebAssembly/WebAssemblyISelLowering.h
===================================================================
--- lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -44,6 +44,7 @@
   /// right decision when generating code for different targets.
   const WebAssemblySubtarget *Subtarget;
 
+  AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
   FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                            const TargetLibraryInfo *LibInfo) const override;
   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
Index: lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -157,6 +157,23 @@
   setMaxAtomicSizeInBitsSupported(64);
 }
 
+TargetLowering::AtomicExpansionKind
+WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  // We have wasm instructions for these
+  switch (AI->getOperation()) {
+  case AtomicRMWInst::Add:
+  case AtomicRMWInst::Sub:
+  case AtomicRMWInst::And:
+  case AtomicRMWInst::Or:
+  case AtomicRMWInst::Xor:
+  case AtomicRMWInst::Xchg:
+    return AtomicExpansionKind::None;
+  default:
+    break;
+  }
+  return AtomicExpansionKind::CmpXChg;
+}
+
 FastISel *WebAssemblyTargetLowering::createFastISel(
     FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
   return WebAssembly::createFastISel(FuncInfo, LibInfo);
Index: lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
===================================================================
--- lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -660,10 +660,11 @@
 // Atomic ternary read-modify-writes
 //===----------------------------------------------------------------------===//
 
-// TODO LLVM IR's cmpxchg instruction returns a pair of {loaded value,
-// success flag}. When we use a success flag or both values, we can't make use
-// of truncate/extend versions of instructions for now, which is suboptimal. Add
-// selection rules for those cases too.
+// TODO LLVM IR's cmpxchg instruction returns a pair of {loaded value, success
+// flag}. When we use the success flag or both values, we can't make use of i64
+// truncate/extend versions of instructions for now, which is suboptimal.
+// Consider adding a pass after instruction selection that optimizes this case
+// if it is frequent.
 
 let Defs = [ARGUMENTS] in {
Index: test/CodeGen/WebAssembly/atomic-rmw.ll
===================================================================
--- test/CodeGen/WebAssembly/atomic-rmw.ll
+++ test/CodeGen/WebAssembly/atomic-rmw.ll
@@ -85,6 +85,58 @@
   ret i1 %succ
 }
 
+; Unsupported instructions are expanded using cmpxchg with a loop.
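+;
+; As a rough sketch of the expansion the new shouldExpandAtomicRMWInIR hook
+; requests (hedged; block and value names are illustrative, not the literal
+; AtomicExpandPass output), each such atomicrmw is rewritten into IR of this
+; shape before instruction selection, e.g. for nand:
+;
+; entry:
+;   %init = load i32, i32* %p
+;   br label %loop
+; loop:
+;   %loaded = phi i32 [ %init, %entry ], [ %new_loaded, %loop ]
+;   %tmp = and i32 %loaded, %v
+;   %new = xor i32 %tmp, -1                 ; nand = not(and)
+;   %pair = cmpxchg i32* %p, i32 %loaded, i32 %new seq_cst seq_cst
+;   %new_loaded = extractvalue { i32, i1 } %pair, 0
+;   %success = extractvalue { i32, i1 } %pair, 1
+;   br i1 %success, label %done, label %loop
+; done: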
+
+; CHECK-LABEL: nand_i32:
+; CHECK: loop
+; CHECK: i32.atomic.rmw.cmpxchg
+; CHECK: br_if 0
+; CHECK: end_loop
+define i32 @nand_i32(i32* %p, i32 %v) {
+  %old = atomicrmw nand i32* %p, i32 %v seq_cst
+  ret i32 %old
+}
+
+; CHECK-LABEL: max_i32:
+; CHECK: loop
+; CHECK: i32.atomic.rmw.cmpxchg
+; CHECK: br_if 0
+; CHECK: end_loop
+define i32 @max_i32(i32* %p, i32 %v) {
+  %old = atomicrmw max i32* %p, i32 %v seq_cst
+  ret i32 %old
+}
+
+; CHECK-LABEL: min_i32:
+; CHECK: loop
+; CHECK: i32.atomic.rmw.cmpxchg
+; CHECK: br_if 0
+; CHECK: end_loop
+define i32 @min_i32(i32* %p, i32 %v) {
+  %old = atomicrmw min i32* %p, i32 %v seq_cst
+  ret i32 %old
+}
+
+; CHECK-LABEL: umax_i32:
+; CHECK: loop
+; CHECK: i32.atomic.rmw.cmpxchg
+; CHECK: br_if 0
+; CHECK: end_loop
+define i32 @umax_i32(i32* %p, i32 %v) {
+  %old = atomicrmw umax i32* %p, i32 %v seq_cst
+  ret i32 %old
+}
+
+; CHECK-LABEL: umin_i32:
+; CHECK: loop
+; CHECK: i32.atomic.rmw.cmpxchg
+; CHECK: br_if 0
+; CHECK: end_loop
+define i32 @umin_i32(i32* %p, i32 %v) {
+  %old = atomicrmw umin i32* %p, i32 %v seq_cst
+  ret i32 %old
+}
+
 ;===----------------------------------------------------------------------------
 ; Atomic read-modify-writes: 64-bit
 ;===----------------------------------------------------------------------------
@@ -164,6 +216,58 @@
   ret i1 %succ
 }
 
+; Unsupported instructions are expanded using cmpxchg with a loop.
+
+; CHECK-LABEL: nand_i64:
+; CHECK: loop
+; CHECK: i64.atomic.rmw.cmpxchg
+; CHECK: br_if 0
+; CHECK: end_loop
+define i64 @nand_i64(i64* %p, i64 %v) {
+  %old = atomicrmw nand i64* %p, i64 %v seq_cst
+  ret i64 %old
+}
+
+; CHECK-LABEL: max_i64:
+; CHECK: loop
+; CHECK: i64.atomic.rmw.cmpxchg
+; CHECK: br_if 0
+; CHECK: end_loop
+define i64 @max_i64(i64* %p, i64 %v) {
+  %old = atomicrmw max i64* %p, i64 %v seq_cst
+  ret i64 %old
+}
+
+; CHECK-LABEL: min_i64:
+; CHECK: loop
+; CHECK: i64.atomic.rmw.cmpxchg
+; CHECK: br_if 0
+; CHECK: end_loop
+define i64 @min_i64(i64* %p, i64 %v) {
+  %old = atomicrmw min i64* %p, i64 %v seq_cst
+  ret i64 %old
+}
+
+; CHECK-LABEL: umax_i64:
+; CHECK: loop
+; CHECK: i64.atomic.rmw.cmpxchg
+; CHECK: br_if 0
+; CHECK: end_loop
+define i64 @umax_i64(i64* %p, i64 %v) {
+  %old = atomicrmw umax i64* %p, i64 %v seq_cst
+  ret i64 %old
+}
+
+; CHECK-LABEL: umin_i64:
+; CHECK: loop
+; CHECK: i64.atomic.rmw.cmpxchg
+; CHECK: br_if 0
+; CHECK: end_loop
+define i64 @umin_i64(i64* %p, i64 %v) {
+  %old = atomicrmw umin i64* %p, i64 %v seq_cst
+  ret i64 %old
+}
+
 ;===----------------------------------------------------------------------------
 ; Atomic truncating & sign-extending RMWs
 ;===----------------------------------------------------------------------------
@@ -627,6 +731,76 @@
   ret i64 %e
 }
 
+
+; Unsupported instructions are expanded using cmpxchg with a loop.
+; Here we take nand as an example.
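+;
+; The narrow cases below follow the same pattern: the compare-exchange in the
+; loop is selected with the zero-extending rmw8_u/rmw16_u cmpxchg forms, and
+; the sign extension of the loaded value is done separately afterwards. An
+; approximate shape of the expected output, with operands omitted (hedged,
+; for illustration only):
+;
+;   loop
+;     ...                           ; compute nand on the low bits
+;     i32.atomic.rmw8_u.cmpxchg     ; compare-exchange the narrow value
+;     br_if 0                       ; loop back and retry on failure
+;   end_loop
+;   i32.extend8_s                   ; sign-extend the loaded value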
+
+; nand
+
+; CHECK-LABEL: nand_sext_i8_i32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK: loop
+; CHECK: i32.atomic.rmw8_u.cmpxchg
+; CHECK: i32.extend8_s
+define i32 @nand_sext_i8_i32(i8* %p, i32 %v) {
+  %t = trunc i32 %v to i8
+  %old = atomicrmw nand i8* %p, i8 %t seq_cst
+  %e = sext i8 %old to i32
+  ret i32 %e
+}
+
+; CHECK-LABEL: nand_sext_i16_i32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK: loop
+; CHECK: i32.atomic.rmw16_u.cmpxchg
+; CHECK: i32.extend16_s
+define i32 @nand_sext_i16_i32(i16* %p, i32 %v) {
+  %t = trunc i32 %v to i16
+  %old = atomicrmw nand i16* %p, i16 %t seq_cst
+  %e = sext i16 %old to i32
+  ret i32 %e
+}
+
+; FIXME Currently this cannot make use of i64.atomic.rmw8_u.cmpxchg
+; CHECK-LABEL: nand_sext_i8_i64:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK: loop
+; CHECK: i32.atomic.rmw8_u.cmpxchg
+; CHECK: i64.extend_u/i32
+; CHECK: i64.extend8_s
+define i64 @nand_sext_i8_i64(i8* %p, i64 %v) {
+  %t = trunc i64 %v to i8
+  %old = atomicrmw nand i8* %p, i8 %t seq_cst
+  %e = sext i8 %old to i64
+  ret i64 %e
+}
+
+; FIXME Currently this cannot make use of i64.atomic.rmw16_u.cmpxchg
+; CHECK-LABEL: nand_sext_i16_i64:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK: loop
+; CHECK: i32.atomic.rmw16_u.cmpxchg
+; CHECK: i64.extend_u/i32
+; CHECK: i64.extend16_s
+define i64 @nand_sext_i16_i64(i16* %p, i64 %v) {
+  %t = trunc i64 %v to i16
+  %old = atomicrmw nand i16* %p, i16 %t seq_cst
+  %e = sext i16 %old to i64
+  ret i64 %e
+}
+
+; 32->64 sext rmw is selected as an i32.atomic.rmw.cmpxchg loop, i64.extend_s/i32
+; CHECK-LABEL: nand_sext_i32_i64:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK: loop
+; CHECK: i32.atomic.rmw.cmpxchg
+; CHECK: i64.extend_s/i32
+define i64 @nand_sext_i32_i64(i32* %p, i64 %v) {
+  %t = trunc i64 %v to i32
+  %old = atomicrmw nand i32* %p, i32 %t seq_cst
+  %e = sext i32 %old to i64
+  ret i64 %e
+}
+
 ;===----------------------------------------------------------------------------
 ; Atomic truncating & zero-extending RMWs
 ;===----------------------------------------------------------------------------
@@ -1039,3 +1213,69 @@
   %e = zext i32 %old to i64
   ret i64 %e
 }
+
+; Unsupported instructions are expanded using cmpxchg with a loop.
+; Here we take nand as an example.
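+;
+; Zero extension needs no extra extend in the i32 cases: the rmw8_u/rmw16_u
+; cmpxchg forms already leave the loaded value zero-extended. The i64 cases
+; below still end in i64.extend_u/i32 because the loop is currently selected
+; with the i32 forms of cmpxchg (see the FIXMEs). Approximate shape (hedged,
+; operands omitted for illustration):
+;
+;   loop
+;     ...                           ; compute nand on the low bits
+;     i32.atomic.rmw8_u.cmpxchg     ; loaded value is already zero-extended
+;     br_if 0                       ; loop back and retry on failure
+;   end_loop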
+
+; nand
+
+; CHECK-LABEL: nand_zext_i8_i32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK: loop
+; CHECK: i32.atomic.rmw8_u.cmpxchg
+define i32 @nand_zext_i8_i32(i8* %p, i32 %v) {
+  %t = trunc i32 %v to i8
+  %old = atomicrmw nand i8* %p, i8 %t seq_cst
+  %e = zext i8 %old to i32
+  ret i32 %e
+}
+
+; CHECK-LABEL: nand_zext_i16_i32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK: loop
+; CHECK: i32.atomic.rmw16_u.cmpxchg
+define i32 @nand_zext_i16_i32(i16* %p, i32 %v) {
+  %t = trunc i32 %v to i16
+  %old = atomicrmw nand i16* %p, i16 %t seq_cst
+  %e = zext i16 %old to i32
+  ret i32 %e
+}
+
+; FIXME Currently this cannot make use of i64.atomic.rmw8_u.cmpxchg
+; CHECK-LABEL: nand_zext_i8_i64:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK: loop
+; CHECK: i32.atomic.rmw8_u.cmpxchg
+; CHECK: i64.extend_u/i32
+define i64 @nand_zext_i8_i64(i8* %p, i64 %v) {
+  %t = trunc i64 %v to i8
+  %old = atomicrmw nand i8* %p, i8 %t seq_cst
+  %e = zext i8 %old to i64
+  ret i64 %e
+}
+
+; FIXME Currently this cannot make use of i64.atomic.rmw16_u.cmpxchg
+; CHECK-LABEL: nand_zext_i16_i64:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK: loop
+; CHECK: i32.atomic.rmw16_u.cmpxchg
+; CHECK: i64.extend_u/i32
+define i64 @nand_zext_i16_i64(i16* %p, i64 %v) {
+  %t = trunc i64 %v to i16
+  %old = atomicrmw nand i16* %p, i16 %t seq_cst
+  %e = zext i16 %old to i64
+  ret i64 %e
+}
+
+; FIXME Currently this cannot make use of i64.atomic.rmw32_u.cmpxchg
+; CHECK-LABEL: nand_zext_i32_i64:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK: loop
+; CHECK: i32.atomic.rmw.cmpxchg
+; CHECK: i64.extend_u/i32
+define i64 @nand_zext_i32_i64(i32* %p, i64 %v) {
+  %t = trunc i64 %v to i32
+  %old = atomicrmw nand i32* %p, i32 %t seq_cst
+  %e = zext i32 %old to i64
+  ret i64 %e
+}
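+
+; If the FIXMEs above are addressed, the i64 cases would ideally select the
+; 64-bit narrow cmpxchg forms directly and drop the extra extend. A sketch of
+; the hoped-for pattern (illustrative only; no such selection rules exist yet):
+;
+;   loop
+;     ...                           ; compute nand in i64
+;     i64.atomic.rmw8_u.cmpxchg     ; narrow, zero-extending compare-exchange
+;     br_if 0
+;   end_loop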