Index: lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h =================================================================== --- lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -111,6 +111,8 @@ case WebAssembly::LOAD8_U_I32: case WebAssembly::LOAD8_S_I64: case WebAssembly::LOAD8_U_I64: + case WebAssembly::ATOMIC_LOAD8_U_I32: + case WebAssembly::ATOMIC_LOAD8_U_I64: case WebAssembly::STORE8_I32: case WebAssembly::STORE8_I64: return 0; @@ -118,6 +120,8 @@ case WebAssembly::LOAD16_U_I32: case WebAssembly::LOAD16_S_I64: case WebAssembly::LOAD16_U_I64: + case WebAssembly::ATOMIC_LOAD16_U_I32: + case WebAssembly::ATOMIC_LOAD16_U_I64: case WebAssembly::STORE16_I32: case WebAssembly::STORE16_I64: return 1; @@ -129,11 +133,13 @@ case WebAssembly::LOAD32_U_I64: case WebAssembly::STORE32_I64: case WebAssembly::ATOMIC_LOAD_I32: + case WebAssembly::ATOMIC_LOAD32_U_I64: return 2; case WebAssembly::LOAD_I64: case WebAssembly::LOAD_F64: case WebAssembly::STORE_I64: case WebAssembly::STORE_F64: + case WebAssembly::ATOMIC_LOAD_I64: return 3; default: llvm_unreachable("Only loads and stores have p2align values"); Index: lib/Target/WebAssembly/WebAssemblyInstrAtomics.td =================================================================== --- lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -17,19 +17,180 @@ //===----------------------------------------------------------------------===// let Defs = [ARGUMENTS] in { -// TODO: add the rest of the atomic loads -def ATOMIC_LOAD_I32 : CLoadI32<"i32.atomic.load", 0xfe10>; -def ATOMIC_LOAD_I64 : CLoadI64<"i64.atomic.load", 0xfe11>; +def ATOMIC_LOAD_I32 : WebAssemblyLoad; +def ATOMIC_LOAD_I64 : WebAssemblyLoad; } // Defs = [ARGUMENTS] // Select loads with no constant offset. let Predicates = [HasAtomics] in { -class ALoadPatNoOffset : - Pat<(ty (node I32:$addr)), (inst 0, 0, $addr)>; -def : ALoadPatNoOffset; -def : ALoadPatNoOffset; +def : LoadPatNoOffset; +def : LoadPatNoOffset; -} +// Select loads with a constant offset. + +// Pattern with address + immediate offset +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; + +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; + +def : LoadPatExternalSym; +def : LoadPatExternalSym; + + +// Select loads with just a constant offset. +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; + +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; + +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; + +} // Predicates = [HasAtomics] + +// Extending loads. Note that there are only zero-extending atomic loads, no +// sign-extending loads. +let Defs = [ARGUMENTS] in { +def ATOMIC_LOAD8_U_I32 : WebAssemblyLoad; +def ATOMIC_LOAD16_U_I32 : WebAssemblyLoad; +def ATOMIC_LOAD8_U_I64 : WebAssemblyLoad; +def ATOMIC_LOAD16_U_I64 : WebAssemblyLoad; +def ATOMIC_LOAD32_U_I64 : WebAssemblyLoad; +} // Defs = [ARGUMENTS] + +// Fragments for exending loads. These are different from regular loads because +// the SDNodes are derived from AtomicSDNode rather than LoadSDNode and +// therefore don't have the extension type field. So instead of matching that, +// we match the patterns that the type legalizer expands them to. + +// We directly match zext patterns and select the zext atomic loads. 
+// i32 (zext (i8 (atomic_load_8))) gets legalized to +// i32 (and (i32 (atomic_load_8)), 255) +// These can be selected to a single zero-extending atomic load instruction. +def zext_aload_8 : PatFrag<(ops node:$addr), + (and (i32 (atomic_load_8 node:$addr)), 255)>; +def zext_aload_16 : PatFrag<(ops node:$addr), + (and (i32 (atomic_load_16 node:$addr)), 65535)>; +// Unlike regular loads, extension to i64 is handled differently than i32. +// i64 (zext (i8 (atomic_load_8))) gets legalized to +// i64 (and (i64 (anyext (i32 (atomic_load_8)))), 255) +def zext_aload_8_64 : + PatFrag<(ops node:$addr), + (and (i64 (anyext (i32 (atomic_load_8 node:$addr)))), 255)>; +def zext_aload_16_64 : + PatFrag<(ops node:$addr), + (and (i64 (anyext (i32 (atomic_load_16 node:$addr)))), 65535)>; +def zext_aload_32_64 : + PatFrag<(ops node:$addr), + (zext (i32 (atomic_load node:$addr)))>; + +// We don't have single sext atomic load instructions. So for sext loads, we +// match bare subword loads (for 32-bit results) and anyext loads (for 64-bit +// results) and select a zext load; the next instruction will be sext_inreg +// which is selected by itself. +def anyext_aload_8_64 : + PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_8 node:$addr)))>; +def anyext_aload_16_64 : + PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_16 node:$addr)))>; + +let Predicates = [HasAtomics] in { +// Select zero-extending loads with no constant offset. +def : LoadPatNoOffset; +def : LoadPatNoOffset; +def : LoadPatNoOffset; +def : LoadPatNoOffset; +def : LoadPatNoOffset; + +// Select sign-extending loads with no constant offset +def : LoadPatNoOffset; +def : LoadPatNoOffset; +def : LoadPatNoOffset; +def : LoadPatNoOffset; +// 32->64 sext load gets selected as i32.atomic.load, i64.extend_s/i64 + + +// Zero-extending loads with constant offset +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; + +// Sign-extending loads with constant offset +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +// No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i64 + +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; + +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; + + +// Extending loads with just a constant offset +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; + +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; + +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; +def : 
LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; + + +} // Predicates = [HasAtomics] //===----------------------------------------------------------------------===// // Atomic stores Index: lib/Target/WebAssembly/WebAssemblyInstrMemory.td =================================================================== --- lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -55,28 +55,19 @@ let Defs = [ARGUMENTS] in { -// Classes to define both atomic and non-atomic integer loads -class CLoadI32 : - I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), Opcode>; - -class CLoadI64 : - I<(outs I64:$dst), +// Defines atomic and non-atomic loads, regular and extending. +class WebAssemblyLoad : + I<(outs rc:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), Opcode>; // Basic load. // FIXME: When we can break syntax compatibility, reorder the fields in the // asmstrings to match the binary encoding. -def LOAD_I32 : CLoadI32<"i32.load", 0x28>; -def LOAD_I64 : CLoadI64<"i64.load", 0x29>; -def LOAD_F32 : I<(outs F32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "f32.load\t$dst, ${off}(${addr})${p2align}", 0x2a>; -def LOAD_F64 : I<(outs F64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "f64.load\t$dst, ${off}(${addr})${p2align}", 0x2b>; +def LOAD_I32 : WebAssemblyLoad; +def LOAD_I64 : WebAssemblyLoad; +def LOAD_F32 : WebAssemblyLoad; +def LOAD_F64 : WebAssemblyLoad; } // Defs = [ARGUMENTS] @@ -153,36 +144,16 @@ let Defs = [ARGUMENTS] in { // Extending load. 
-def LOAD8_S_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load8_s\t$dst, ${off}(${addr})${p2align}", 0x2c>; -def LOAD8_U_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load8_u\t$dst, ${off}(${addr})${p2align}", 0x2d>; -def LOAD16_S_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load16_s\t$dst, ${off}(${addr})${p2align}", 0x2e>; -def LOAD16_U_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load16_u\t$dst, ${off}(${addr})${p2align}", 0x2f>; -def LOAD8_S_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load8_s\t$dst, ${off}(${addr})${p2align}", 0x30>; -def LOAD8_U_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load8_u\t$dst, ${off}(${addr})${p2align}", 0x31>; -def LOAD16_S_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load16_s\t$dst, ${off}(${addr})${p2align}", 0x32>; -def LOAD16_U_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load16_u\t$dst, ${off}(${addr})${p2align}", 0x33>; -def LOAD32_S_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load32_s\t$dst, ${off}(${addr})${p2align}", 0x34>; -def LOAD32_U_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load32_u\t$dst, ${off}(${addr})${p2align}", 0x35>; +def LOAD8_S_I32 : WebAssemblyLoad; +def LOAD8_U_I32 : WebAssemblyLoad; +def LOAD16_S_I32 : WebAssemblyLoad; +def LOAD16_U_I32 : WebAssemblyLoad; +def LOAD8_S_I64 : WebAssemblyLoad; +def LOAD8_U_I64 : WebAssemblyLoad; +def LOAD16_S_I64 : WebAssemblyLoad; +def LOAD16_U_I64 : WebAssemblyLoad; +def LOAD32_S_I64 : WebAssemblyLoad; +def LOAD32_U_I64 : WebAssemblyLoad; } // Defs = [ARGUMENTS] @@ -290,7 +261,6 @@ def : LoadPatNoOffset; def : LoadPatNoOffset; - // Select "don't care" extending loads with a constant offset. def : LoadPatImmOff; def : LoadPatImmOff; @@ -313,7 +283,6 @@ def : LoadPatExternalSym; def : LoadPatExternalSym; - // Select "don't care" extending loads with just a constant offset. def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; Index: lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -97,6 +97,12 @@ case WebAssembly::LOAD32_S_I64: case WebAssembly::LOAD32_U_I64: case WebAssembly::ATOMIC_LOAD_I32: + case WebAssembly::ATOMIC_LOAD8_U_I32: + case WebAssembly::ATOMIC_LOAD16_U_I32: + case WebAssembly::ATOMIC_LOAD_I64: + case WebAssembly::ATOMIC_LOAD8_U_I64: + case WebAssembly::ATOMIC_LOAD16_U_I64: + case WebAssembly::ATOMIC_LOAD32_U_I64: RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo); break; case WebAssembly::STORE_I32: Index: test/CodeGen/WebAssembly/atomics.ll =================================================================== --- test/CodeGen/WebAssembly/atomics.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: not llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals -mattr=+atomics | FileCheck %s - -; Test that atomic loads are assembled properly. 
- -target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" -target triple = "wasm32-unknown-unknown-wasm" - -; CHECK-LABEL: load_i32_atomic: -; CHECK: i32.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} - -define i32 @load_i32_atomic(i32 *%p) { - %v = load atomic i32, i32* %p seq_cst, align 4 - ret i32 %v -} Index: test/CodeGen/WebAssembly/i32-load-store-alignment.ll =================================================================== --- test/CodeGen/WebAssembly/i32-load-store-alignment.ll +++ test/CodeGen/WebAssembly/i32-load-store-alignment.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals | FileCheck %s +; RUN: llc < %s -mattr=+atomics -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals | FileCheck %s ; Test loads and stores with custom alignment values. @@ -210,3 +210,29 @@ store i16 %v, i16* %p, align 4 ret void } + +; Atomics. +; Wasm atomics have the alignment field, but it must always have the +; type's natural alignment. + +; CHECK-LABEL: ldi32_atomic_a4: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i32 @ldi32_atomic_a4(i32 *%p) { + %v = load atomic i32, i32* %p seq_cst, align 4 + ret i32 %v +} + +; 8 is greater than the default alignment so it is rounded down to 4 + +; CHECK-LABEL: ldi32_atomic_a8: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i32 @ldi32_atomic_a8(i32 *%p) { + %v = load atomic i32, i32* %p seq_cst, align 8 + ret i32 %v +} Index: test/CodeGen/WebAssembly/i64-load-store-alignment.ll =================================================================== --- test/CodeGen/WebAssembly/i64-load-store-alignment.ll +++ test/CodeGen/WebAssembly/i64-load-store-alignment.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals | FileCheck %s +; RUN: llc < %s -mattr=+atomics -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals | FileCheck %s ; Test loads and stores with custom alignment values. @@ -323,3 +323,26 @@ store i32 %v, i32* %p, align 8 ret void } + +; Atomics. +; CHECK-LABEL: ldi64_atomic_a8: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i64{{$}} +; CHECK-NEXT: i64.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i64 @ldi64_atomic_a8(i64 *%p) { + %v = load atomic i64, i64* %p seq_cst, align 8 + ret i64 %v +} + +; 16 is greater than the default alignment so it is ignored. + +; CHECK-LABEL: ldi64_atomic_a16: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i64{{$}} +; CHECK-NEXT: i64.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i64 @ldi64_atomic_a16(i64 *%p) { + %v = load atomic i64, i64* %p seq_cst, align 16 + ret i64 %v +} Index: test/CodeGen/WebAssembly/load-ext-atomic.ll =================================================================== --- /dev/null +++ test/CodeGen/WebAssembly/load-ext-atomic.ll @@ -0,0 +1,102 @@ +; RUN: llc < %s -mattr=+atomics -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals | FileCheck %s + +; Test that extending loads are assembled properly. 
+ +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown-wasm" + +; CHECK-LABEL: sext_i8_i32: +; CHECK: i32.atomic.load8_u $push0=, 0($0){{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @sext_i8_i32(i8 *%p) { + %v = load atomic i8, i8* %p seq_cst, align 1 + %e = sext i8 %v to i32 + ret i32 %e +} + +; CHECK-LABEL: zext_i8_i32: +; CHECK: i32.atomic.load8_u $push0=, 0($0){{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @zext_i8_i32(i8 *%p) { +e1: + %v = load atomic i8, i8* %p seq_cst, align 1 + %e = zext i8 %v to i32 + ret i32 %e +} + +; CHECK-LABEL: sext_i16_i32: +; CHECK: i32.atomic.load16_u $push0=, 0($0){{$}} +; CHECK-NEXT: i32.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i32 @sext_i16_i32(i16 *%p) { + %v = load atomic i16, i16* %p seq_cst, align 2 + %e = sext i16 %v to i32 + ret i32 %e +} + +; CHECK-LABEL: zext_i16_i32: +; CHECK: i32.atomic.load16_u $push0=, 0($0){{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @zext_i16_i32(i16 *%p) { + %v = load atomic i16, i16* %p seq_cst, align 2 + %e = zext i16 %v to i32 + ret i32 %e +} + +; CHECK-LABEL: sext_i8_i64: +; CHECK: i64.atomic.load8_u $push0=, 0($0){{$}} +; CHECK: i64.extend8_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @sext_i8_i64(i8 *%p) { + %v = load atomic i8, i8* %p seq_cst, align 1 + %e = sext i8 %v to i64 + ret i64 %e +} + +; CHECK-LABEL: zext_i8_i64: +; CHECK: i64.atomic.load8_u $push0=, 0($0){{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @zext_i8_i64(i8 *%p) { + %v = load atomic i8, i8* %p seq_cst, align 1 + %e = zext i8 %v to i64 + ret i64 %e +} + +; CHECK-LABEL: sext_i16_i64: +; CHECK: i64.atomic.load16_u $push0=, 0($0){{$}} +; CHECK: i64.extend16_s $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @sext_i16_i64(i16 *%p) { + %v = load atomic i16, i16* %p seq_cst, align 2 + %e = sext i16 %v to i64 + ret i64 %e +} + +; CHECK-LABEL: zext_i16_i64: +; CHECK: i64.atomic.load16_u $push0=, 0($0){{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @zext_i16_i64(i16 *%p) { + %v = load atomic i16, i16* %p seq_cst, align 2 + %e = zext i16 %v to i64 + ret i64 %e +} + +; CHECK-LABEL: sext_i32_i64: +; CHECK: i32.atomic.load $push0=, 0($0){{$}} +; CHECK: i64.extend_s/i32 $push1=, $pop0{{$}} +; CHECK-NEXT: return $pop1{{$}} +define i64 @sext_i32_i64(i32 *%p) { + %v = load atomic i32, i32* %p seq_cst, align 4 + %e = sext i32 %v to i64 + ret i64 %e +} + +; CHECK-LABEL: zext_i32_i64: +; CHECK: i64.atomic.load32_u $push0=, 0($0){{$}} +; CHECK: return $pop0{{$}} +define i64 @zext_i32_i64(i32 *%p) { + %v = load atomic i32, i32* %p seq_cst, align 4 + %e = zext i32 %v to i64 + ret i64 %e +} Index: test/CodeGen/WebAssembly/offset-atomics.ll =================================================================== --- /dev/null +++ test/CodeGen/WebAssembly/offset-atomics.ll @@ -0,0 +1,307 @@ +; RUN: not llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals -mattr=+atomics | FileCheck %s + +; Test that atomic loads are assembled properly. 
+ +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown-wasm" + +; CHECK-LABEL: load_i32_no_offset: +; CHECK: i32.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i32 @load_i32_no_offset(i32 *%p) { + %v = load atomic i32, i32* %p seq_cst, align 4 + ret i32 %v +} + +; With an nuw add, we can fold an offset. + +; CHECK-LABEL: load_i32_with_folded_offset: +; CHECK: i32.atomic.load $push0=, 24($0){{$}} +define i32 @load_i32_with_folded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = load atomic i32, i32* %s seq_cst, align 4 + ret i32 %t +} + +; With an inbounds gep, we can fold an offset. + +; CHECK-LABEL: load_i32_with_folded_gep_offset: +; CHECK: i32.atomic.load $push0=, 24($0){{$}} +define i32 @load_i32_with_folded_gep_offset(i32* %p) { + %s = getelementptr inbounds i32, i32* %p, i32 6 + %t = load atomic i32, i32* %s seq_cst, align 4 + ret i32 %t +} + +; We can't fold a negative offset though, even with an inbounds gep. + +; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.atomic.load $push2=, 0($pop1){{$}} +define i32 @load_i32_with_unfolded_gep_negative_offset(i32* %p) { + %s = getelementptr inbounds i32, i32* %p, i32 -6 + %t = load atomic i32, i32* %s seq_cst, align 4 + ret i32 %t +} + +; Without nuw, and even with nsw, we can't fold an offset. + +; CHECK-LABEL: load_i32_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.atomic.load $push2=, 0($pop1){{$}} +define i32 @load_i32_with_unfolded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = load atomic i32, i32* %s seq_cst, align 4 + ret i32 %t +} + +; Without inbounds, we can't fold a gep offset. + +; CHECK-LABEL: load_i32_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.atomic.load $push2=, 0($pop1){{$}} +define i32 @load_i32_with_unfolded_gep_offset(i32* %p) { + %s = getelementptr i32, i32* %p, i32 6 + %t = load atomic i32, i32* %s seq_cst, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_i64_no_offset: +; CHECK: i64.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i64 @load_i64_no_offset(i64 *%p) { + %v = load atomic i64, i64* %p seq_cst, align 8 + ret i64 %v +} + +; Same as above but with i64. + +; CHECK-LABEL: load_i64_with_folded_offset: +; CHECK: i64.atomic.load $push0=, 24($0){{$}} +define i64 @load_i64_with_folded_offset(i64* %p) { + %q = ptrtoint i64* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i64* + %t = load atomic i64, i64* %s seq_cst, align 8 + ret i64 %t +} + +; Same as above but with i64. + +; CHECK-LABEL: load_i64_with_folded_gep_offset: +; CHECK: i64.atomic.load $push0=, 24($0){{$}} +define i64 @load_i64_with_folded_gep_offset(i64* %p) { + %s = getelementptr inbounds i64, i64* %p, i32 3 + %t = load atomic i64, i64* %s seq_cst, align 8 + ret i64 %t +} + +; Same as above but with i64. 
+ +; CHECK-LABEL: load_i64_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.atomic.load $push2=, 0($pop1){{$}} +define i64 @load_i64_with_unfolded_gep_negative_offset(i64* %p) { + %s = getelementptr inbounds i64, i64* %p, i32 -3 + %t = load atomic i64, i64* %s seq_cst, align 8 + ret i64 %t +} + +; Same as above but with i64. + +; CHECK-LABEL: load_i64_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.atomic.load $push2=, 0($pop1){{$}} +define i64 @load_i64_with_unfolded_offset(i64* %p) { + %q = ptrtoint i64* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i64* + %t = load atomic i64, i64* %s seq_cst, align 8 + ret i64 %t +} + +; Same as above but with i64. + +; CHECK-LABEL: load_i64_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.atomic.load $push2=, 0($pop1){{$}} +define i64 @load_i64_with_unfolded_gep_offset(i64* %p) { + %s = getelementptr i64, i64* %p, i32 3 + %t = load atomic i64, i64* %s seq_cst, align 8 + ret i64 %t +} + +; CHECK-LABEL: load_i32_with_folded_or_offset: +; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}} +; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} +define i32 @load_i32_with_folded_or_offset(i32 %x) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %t1 = load atomic i8, i8* %arrayidx seq_cst, align 8 + %conv = sext i8 %t1 to i32 + ret i32 %conv +} + +; When loading from a fixed address, materialize a zero. + +; CHECK-LABEL: load_i32_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.load $push1=, 42($pop0){{$}} +define i32 @load_i32_from_numeric_address() { + %s = inttoptr i32 42 to i32* + %t = load atomic i32, i32* %s seq_cst, align 4 + ret i32 %t +} + + +; CHECK-LABEL: load_i32_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.load $push1=, gv($pop0){{$}} +@gv = global i32 0 +define i32 @load_i32_from_global_address() { + %t = load atomic i32, i32* @gv seq_cst, align 4 + ret i32 %t +} + +; Fold an offset into a sign-extending load. + +; CHECK-LABEL: load_i8_s_with_folded_offset: +; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0 +define i32 @load_i8_s_with_folded_offset(i8* %p) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i8* + %t = load atomic i8, i8* %s seq_cst, align 1 + %u = sext i8 %t to i32 + ret i32 %u +} + +; Fold a gep offset into a sign-extending load. 
+ +; CHECK-LABEL: load_i8_s_with_folded_gep_offset: +; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}} +; CHECK-NEXT: i32.extend8_s $push1=, $pop0 +define i32 @load_i8_s_with_folded_gep_offset(i8* %p) { + %s = getelementptr inbounds i8, i8* %p, i32 24 + %t = load atomic i8, i8* %s seq_cst, align 1 + %u = sext i8 %t to i32 + ret i32 %u +} + +; CHECK-LABEL: load_i16_s_i64_with_folded_gep_offset: +; CHECK: i64.atomic.load16_u $push0=, 6($0){{$}} +define i64 @load_i16_s_i64_with_folded_gep_offset(i16* %p) { + %s = getelementptr inbounds i16, i16* %p, i32 3 + %t = load atomic i16, i16* %s seq_cst, align 2 + %u = zext i16 %t to i64 + ret i64 %u +} + +; CHECK-LABEL: load_i64_with_folded_or_offset: +; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}} +; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} +define i64 @load_i64_with_folded_or_offset(i32 %x) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %t1 = load atomic i8, i8* %arrayidx seq_cst, align 8 + %conv = sext i8 %t1 to i64 + ret i64 %conv +} + + +; Fold an offset into a zero-extending load. + +; CHECK-LABEL: load_i16_u_with_folded_offset: +; CHECK: i32.atomic.load16_u $push0=, 24($0){{$}} +define i32 @load_i16_u_with_folded_offset(i8* %p) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i16* + %t = load atomic i16, i16* %s seq_cst, align 2 + %u = zext i16 %t to i32 + ret i32 %u +} + +; Fold a gep offset into a zero-extending load. + +; CHECK-LABEL: load_i8_u_with_folded_gep_offset: +; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}} +define i32 @load_i8_u_with_folded_gep_offset(i8* %p) { + %s = getelementptr inbounds i8, i8* %p, i32 24 + %t = load atomic i8, i8* %s seq_cst, align 1 + %u = zext i8 %t to i32 + ret i32 %u +} + + +; When loading from a fixed address, materialize a zero. +; As above but with extending load. + +; CHECK-LABEL: load_zext_i32_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}} +define i32 @load_zext_i32_from_numeric_address() { + %s = inttoptr i32 42 to i16* + %t = load atomic i16, i16* %s seq_cst, align 2 + %u = zext i16 %t to i32 + ret i32 %u +} + +; CHECK-LABEL: load_sext_i32_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}} +; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}} +@gv8 = global i8 0 +define i32 @load_sext_i32_from_global_address() { + %t = load atomic i8, i8* @gv8 seq_cst, align 1 + %u = sext i8 %t to i32 + ret i32 %u +} + +; Fold an offset into a sign-extending load. +; As above but 32 extended to 64 bit. +; CHECK-LABEL: load_i32_i64_s_with_folded_offset: +; CHECK: i32.atomic.load $push0=, 24($0){{$}} +; CHECK-NEXT: i64.extend_s/i32 $push1=, $pop0{{$}} +define i64 @load_i32_i64_s_with_folded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = load atomic i32, i32* %s seq_cst, align 4 + %u = sext i32 %t to i64 + ret i64 %u +} + +; Fold a gep offset into a zero-extending load. +; As above but 32 extended to 64 bit. 
+; CHECK-LABEL: load_i32_i64_u_with_folded_gep_offset:
+; CHECK: i64.atomic.load32_u $push0=, 96($0){{$}}
+define i64 @load_i32_i64_u_with_folded_gep_offset(i32* %p) {
+  %s = getelementptr inbounds i32, i32* %p, i32 24
+  %t = load atomic i32, i32* %s seq_cst, align 4
+  %u = zext i32 %t to i64
+  ret i64 %u
+}
+
+; i8 return value should test anyext loads
+; CHECK-LABEL: ldi8_a1:
+; CHECK-NEXT: i32.atomic.load8_u $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i8 @ldi8_a1(i8 *%p) {
+  %v = load atomic i8, i8* %p seq_cst, align 1
+  ret i8 %v
+}
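
Note: the WebAssemblyLoad and LoadPat* instantiations in the .td hunks above appear without their template argument lists, so the exact arguments are not shown here. As a rough illustrative sketch only, assuming LoadPatNoOffset takes the same (value type, pattern fragment, instruction) parameters that the removed ALoadPatNoOffset class used in its Pat body, and reusing the mnemonic/opcode from the removed CLoadI32 definition together with the zext_aload_8 fragment and ATOMIC_LOAD8_U_I32 instruction defined earlier in the patch, the instantiations have this general shape:

// Illustrative sketch, not the patch's literal argument lists.
def ATOMIC_LOAD_I32 : WebAssemblyLoad<I32, "i32.atomic.load", 0xfe10>;  // reg class, mnemonic, opcode
def : LoadPatNoOffset<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>;           // select zext(atomic_load_8) as i32.atomic.load8_u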