Index: lib/Target/WebAssembly/WebAssemblyInstrAtomics.td =================================================================== --- lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -40,7 +40,6 @@ def : LoadPatExternalSym; def : LoadPatExternalSym; - // Select loads with just a constant offset. def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; @@ -63,7 +62,7 @@ defm ATOMIC_LOAD32_U_I64 : WebAssemblyLoad; } // Defs = [ARGUMENTS] -// Fragments for exending loads. These are different from regular loads because +// Fragments for extending loads. These are different from regular loads because // the SDNodes are derived from AtomicSDNode rather than LoadSDNode and // therefore don't have the extension type field. So instead of matching that, // we match the patterns that the type legalizer expands them to. @@ -72,10 +71,10 @@ // i32 (zext (i8 (atomic_load_8))) gets legalized to // i32 (and (i32 (atomic_load_8)), 255) // These can be selected to a single zero-extending atomic load instruction. -def zext_aload_8 : PatFrag<(ops node:$addr), - (and (i32 (atomic_load_8 node:$addr)), 255)>; -def zext_aload_16 : PatFrag<(ops node:$addr), - (and (i32 (atomic_load_16 node:$addr)), 65535)>; +def zext_aload_8_32 : + PatFrag<(ops node:$addr), (and (i32 (atomic_load_8 node:$addr)), 255)>; +def zext_aload_16_32 : + PatFrag<(ops node:$addr), (and (i32 (atomic_load_16 node:$addr)), 65535)>; // Unlike regular loads, extension to i64 is handled differently than i32. // i64 (zext (i8 (atomic_load_8))) gets legalized to // i64 (and (i64 (anyext (i32 (atomic_load_8)))), 255) @@ -100,8 +99,8 @@ let Predicates = [HasAtomics] in { // Select zero-extending loads with no constant offset. 
-def : LoadPatNoOffset; -def : LoadPatNoOffset; +def : LoadPatNoOffset; +def : LoadPatNoOffset; def : LoadPatNoOffset; def : LoadPatNoOffset; def : LoadPatNoOffset; @@ -111,14 +110,13 @@ def : LoadPatNoOffset; def : LoadPatNoOffset; def : LoadPatNoOffset; -// 32->64 sext load gets selected as i32.atomic.load, i64.extend_s/i64 - +// 32->64 sext load gets selected as i32.atomic.load, i64.extend_s/i32 // Zero-extending loads with constant offset -def : LoadPatImmOff; -def : LoadPatImmOff; -def : LoadPatImmOff; -def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; +def : LoadPatImmOff; def : LoadPatImmOff; def : LoadPatImmOff; def : LoadPatImmOff; @@ -137,8 +135,8 @@ def : LoadPatImmOff; // No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i64 -def : LoadPatGlobalAddr; -def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; +def : LoadPatGlobalAddr; def : LoadPatGlobalAddr; def : LoadPatGlobalAddr; def : LoadPatGlobalAddr; @@ -147,8 +145,8 @@ def : LoadPatGlobalAddr; def : LoadPatGlobalAddr; -def : LoadPatExternalSym; -def : LoadPatExternalSym; +def : LoadPatExternalSym; +def : LoadPatExternalSym; def : LoadPatExternalSym; def : LoadPatExternalSym; def : LoadPatExternalSym; @@ -157,10 +155,9 @@ def : LoadPatExternalSym; def : LoadPatExternalSym; - // Extending loads with just a constant offset -def : LoadPatOffsetOnly; -def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; +def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; @@ -169,8 +166,8 @@ def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; -def : LoadPatGlobalAddrOffOnly; -def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; +def : LoadPatGlobalAddrOffOnly; def : LoadPatGlobalAddrOffOnly; def : LoadPatGlobalAddrOffOnly; def : LoadPatGlobalAddrOffOnly; @@ -179,8 +176,8 @@ def : LoadPatGlobalAddrOffOnly; def : LoadPatGlobalAddrOffOnly; -def : LoadPatExternSymOffOnly; -def : LoadPatExternSymOffOnly; +def : 
LoadPatExternSymOffOnly; +def : LoadPatExternSymOffOnly; def : LoadPatExternSymOffOnly; def : LoadPatExternSymOffOnly; def : LoadPatExternSymOffOnly; @@ -189,7 +186,6 @@ def : LoadPatExternSymOffOnly; def : LoadPatExternSymOffOnly; - } // Predicates = [HasAtomics] //===----------------------------------------------------------------------===// @@ -209,52 +205,49 @@ let Predicates = [HasAtomics] in { // Select stores with no constant offset. -class AStorePatNoOffset : - Pat<(node I32:$addr, ty:$val), (inst 0, 0, $addr, $val)>; +class AStorePatNoOffset : + Pat<(kind I32:$addr, ty:$val), (inst 0, 0, I32:$addr, ty:$val)>; def : AStorePatNoOffset; def : AStorePatNoOffset; // Select stores with a constant offset. // Pattern with address + immediate offset -class AStorePatImmOff : - Pat<(storekind (operand I32:$addr, imm:$off), ty:$val), - (inst 0, imm:$off, $addr, ty:$val)>; +class AStorePatImmOff : + Pat<(kind (operand I32:$addr, imm:$off), ty:$val), + (inst 0, imm:$off, I32:$addr, ty:$val)>; def : AStorePatImmOff; def : AStorePatImmOff; def : AStorePatImmOff; def : AStorePatImmOff; -class AStorePatGlobalAddr : - Pat<(storekind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)), - ty:$val), +class AStorePatGlobalAddr : + Pat<(kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)), + ty:$val), (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>; def : AStorePatGlobalAddr; def : AStorePatGlobalAddr; -class AStorePatExternalSym : - Pat<(storekind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)), - ty:$val), +class AStorePatExternalSym : + Pat<(kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)), ty:$val), (inst 0, texternalsym:$off, I32:$addr, ty:$val)>; def : AStorePatExternalSym; def : AStorePatExternalSym; // Select stores with just a constant offset. 
-class AStorePatOffsetOnly : - Pat<(storekind imm:$off, ty:$val), - (inst 0, imm:$off, (CONST_I32 0), ty:$val)>; +class AStorePatOffsetOnly : + Pat<(kind imm:$off, ty:$val), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>; def : AStorePatOffsetOnly; def : AStorePatOffsetOnly; -class AStorePatGlobalAddrOffOnly : - Pat<(storekind (WebAssemblywrapper tglobaladdr:$off), ty:$val), +class AStorePatGlobalAddrOffOnly : + Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val), (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>; def : AStorePatGlobalAddrOffOnly; def : AStorePatGlobalAddrOffOnly; -class AStorePatExternSymOffOnly : - Pat<(storekind (WebAssemblywrapper texternalsym:$off), ty:$val), +class AStorePatExternSymOffOnly : + Pat<(kind (WebAssemblywrapper texternalsym:$off), ty:$val), (inst 0, texternalsym:$off, (CONST_I32 0), ty:$val)>; def : AStorePatExternSymOffOnly; def : AStorePatExternSymOffOnly; @@ -275,9 +268,9 @@ // We don't have single truncating atomic store instructions. For 32-bit // instructions, we just need to match bare atomic stores. On the other hand, // truncating stores from i64 values are once truncated to i32 first. -class trunc_astore_64 : +class trunc_astore_64 : PatFrag<(ops node:$addr, node:$val), - (storekind node:$addr, (i32 (trunc (i64 node:$val))))>; + (kind node:$addr, (i32 (trunc (i64 node:$val))))>; def trunc_astore_8_64 : trunc_astore_64; def trunc_astore_16_64 : trunc_astore_64; def trunc_astore_32_64 : trunc_astore_64; Index: lib/Target/WebAssembly/WebAssemblyInstrMemory.td =================================================================== --- lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -75,8 +75,8 @@ } // Defs = [ARGUMENTS] // Select loads with no constant offset. 
-class LoadPatNoOffset : - Pat<(ty (node I32:$addr)), (inst 0, 0, $addr)>; +class LoadPatNoOffset : + Pat<(ty (kind I32:$addr)), (inst 0, 0, I32:$addr)>; def : LoadPatNoOffset; def : LoadPatNoOffset; @@ -87,9 +87,8 @@ // Select loads with a constant offset. // Pattern with address + immediate offset -class LoadPatImmOff : - Pat<(ty (loadkind (operand I32:$addr, imm:$off))), - (inst 0, imm:$off, $addr)>; +class LoadPatImmOff : + Pat<(ty (kind (operand I32:$addr, imm:$off))), (inst 0, imm:$off, I32:$addr)>; def : LoadPatImmOff; def : LoadPatImmOff; @@ -100,18 +99,18 @@ def : LoadPatImmOff; def : LoadPatImmOff; -class LoadPatGlobalAddr : - Pat<(ty (loadkind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (inst 0, tglobaladdr:$off, $addr)>; +class LoadPatGlobalAddr : + Pat<(ty (kind (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), + (inst 0, tglobaladdr:$off, I32:$addr)>; def : LoadPatGlobalAddr; def : LoadPatGlobalAddr; def : LoadPatGlobalAddr; def : LoadPatGlobalAddr; -class LoadPatExternalSym : - Pat<(ty (loadkind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (inst 0, texternalsym:$off, $addr)>; +class LoadPatExternalSym : + Pat<(ty (kind (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), + (inst 0, texternalsym:$off, I32:$addr)>; def : LoadPatExternalSym; def : LoadPatExternalSym; def : LoadPatExternalSym; @@ -119,16 +118,16 @@ // Select loads with just a constant offset. 
-class LoadPatOffsetOnly : - Pat<(ty (loadkind imm:$off)), (inst 0, imm:$off, (CONST_I32 0))>; +class LoadPatOffsetOnly : + Pat<(ty (kind imm:$off)), (inst 0, imm:$off, (CONST_I32 0))>; def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; -class LoadPatGlobalAddrOffOnly : - Pat<(ty (loadkind (WebAssemblywrapper tglobaladdr:$off))), +class LoadPatGlobalAddrOffOnly : + Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))), (inst 0, tglobaladdr:$off, (CONST_I32 0))>; def : LoadPatGlobalAddrOffOnly; @@ -136,8 +135,8 @@ def : LoadPatGlobalAddrOffOnly; def : LoadPatGlobalAddrOffOnly; -class LoadPatExternSymOffOnly : - Pat<(ty (loadkind (WebAssemblywrapper texternalsym:$off))), +class LoadPatExternSymOffOnly : + Pat<(ty (kind (WebAssemblywrapper texternalsym:$off))), (inst 0, texternalsym:$off, (CONST_I32 0))>; def : LoadPatExternSymOffOnly; def : LoadPatExternSymOffOnly; @@ -326,7 +325,7 @@ // Select stores with no constant offset. class StorePatNoOffset : - Pat<(node ty:$val, I32:$addr), (inst 0, 0, $addr, $val)>; + Pat<(node ty:$val, I32:$addr), (inst 0, 0, I32:$addr, ty:$val)>; def : StorePatNoOffset; def : StorePatNoOffset; @@ -334,9 +333,9 @@ def : StorePatNoOffset; // Select stores with a constant offset. 
-class StorePatImmOff : - Pat<(storekind ty:$val, (operand I32:$addr, imm:$off)), - (inst 0, imm:$off, $addr, ty:$val)>; +class StorePatImmOff : + Pat<(kind ty:$val, (operand I32:$addr, imm:$off)), + (inst 0, imm:$off, I32:$addr, ty:$val)>; def : StorePatImmOff; def : StorePatImmOff; @@ -347,18 +346,17 @@ def : StorePatImmOff; def : StorePatImmOff; -class StorePatGlobalAddr : - Pat<(storekind ty:$val, (regPlusGA I32:$addr, - (WebAssemblywrapper tglobaladdr:$off))), +class StorePatGlobalAddr : + Pat<(kind ty:$val, + (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), (inst 0, tglobaladdr:$off, I32:$addr, ty:$val)>; def : StorePatGlobalAddr; def : StorePatGlobalAddr; def : StorePatGlobalAddr; def : StorePatGlobalAddr; -class StorePatExternalSym : - Pat<(storekind ty:$val, (add I32:$addr, - (WebAssemblywrapper texternalsym:$off))), +class StorePatExternalSym : + Pat<(kind ty:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), (inst 0, texternalsym:$off, I32:$addr, ty:$val)>; def : StorePatExternalSym; def : StorePatExternalSym; @@ -366,24 +364,23 @@ def : StorePatExternalSym; // Select stores with just a constant offset. 
-class StorePatOffsetOnly : - Pat<(storekind ty:$val, imm:$off), - (inst 0, imm:$off, (CONST_I32 0), ty:$val)>; +class StorePatOffsetOnly : + Pat<(kind ty:$val, imm:$off), (inst 0, imm:$off, (CONST_I32 0), ty:$val)>; def : StorePatOffsetOnly; def : StorePatOffsetOnly; def : StorePatOffsetOnly; def : StorePatOffsetOnly; -class StorePatGlobalAddrOffOnly : - Pat<(storekind ty:$val, (WebAssemblywrapper tglobaladdr:$off)), +class StorePatGlobalAddrOffOnly : + Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)), (inst 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>; def : StorePatGlobalAddrOffOnly; def : StorePatGlobalAddrOffOnly; def : StorePatGlobalAddrOffOnly; def : StorePatGlobalAddrOffOnly; -class StorePatExternSymOffOnly : - Pat<(storekind ty:$val, (WebAssemblywrapper texternalsym:$off)), +class StorePatExternSymOffOnly : + Pat<(kind ty:$val, (WebAssemblywrapper texternalsym:$off)), (inst 0, texternalsym:$off, (CONST_I32 0), ty:$val)>; def : StorePatExternSymOffOnly; def : StorePatExternSymOffOnly; Index: test/CodeGen/WebAssembly/i32-load-store-alignment.ll =================================================================== --- test/CodeGen/WebAssembly/i32-load-store-alignment.ll +++ test/CodeGen/WebAssembly/i32-load-store-alignment.ll @@ -5,7 +5,9 @@ target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -; Loads. +;===---------------------------------------------------------------------------- +; Loads +;===---------------------------------------------------------------------------- ; CHECK-LABEL: ldi32_a1: ; CHECK-NEXT: .param i32{{$}} @@ -63,7 +65,9 @@ ret i32 %v } -; Extending loads. +;===---------------------------------------------------------------------------- +; Extending loads +;===---------------------------------------------------------------------------- ; CHECK-LABEL: ldi8_a1: ; CHECK-NEXT: .param i32{{$}} @@ -115,7 +119,9 @@ ret i16 %v } -; Stores. 
+;===---------------------------------------------------------------------------- +; Stores +;===---------------------------------------------------------------------------- ; CHECK-LABEL: sti32_a1: ; CHECK-NEXT: .param i32, i32{{$}} @@ -166,7 +172,9 @@ ret void } -; Truncating stores. +;===---------------------------------------------------------------------------- +; Truncating stores +;===---------------------------------------------------------------------------- ; CHECK-LABEL: sti8_a1: ; CHECK-NEXT: .param i32, i32{{$}} @@ -213,9 +221,12 @@ ret void } -; Atomics. -; Wasm atomics have the alignment field, but it must always have the -; type's natural alignment. +;===---------------------------------------------------------------------------- +; Atomic loads +;===---------------------------------------------------------------------------- + +; Wasm atomics have the alignment field, but it must always have the type's +; natural alignment. ; CHECK-LABEL: ldi32_atomic_a4: ; CHECK-NEXT: .param i32{{$}} @@ -227,7 +238,7 @@ ret i32 %v } -; 8 is greater than the default alignment so it is rounded down to 4 +; 8 is greater than the default alignment so it is ignored. ; CHECK-LABEL: ldi32_atomic_a8: ; CHECK-NEXT: .param i32{{$}} @@ -239,6 +250,10 @@ ret i32 %v } +;===---------------------------------------------------------------------------- +; Atomic stores +;===---------------------------------------------------------------------------- + ; CHECK-LABEL: sti32_atomic_a4: ; CHECK-NEXT: .param i32, i32{{$}} ; CHECK-NEXT: i32.atomic.store 0($0), $1{{$}} @@ -248,6 +263,8 @@ ret void } +; 8 is greater than the default alignment so it is ignored. 
+ ; CHECK-LABEL: sti32_atomic_a8: ; CHECK-NEXT: .param i32, i32{{$}} ; CHECK-NEXT: i32.atomic.store 0($0), $1{{$}} Index: test/CodeGen/WebAssembly/i64-load-store-alignment.ll =================================================================== --- test/CodeGen/WebAssembly/i64-load-store-alignment.ll +++ test/CodeGen/WebAssembly/i64-load-store-alignment.ll @@ -5,7 +5,9 @@ target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -; Loads. +;===---------------------------------------------------------------------------- +; Loads +;===---------------------------------------------------------------------------- ; CHECK-LABEL: ldi64_a1: ; CHECK-NEXT: .param i32{{$}} @@ -73,7 +75,9 @@ ret i64 %v } -; Extending loads. +;===---------------------------------------------------------------------------- +; Extending loads +;===---------------------------------------------------------------------------- ; CHECK-LABEL: ldi8_a1: ; CHECK-NEXT: .param i32{{$}} @@ -174,7 +178,9 @@ ret i64 %w } -; Stores. +;===---------------------------------------------------------------------------- +; Stores +;===---------------------------------------------------------------------------- ; CHECK-LABEL: sti64_a1: ; CHECK-NEXT: .param i32, i64{{$}} @@ -234,7 +240,9 @@ ret void } -; Truncating stores. +;===---------------------------------------------------------------------------- +; Truncating stores +;===---------------------------------------------------------------------------- ; CHECK-LABEL: sti8_a1: ; CHECK-NEXT: .param i32, i64{{$}} @@ -326,7 +334,10 @@ ret void } -; Atomics. +;===---------------------------------------------------------------------------- +; Atomic loads +;===---------------------------------------------------------------------------- + ; Wasm atomics have the alignment field, but it must always have the type's ; natural alignment. @@ -341,6 +352,7 @@ } ; 16 is greater than the default alignment so it is ignored. 
+ ; CHECK-LABEL: ldi64_atomic_a16: ; CHECK-NEXT: .param i32{{$}} ; CHECK-NEXT: .result i64{{$}} @@ -351,6 +363,10 @@ ret i64 %v } +;===---------------------------------------------------------------------------- +; Atomic stores +;===---------------------------------------------------------------------------- + ; CHECK-LABEL: sti64_atomic_a4: ; CHECK-NEXT: .param i32, i64{{$}} ; CHECK-NEXT: i64.atomic.store 0($0), $1{{$}} @@ -361,6 +377,7 @@ } ; 16 is greater than the default alignment so it is ignored. + ; CHECK-LABEL: sti64_atomic_a8: ; CHECK-NEXT: .param i32, i64{{$}} ; CHECK-NEXT: i64.atomic.store 0($0), $1{{$}} Index: test/CodeGen/WebAssembly/offset-atomics.ll =================================================================== --- test/CodeGen/WebAssembly/offset-atomics.ll +++ test/CodeGen/WebAssembly/offset-atomics.ll @@ -6,11 +6,15 @@ target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" +;===---------------------------------------------------------------------------- +; Atomic loads: 32-bit +;===---------------------------------------------------------------------------- + ; Basic load. ; CHECK-LABEL: load_i32_no_offset: -; CHECK: i32.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} +; CHECK: i32.atomic.load $push0=, 0($0){{$}} +; CHECK-NEXT: return $pop0{{$}} define i32 @load_i32_no_offset(i32 *%p) { %v = load atomic i32, i32* %p seq_cst, align 4 ret i32 %v @@ -19,7 +23,7 @@ ; With an nuw add, we can fold an offset. ; CHECK-LABEL: load_i32_with_folded_offset: -; CHECK: i32.atomic.load $push0=, 24($0){{$}} +; CHECK: i32.atomic.load $push0=, 24($0){{$}} define i32 @load_i32_with_folded_offset(i32* %p) { %q = ptrtoint i32* %p to i32 %r = add nuw i32 %q, 24 @@ -31,7 +35,7 @@ ; With an inbounds gep, we can fold an offset. 
; CHECK-LABEL: load_i32_with_folded_gep_offset: -; CHECK: i32.atomic.load $push0=, 24($0){{$}} +; CHECK: i32.atomic.load $push0=, 24($0){{$}} define i32 @load_i32_with_folded_gep_offset(i32* %p) { %s = getelementptr inbounds i32, i32* %p, i32 6 %t = load atomic i32, i32* %s seq_cst, align 4 @@ -42,8 +46,8 @@ ; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset: ; CHECK: i32.const $push0=, -24{{$}} -; CHECK: i32.add $push1=, $0, $pop0{{$}} -; CHECK: i32.atomic.load $push2=, 0($pop1){{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.atomic.load $push2=, 0($pop1){{$}} define i32 @load_i32_with_unfolded_gep_negative_offset(i32* %p) { %s = getelementptr inbounds i32, i32* %p, i32 -6 %t = load atomic i32, i32* %s seq_cst, align 4 @@ -54,8 +58,8 @@ ; CHECK-LABEL: load_i32_with_unfolded_offset: ; CHECK: i32.const $push0=, 24{{$}} -; CHECK: i32.add $push1=, $0, $pop0{{$}} -; CHECK: i32.atomic.load $push2=, 0($pop1){{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.atomic.load $push2=, 0($pop1){{$}} define i32 @load_i32_with_unfolded_offset(i32* %p) { %q = ptrtoint i32* %p to i32 %r = add nsw i32 %q, 24 @@ -68,26 +72,52 @@ ; CHECK-LABEL: load_i32_with_unfolded_gep_offset: ; CHECK: i32.const $push0=, 24{{$}} -; CHECK: i32.add $push1=, $0, $pop0{{$}} -; CHECK: i32.atomic.load $push2=, 0($pop1){{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.atomic.load $push2=, 0($pop1){{$}} define i32 @load_i32_with_unfolded_gep_offset(i32* %p) { %s = getelementptr i32, i32* %p, i32 6 %t = load atomic i32, i32* %s seq_cst, align 4 ret i32 %t } +; When loading from a fixed address, materialize a zero. 
+ +; CHECK-LABEL: load_i32_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.load $push1=, 42($pop0){{$}} +define i32 @load_i32_from_numeric_address() { + %s = inttoptr i32 42 to i32* + %t = load atomic i32, i32* %s seq_cst, align 4 + ret i32 %t +} + +; CHECK-LABEL: load_i32_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.load $push1=, gv($pop0){{$}} +@gv = global i32 0 +define i32 @load_i32_from_global_address() { + %t = load atomic i32, i32* @gv seq_cst, align 4 + ret i32 %t +} + +;===---------------------------------------------------------------------------- +; Atomic loads: 64-bit +;===---------------------------------------------------------------------------- + +; Basic load. + ; CHECK-LABEL: load_i64_no_offset: -; CHECK: i64.atomic.load $push[[NUM:[0-9]+]]=, 0($0){{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} +; CHECK: i64.atomic.load $push0=, 0($0){{$}} +; CHECK-NEXT: return $pop0{{$}} define i64 @load_i64_no_offset(i64 *%p) { %v = load atomic i64, i64* %p seq_cst, align 8 ret i64 %v } -; Same as above but with i64. +; With an nuw add, we can fold an offset. ; CHECK-LABEL: load_i64_with_folded_offset: -; CHECK: i64.atomic.load $push0=, 24($0){{$}} +; CHECK: i64.atomic.load $push0=, 24($0){{$}} define i64 @load_i64_with_folded_offset(i64* %p) { %q = ptrtoint i64* %p to i32 %r = add nuw i32 %q, 24 @@ -96,34 +126,34 @@ ret i64 %t } -; Same as above but with i64. +; With an inbounds gep, we can fold an offset. ; CHECK-LABEL: load_i64_with_folded_gep_offset: -; CHECK: i64.atomic.load $push0=, 24($0){{$}} +; CHECK: i64.atomic.load $push0=, 24($0){{$}} define i64 @load_i64_with_folded_gep_offset(i64* %p) { %s = getelementptr inbounds i64, i64* %p, i32 3 %t = load atomic i64, i64* %s seq_cst, align 8 ret i64 %t } -; Same as above but with i64. +; We can't fold a negative offset though, even with an inbounds gep. 
; CHECK-LABEL: load_i64_with_unfolded_gep_negative_offset: ; CHECK: i32.const $push0=, -24{{$}} -; CHECK: i32.add $push1=, $0, $pop0{{$}} -; CHECK: i64.atomic.load $push2=, 0($pop1){{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.atomic.load $push2=, 0($pop1){{$}} define i64 @load_i64_with_unfolded_gep_negative_offset(i64* %p) { %s = getelementptr inbounds i64, i64* %p, i32 -3 %t = load atomic i64, i64* %s seq_cst, align 8 ret i64 %t } -; Same as above but with i64. +; Without nuw, and even with nsw, we can't fold an offset. ; CHECK-LABEL: load_i64_with_unfolded_offset: ; CHECK: i32.const $push0=, 24{{$}} -; CHECK: i32.add $push1=, $0, $pop0{{$}} -; CHECK: i64.atomic.load $push2=, 0($pop1){{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.atomic.load $push2=, 0($pop1){{$}} define i64 @load_i64_with_unfolded_offset(i64* %p) { %q = ptrtoint i64* %p to i32 %r = add nsw i32 %q, 24 @@ -132,31 +162,23 @@ ret i64 %t } -; Same as above but with i64. +; Without inbounds, we can't fold a gep offset. 
; CHECK-LABEL: load_i64_with_unfolded_gep_offset: ; CHECK: i32.const $push0=, 24{{$}} -; CHECK: i32.add $push1=, $0, $pop0{{$}} -; CHECK: i64.atomic.load $push2=, 0($pop1){{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.atomic.load $push2=, 0($pop1){{$}} define i64 @load_i64_with_unfolded_gep_offset(i64* %p) { %s = getelementptr i64, i64* %p, i32 3 %t = load atomic i64, i64* %s seq_cst, align 8 ret i64 %t } -; CHECK-LABEL: load_i32_with_folded_or_offset: -; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}} -; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} -define i32 @load_i32_with_folded_or_offset(i32 %x) { - %and = and i32 %x, -4 - %t0 = inttoptr i32 %and to i8* - %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 - %t1 = load atomic i8, i8* %arrayidx seq_cst, align 8 - %conv = sext i8 %t1 to i32 - ret i32 %conv -} +;===---------------------------------------------------------------------------- +; Atomic stores: 32-bit +;===---------------------------------------------------------------------------- -; Same as above but with store. +; Basic store. ; CHECK-LABEL: store_i32_no_offset: ; CHECK-NEXT: .param i32, i32{{$}} @@ -167,7 +189,7 @@ ret void } -; Same as above but with store. +; With an nuw add, we can fold an offset. ; CHECK-LABEL: store_i32_with_folded_offset: ; CHECK: i32.atomic.store 24($0), $pop0{{$}} @@ -179,7 +201,7 @@ ret void } -; Same as above but with store. +; With an inbounds gep, we can fold an offset. ; CHECK-LABEL: store_i32_with_folded_gep_offset: ; CHECK: i32.atomic.store 24($0), $pop0{{$}} @@ -189,11 +211,11 @@ ret void } -; Same as above but with store. +; We can't fold a negative offset though, even with an inbounds gep. 
; CHECK-LABEL: store_i32_with_unfolded_gep_negative_offset: -; CHECK: i32.const $push0=, -24{{$}} -; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} ; CHECK: i32.atomic.store 0($pop1), $pop2{{$}} define void @store_i32_with_unfolded_gep_negative_offset(i32* %p) { %s = getelementptr inbounds i32, i32* %p, i32 -6 @@ -201,11 +223,11 @@ ret void } -; Same as above but with store. +; Without nuw, and even with nsw, we can't fold an offset. ; CHECK-LABEL: store_i32_with_unfolded_offset: -; CHECK: i32.const $push0=, 24{{$}} -; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} ; CHECK: i32.atomic.store 0($pop1), $pop2{{$}} define void @store_i32_with_unfolded_offset(i32* %p) { %q = ptrtoint i32* %p to i32 @@ -215,11 +237,11 @@ ret void } -; Same as above but with store. +; Without inbounds, we can't fold a gep offset. ; CHECK-LABEL: store_i32_with_unfolded_gep_offset: -; CHECK: i32.const $push0=, 24{{$}} -; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} ; CHECK: i32.atomic.store 0($pop1), $pop2{{$}} define void @store_i32_with_unfolded_gep_offset(i32* %p) { %s = getelementptr i32, i32* %p, i32 6 @@ -227,7 +249,32 @@ ret void } -; Same as above but with store with i64. +; When storing to a fixed address, materialize a zero. 
+ +; CHECK-LABEL: store_i32_to_numeric_address: +; CHECK-NEXT: i32.const $push0=, 0{{$}} +; CHECK-NEXT: i32.const $push1=, 0{{$}} +; CHECK-NEXT: i32.atomic.store 42($pop0), $pop1{{$}} +define void @store_i32_to_numeric_address() { + %s = inttoptr i32 42 to i32* + store atomic i32 0, i32* %s seq_cst, align 4 + ret void +} + +; CHECK-LABEL: store_i32_to_global_address: +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.const $push1=, 0{{$}} +; CHECK: i32.atomic.store gv($pop0), $pop1{{$}} +define void @store_i32_to_global_address() { + store atomic i32 0, i32* @gv seq_cst, align 4 + ret void +} + +;===---------------------------------------------------------------------------- +; Atomic stores: 64-bit +;===---------------------------------------------------------------------------- + +; Basic store. ; CHECK-LABEL: store_i64_no_offset: ; CHECK-NEXT: .param i32, i64{{$}} @@ -238,7 +285,7 @@ ret void } -; Same as above but with store with i64. +; With an nuw add, we can fold an offset. ; CHECK-LABEL: store_i64_with_folded_offset: ; CHECK: i64.atomic.store 24($0), $pop0{{$}} @@ -250,7 +297,7 @@ ret void } -; Same as above but with store with i64. +; With an inbounds gep, we can fold an offset. ; CHECK-LABEL: store_i64_with_folded_gep_offset: ; CHECK: i64.atomic.store 24($0), $pop0{{$}} @@ -260,11 +307,11 @@ ret void } -; Same as above but with store with i64. +; We can't fold a negative offset though, even with an inbounds gep. ; CHECK-LABEL: store_i64_with_unfolded_gep_negative_offset: -; CHECK: i32.const $push0=, -24{{$}} -; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} ; CHECK: i64.atomic.store 0($pop1), $pop2{{$}} define void @store_i64_with_unfolded_gep_negative_offset(i64* %p) { %s = getelementptr inbounds i64, i64* %p, i32 -3 @@ -272,11 +319,11 @@ ret void } -; Same as above but with store with i64. +; Without nuw, and even with nsw, we can't fold an offset. 
; CHECK-LABEL: store_i64_with_unfolded_offset: -; CHECK: i32.const $push0=, 24{{$}} -; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} ; CHECK: i64.atomic.store 0($pop1), $pop2{{$}} define void @store_i64_with_unfolded_offset(i64* %p) { %q = ptrtoint i64* %p to i32 @@ -286,11 +333,11 @@ ret void } -; Same as above but with store with i64. +; Without inbounds, we can't fold a gep offset. ; CHECK-LABEL: store_i64_with_unfolded_gep_offset: -; CHECK: i32.const $push0=, 24{{$}} -; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} ; CHECK: i64.atomic.store 0($pop1), $pop2{{$}} define void @store_i64_with_unfolded_gep_offset(i64* %p) { %s = getelementptr i64, i64* %p, i32 3 @@ -298,52 +345,16 @@ ret void } -; When loading from a fixed address, materialize a zero. - -; CHECK-LABEL: load_i32_from_numeric_address -; CHECK: i32.const $push0=, 0{{$}} -; CHECK: i32.atomic.load $push1=, 42($pop0){{$}} -define i32 @load_i32_from_numeric_address() { - %s = inttoptr i32 42 to i32* - %t = load atomic i32, i32* %s seq_cst, align 4 - ret i32 %t -} - - -; CHECK-LABEL: load_i32_from_global_address -; CHECK: i32.const $push0=, 0{{$}} -; CHECK: i32.atomic.load $push1=, gv($pop0){{$}} -@gv = global i32 0 -define i32 @load_i32_from_global_address() { - %t = load atomic i32, i32* @gv seq_cst, align 4 - ret i32 %t -} - -; CHECK-LABEL: store_i32_to_numeric_address: -; CHECK-NEXT: i32.const $push0=, 0{{$}} -; CHECK-NEXT: i32.const $push1=, 0{{$}} -; CHECK-NEXT: i32.atomic.store 42($pop0), $pop1{{$}} -define void @store_i32_to_numeric_address() { - %s = inttoptr i32 42 to i32* - store atomic i32 0, i32* %s seq_cst, align 4 - ret void -} - -; CHECK-LABEL: store_i32_to_global_address: -; CHECK: i32.const $push0=, 0{{$}} -; CHECK: i32.const $push1=, 0{{$}} -; CHECK: i32.atomic.store gv($pop0), $pop1{{$}} -define void @store_i32_to_global_address() { - store 
atomic i32 0, i32* @gv seq_cst, align 4 - ret void -} +;===---------------------------------------------------------------------------- +; Atomic sign-extending loads +;===---------------------------------------------------------------------------- ; Fold an offset into a sign-extending load. -; CHECK-LABEL: load_i8_s_with_folded_offset: +; CHECK-LABEL: load_i8_i32_s_with_folded_offset: ; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}} ; CHECK-NEXT: i32.extend8_s $push1=, $pop0 -define i32 @load_i8_s_with_folded_offset(i8* %p) { +define i32 @load_i8_i32_s_with_folded_offset(i8* %p) { %q = ptrtoint i8* %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to i8* @@ -352,167 +363,232 @@ ret i32 %u } +; 32->64 sext load gets selected as i32.atomic.load, i64.extend_s/i32 +; CHECK-LABEL: load_i32_i64_s_with_folded_offset: +; CHECK: i32.atomic.load $push0=, 24($0){{$}} +; CHECK-NEXT: i64.extend_s/i32 $push1=, $pop0{{$}} +define i64 @load_i32_i64_s_with_folded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = load atomic i32, i32* %s seq_cst, align 4 + %u = sext i32 %t to i64 + ret i64 %u +} + ; Fold a gep offset into a sign-extending load. 
-; CHECK-LABEL: load_i8_s_with_folded_gep_offset: +; CHECK-LABEL: load_i8_i32_s_with_folded_gep_offset: ; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}} ; CHECK-NEXT: i32.extend8_s $push1=, $pop0 -define i32 @load_i8_s_with_folded_gep_offset(i8* %p) { +define i32 @load_i8_i32_s_with_folded_gep_offset(i8* %p) { %s = getelementptr inbounds i8, i8* %p, i32 24 %t = load atomic i8, i8* %s seq_cst, align 1 %u = sext i8 %t to i32 ret i32 %u } -; CHECK-LABEL: load_i16_s_i64_with_folded_gep_offset: -; CHECK: i64.atomic.load16_u $push0=, 6($0){{$}} -define i64 @load_i16_s_i64_with_folded_gep_offset(i16* %p) { - %s = getelementptr inbounds i16, i16* %p, i32 3 +; CHECK-LABEL: load_i16_i32_s_with_folded_gep_offset: +; CHECK: i32.atomic.load16_u $push0=, 48($0){{$}} +; CHECK-NEXT: i32.extend16_s $push1=, $pop0 +define i32 @load_i16_i32_s_with_folded_gep_offset(i16* %p) { + %s = getelementptr inbounds i16, i16* %p, i32 24 %t = load atomic i16, i16* %s seq_cst, align 2 - %u = zext i16 %t to i64 + %u = sext i16 %t to i32 + ret i32 %u +} + +; CHECK-LABEL: load_i16_i64_s_with_folded_gep_offset: +; CHECK: i64.atomic.load16_u $push0=, 48($0){{$}} +; CHECK-NEXT: i64.extend16_s $push1=, $pop0 +define i64 @load_i16_i64_s_with_folded_gep_offset(i16* %p) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %t = load atomic i16, i16* %s seq_cst, align 2 + %u = sext i16 %t to i64 ret i64 %u } -; CHECK-LABEL: load_i64_with_folded_or_offset: +; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as +; an 'add' if the or'ed bits are known to be zero. 
+ +; CHECK-LABEL: load_i8_i32_s_with_folded_or_offset: +; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}} +; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} +define i32 @load_i8_i32_s_with_folded_or_offset(i32 %x) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %t1 = load atomic i8, i8* %arrayidx seq_cst, align 1 + %conv = sext i8 %t1 to i32 + ret i32 %conv +} + +; CHECK-LABEL: load_i8_i64_s_with_folded_or_offset: ; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}} ; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}} -define i64 @load_i64_with_folded_or_offset(i32 %x) { +define i64 @load_i8_i64_s_with_folded_or_offset(i32 %x) { %and = and i32 %x, -4 %t0 = inttoptr i32 %and to i8* %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 - %t1 = load atomic i8, i8* %arrayidx seq_cst, align 8 + %t1 = load atomic i8, i8* %arrayidx seq_cst, align 1 %conv = sext i8 %t1 to i64 ret i64 %conv } +; When loading from a fixed address, materialize a zero. 
+ +; CHECK-LABEL: load_i16_i32_s_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}} +; CHECK-NEXT: i32.extend16_s $push2=, $pop1 +define i32 @load_i16_i32_s_from_numeric_address() { + %s = inttoptr i32 42 to i16* + %t = load atomic i16, i16* %s seq_cst, align 2 + %u = sext i16 %t to i32 + ret i32 %u +} + +; CHECK-LABEL: load_i8_i32_s_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}} +; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}} +@gv8 = global i8 0 +define i32 @load_i8_i32_s_from_global_address() { + %t = load atomic i8, i8* @gv8 seq_cst, align 1 + %u = sext i8 %t to i32 + ret i32 %u +} + +;===---------------------------------------------------------------------------- +; Atomic zero-extending loads +;===---------------------------------------------------------------------------- ; Fold an offset into a zero-extending load. -; CHECK-LABEL: load_i16_u_with_folded_offset: -; CHECK: i32.atomic.load16_u $push0=, 24($0){{$}} -define i32 @load_i16_u_with_folded_offset(i8* %p) { +; CHECK-LABEL: load_i8_i32_z_with_folded_offset: +; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}} +define i32 @load_i8_i32_z_with_folded_offset(i8* %p) { %q = ptrtoint i8* %p to i32 %r = add nuw i32 %q, 24 - %s = inttoptr i32 %r to i16* - %t = load atomic i16, i16* %s seq_cst, align 2 - %u = zext i16 %t to i32 + %s = inttoptr i32 %r to i8* + %t = load atomic i8, i8* %s seq_cst, align 1 + %u = zext i8 %t to i32 ret i32 %u } +; CHECK-LABEL: load_i32_i64_z_with_folded_offset: +; CHECK: i64.atomic.load32_u $push0=, 24($0){{$}} +define i64 @load_i32_i64_z_with_folded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = load atomic i32, i32* %s seq_cst, align 4 + %u = zext i32 %t to i64 + ret i64 %u +} + ; Fold a gep offset into a zero-extending load. 
-; CHECK-LABEL: load_i8_u_with_folded_gep_offset: +; CHECK-LABEL: load_i8_i32_z_with_folded_gep_offset: ; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}} -define i32 @load_i8_u_with_folded_gep_offset(i8* %p) { +define i32 @load_i8_i32_z_with_folded_gep_offset(i8* %p) { %s = getelementptr inbounds i8, i8* %p, i32 24 %t = load atomic i8, i8* %s seq_cst, align 1 %u = zext i8 %t to i32 ret i32 %u } +; CHECK-LABEL: load_i16_i32_z_with_folded_gep_offset: +; CHECK: i32.atomic.load16_u $push0=, 48($0){{$}} +define i32 @load_i16_i32_z_with_folded_gep_offset(i16* %p) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %t = load atomic i16, i16* %s seq_cst, align 2 + %u = zext i16 %t to i32 + ret i32 %u +} + +; CHECK-LABEL: load_i16_i64_z_with_folded_gep_offset: +; CHECK: i64.atomic.load16_u $push0=, 48($0){{$}} +define i64 @load_i16_i64_z_with_folded_gep_offset(i16* %p) { + %s = getelementptr inbounds i16, i16* %p, i64 24 + %t = load atomic i16, i16* %s seq_cst, align 2 + %u = zext i16 %t to i64 + ret i64 %u +} + +; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as +; an 'add' if the or'ed bits are known to be zero. 
+ +; CHECK-LABEL: load_i8_i32_z_with_folded_or_offset: +; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}} +define i32 @load_i8_i32_z_with_folded_or_offset(i32 %x) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %t1 = load atomic i8, i8* %arrayidx seq_cst, align 1 + %conv = zext i8 %t1 to i32 + ret i32 %conv +} + +; CHECK-LABEL: load_i8_i64_z_with_folded_or_offset: +; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}} +define i64 @load_i8_i64_z_with_folded_or_offset(i32 %x) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %t1 = load atomic i8, i8* %arrayidx seq_cst, align 1 + %conv = zext i8 %t1 to i64 + ret i64 %conv +} ; When loading from a fixed address, materialize a zero. -; As above but with extending load. -; CHECK-LABEL: load_zext_i32_from_numeric_address +; CHECK-LABEL: load_i16_i32_z_from_numeric_address ; CHECK: i32.const $push0=, 0{{$}} -; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}} -define i32 @load_zext_i32_from_numeric_address() { +; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}} +define i32 @load_i16_i32_z_from_numeric_address() { %s = inttoptr i32 42 to i16* %t = load atomic i16, i16* %s seq_cst, align 2 %u = zext i16 %t to i32 ret i32 %u } -; CHECK-LABEL: load_sext_i32_from_global_address +; CHECK-LABEL: load_i8_i32_z_from_global_address ; CHECK: i32.const $push0=, 0{{$}} -; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}} -; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}} -@gv8 = global i8 0 -define i32 @load_sext_i32_from_global_address() { +; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}} +define i32 @load_i8_i32_z_from_global_address() { %t = load atomic i8, i8* @gv8 seq_cst, align 1 - %u = sext i8 %t to i32 + %u = zext i8 %t to i32 ret i32 %u } -; Fold an offset into a sign-extending load. -; As above but 32 extended to 64 bit. 
-; CHECK-LABEL: load_i32_i64_s_with_folded_offset: -; CHECK: i32.atomic.load $push0=, 24($0){{$}} -; CHECK-NEXT: i64.extend_s/i32 $push1=, $pop0{{$}} -define i64 @load_i32_i64_s_with_folded_offset(i32* %p) { - %q = ptrtoint i32* %p to i32 - %r = add nuw i32 %q, 24 - %s = inttoptr i32 %r to i32* - %t = load atomic i32, i32* %s seq_cst, align 4 - %u = sext i32 %t to i64 - ret i64 %u -} - -; Fold a gep offset into a zero-extending load. -; As above but 32 extended to 64 bit. -; CHECK-LABEL: load_i32_i64_u_with_folded_gep_offset: -; CHECK: i64.atomic.load32_u $push0=, 96($0){{$}} -define i64 @load_i32_i64_u_with_folded_gep_offset(i32* %p) { - %s = getelementptr inbounds i32, i32* %p, i32 24 - %t = load atomic i32, i32* %s seq_cst, align 4 - %u = zext i32 %t to i64 - ret i64 %u -} - ; i8 return value should test anyext loads -; CHECK-LABEL: ldi8_a1: -; CHECK: i32.atomic.load8_u $push[[NUM:[0-9]+]]=, 0($0){{$}} -; CHECK-NEXT: return $pop[[NUM]]{{$}} -define i8 @ldi8_a1(i8 *%p) { + +; CHECK-LABEL: load_i8_i32_retvalue: +; CHECK: i32.atomic.load8_u $push0=, 0($0){{$}} +; CHECK-NEXT: return $pop0{{$}} +define i8 @load_i8_i32_retvalue(i8 *%p) { %v = load atomic i8, i8* %p seq_cst, align 1 ret i8 %v } -; Fold an offset into a truncating store. 
- -; CHECK-LABEL: store_i8_with_folded_offset: -; CHECK: i32.atomic.store8 24($0), $pop0{{$}} -define void @store_i8_with_folded_offset(i8* %p) { - %q = ptrtoint i8* %p to i32 - %r = add nuw i32 %q, 24 - %s = inttoptr i32 %r to i8* - store atomic i8 0, i8* %s seq_cst, align 1 - ret void -} +;===---------------------------------------------------------------------------- +; Atomic truncating stores +;===---------------------------------------------------------------------------- -; CHECK-LABEL: store_i16_with_folded_offset: -; CHECK: i32.atomic.store16 24($0), $pop0{{$}} -define void @store_i16_with_folded_offset(i16* %p) { - %q = ptrtoint i16* %p to i32 - %r = add nuw i32 %q, 24 - %s = inttoptr i32 %r to i16* - store atomic i16 0, i16* %s seq_cst, align 2 - ret void -} +; Fold an offset into a truncating store. -; CHECK-LABEL: store_i8_i64_with_folded_offset: -; CHECK: i64.atomic.store8 24($0), $1{{$}} -define void @store_i8_i64_with_folded_offset(i8* %p, i64 %v) { +; CHECK-LABEL: store_i8_i32_with_folded_offset: +; CHECK: i32.atomic.store8 24($0), $1{{$}} +define void @store_i8_i32_with_folded_offset(i8* %p, i32 %v) { %q = ptrtoint i8* %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to i8* - %t = trunc i64 %v to i8 + %t = trunc i32 %v to i8 store atomic i8 %t, i8* %s seq_cst, align 1 ret void } -; CHECK-LABEL: store_i16_i64_with_folded_offset: -; CHECK: i64.atomic.store16 24($0), $1{{$}} -define void @store_i16_i64_with_folded_offset(i16* %p, i64 %v) { - %q = ptrtoint i16* %p to i32 - %r = add nuw i32 %q, 24 - %s = inttoptr i32 %r to i16* - %t = trunc i64 %v to i16 - store atomic i16 %t, i16* %s seq_cst, align 2 - ret void -} - ; CHECK-LABEL: store_i32_i64_with_folded_offset: ; CHECK: i64.atomic.store32 24($0), $1{{$}} define void @store_i32_i64_with_folded_offset(i32* %p, i64 %v) { @@ -526,28 +602,21 @@ ; Fold a gep offset into a truncating store. 
-; CHECK-LABEL: store_i8_with_folded_gep_offset: -; CHECK: i32.atomic.store8 24($0), $pop0{{$}} -define void @store_i8_with_folded_gep_offset(i8* %p) { +; CHECK-LABEL: store_i8_i32_with_folded_gep_offset: +; CHECK: i32.atomic.store8 24($0), $1{{$}} +define void @store_i8_i32_with_folded_gep_offset(i8* %p, i32 %v) { %s = getelementptr inbounds i8, i8* %p, i32 24 - store atomic i8 0, i8* %s seq_cst, align 1 + %t = trunc i32 %v to i8 + store atomic i8 %t, i8* %s seq_cst, align 1 ret void } -; CHECK-LABEL: store_i16_with_folded_gep_offset: -; CHECK: i32.atomic.store16 48($0), $pop0{{$}} -define void @store_i16_with_folded_gep_offset(i16* %p) { +; CHECK-LABEL: store_i16_i32_with_folded_gep_offset: +; CHECK: i32.atomic.store16 48($0), $1{{$}} +define void @store_i16_i32_with_folded_gep_offset(i16* %p, i32 %v) { %s = getelementptr inbounds i16, i16* %p, i32 24 - store atomic i16 0, i16* %s seq_cst, align 2 - ret void -} - -; CHECK-LABEL: store_i8_i64_with_folded_gep_offset: -; CHECK: i64.atomic.store8 24($0), $1{{$}} -define void @store_i8_i64_with_folded_gep_offset(i8* %p, i64 %v) { - %s = getelementptr inbounds i8, i8* %p, i32 24 - %t = trunc i64 %v to i8 - store atomic i8 %t, i8* %s seq_cst, align 2 + %t = trunc i32 %v to i16 + store atomic i16 %t, i16* %s seq_cst, align 2 ret void } @@ -560,34 +629,17 @@ ret void } -; CHECK-LABEL: store_i32_i64_with_folded_gep_offset: -; CHECK: i64.atomic.store32 96($0), $1{{$}} -define void @store_i32_i64_with_folded_gep_offset(i32* %p, i64 %v) { - %s = getelementptr inbounds i32, i32* %p, i32 24 - %t = trunc i64 %v to i32 - store atomic i32 %t, i32* %s seq_cst, align 4 - ret void -} - -; Fold an or_is_add pattern based offset into a truncating store. +; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as +; an 'add' if the or'ed bits are known to be zero. 
-; CHECK-LABEL: store_i8_with_folded_or_offset: -; CHECK: i32.atomic.store8 2($pop{{[0-9]+}}), $pop{{[0-9]+}}{{$}} -define void @store_i8_with_folded_or_offset(i32 %x) { +; CHECK-LABEL: store_i8_i32_with_folded_or_offset: +; CHECK: i32.atomic.store8 2($pop{{[0-9]+}}), $1{{$}} +define void @store_i8_i32_with_folded_or_offset(i32 %x, i32 %v) { %and = and i32 %x, -4 %p = inttoptr i32 %and to i8* %arrayidx = getelementptr inbounds i8, i8* %p, i32 2 - store atomic i8 0, i8* %arrayidx seq_cst, align 1 - ret void -} - -; CHECK-LABEL: store_i16_with_folded_or_offset: -; CHECK: i32.atomic.store16 4($pop{{[0-9]+}}), $pop{{[0-9]+}}{{$}} -define void @store_i16_with_folded_or_offset(i32 %x) { - %and = and i32 %x, -4 - %p = inttoptr i32 %and to i16* - %arrayidx = getelementptr inbounds i16, i16* %p, i32 2 - store atomic i16 0, i16* %arrayidx seq_cst, align 2 + %t = trunc i32 %v to i8 + store atomic i8 %t, i8* %arrayidx seq_cst, align 1 ret void } @@ -601,25 +653,3 @@ store atomic i8 %t, i8* %arrayidx seq_cst, align 1 ret void } - -; CHECK-LABEL: store_i16_i64_with_folded_or_offset: -; CHECK: i64.atomic.store16 4($pop{{[0-9]+}}), $1{{$}} -define void @store_i16_i64_with_folded_or_offset(i32 %x, i64 %v) { - %and = and i32 %x, -4 - %p = inttoptr i32 %and to i16* - %arrayidx = getelementptr inbounds i16, i16* %p, i32 2 - %t = trunc i64 %v to i16 - store atomic i16 %t, i16* %arrayidx seq_cst, align 2 - ret void -} - -; CHECK-LABEL: store_i32_i64_with_folded_or_offset: -; CHECK: i64.atomic.store32 8($pop{{[0-9]+}}), $1{{$}} -define void @store_i32_i64_with_folded_or_offset(i32 %x, i64 %v) { - %and = and i32 %x, -4 - %p = inttoptr i32 %and to i32* - %arrayidx = getelementptr inbounds i32, i32* %p, i32 2 - %t = trunc i64 %v to i32 - store atomic i32 %t, i32* %arrayidx seq_cst, align 4 - ret void -} Index: test/CodeGen/WebAssembly/offset.ll =================================================================== --- test/CodeGen/WebAssembly/offset.ll +++ test/CodeGen/WebAssembly/offset.ll @@ 
-1,10 +1,24 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-explicit-locals | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-explicit-locals -disable-wasm-fallthrough-return-opt | FileCheck %s ; Test constant load and store address offsets. target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" +;===---------------------------------------------------------------------------- +; Loads: 32-bit +;===---------------------------------------------------------------------------- + +; Basic load. + +; CHECK-LABEL: load_i32_no_offset: +; CHECK: i32.load $push0=, 0($0){{$}} +; CHECK-NEXT: return $pop0{{$}} +define i32 @load_i32_no_offset(i32 *%p) { + %v = load i32, i32* %p + ret i32 %v +} + ; With an nuw add, we can fold an offset. ; CHECK-LABEL: load_i32_with_folded_offset: @@ -65,7 +79,41 @@ ret i32 %t } -; Same as above but with i64. +; When loading from a fixed address, materialize a zero. + +; CHECK-LABEL: load_i32_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.load $push1=, 42($pop0){{$}} +define i32 @load_i32_from_numeric_address() { + %s = inttoptr i32 42 to i32* + %t = load i32, i32* %s + ret i32 %t +} + +; CHECK-LABEL: load_i32_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.load $push1=, gv($pop0){{$}} +@gv = global i32 0 +define i32 @load_i32_from_global_address() { + %t = load i32, i32* @gv + ret i32 %t +} + +;===---------------------------------------------------------------------------- +; Loads: 64-bit +;===---------------------------------------------------------------------------- + +; Basic load. + +; CHECK-LABEL: load_i64_no_offset: +; CHECK: i64.load $push0=, 0($0){{$}} +; CHECK-NEXT: return $pop0{{$}} +define i64 @load_i64_no_offset(i64 *%p) { + %v = load i64, i64* %p + ret i64 %v +} + +; With an nuw add, we can fold an offset. 
; CHECK-LABEL: load_i64_with_folded_offset: ; CHECK: i64.load $push0=, 24($0){{$}} @@ -77,7 +125,7 @@ ret i64 %t } -; Same as above but with i64. +; With an inbounds gep, we can fold an offset. ; CHECK-LABEL: load_i64_with_folded_gep_offset: ; CHECK: i64.load $push0=, 24($0){{$}} @@ -87,7 +135,7 @@ ret i64 %t } -; Same as above but with i64. +; We can't fold a negative offset though, even with an inbounds gep. ; CHECK-LABEL: load_i64_with_unfolded_gep_negative_offset: ; CHECK: i32.const $push0=, -24{{$}} @@ -99,7 +147,7 @@ ret i64 %t } -; Same as above but with i64. +; Without nuw, and even with nsw, we can't fold an offset. ; CHECK-LABEL: load_i64_with_unfolded_offset: ; CHECK: i32.const $push0=, 24{{$}} @@ -113,7 +161,7 @@ ret i64 %t } -; Same as above but with i64. +; Without inbounds, we can't fold a gep offset. ; CHECK-LABEL: load_i64_with_unfolded_gep_offset: ; CHECK: i32.const $push0=, 24{{$}} @@ -125,18 +173,22 @@ ret i64 %t } -; CHECK-LABEL: load_i32_with_folded_or_offset: -; CHECK: i32.load8_s $push{{[0-9]+}}=, 2($pop{{[0-9]+}}){{$}} -define i32 @load_i32_with_folded_or_offset(i32 %x) { - %and = and i32 %x, -4 - %t0 = inttoptr i32 %and to i8* - %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 - %t1 = load i8, i8* %arrayidx, align 1 - %conv = sext i8 %t1 to i32 - ret i32 %conv +;===---------------------------------------------------------------------------- +; Stores: 32-bit +;===---------------------------------------------------------------------------- + +; Basic store. + +; CHECK-LABEL: store_i32_no_offset: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK-NEXT: i32.store 0($0), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @store_i32_no_offset(i32 *%p, i32 %v) { + store i32 %v, i32* %p + ret void } -; Same as above but with store. +; With an nuw add, we can fold an offset. ; CHECK-LABEL: store_i32_with_folded_offset: ; CHECK: i32.store 24($0), $pop0{{$}} @@ -148,7 +200,7 @@ ret void } -; Same as above but with store. 
+; With an inbounds gep, we can fold an offset. ; CHECK-LABEL: store_i32_with_folded_gep_offset: ; CHECK: i32.store 24($0), $pop0{{$}} @@ -158,7 +210,7 @@ ret void } -; Same as above but with store. +; We can't fold a negative offset though, even with an inbounds gep. ; CHECK-LABEL: store_i32_with_unfolded_gep_negative_offset: ; CHECK: i32.const $push0=, -24{{$}} @@ -170,7 +222,7 @@ ret void } -; Same as above but with store. +; Without nuw, and even with nsw, we can't fold an offset. ; CHECK-LABEL: store_i32_with_unfolded_offset: ; CHECK: i32.const $push0=, 24{{$}} @@ -184,7 +236,7 @@ ret void } -; Same as above but with store. +; Without inbounds, we can't fold a gep offset. ; CHECK-LABEL: store_i32_with_unfolded_gep_offset: ; CHECK: i32.const $push0=, 24{{$}} @@ -196,7 +248,32 @@ ret void } -; Same as above but with store with i64. +; When storing to a fixed address, materialize a zero. + +; CHECK-LABEL: store_i32_to_numeric_address: +; CHECK-NEXT: i32.const $push0=, 0{{$}} +; CHECK-NEXT: i32.const $push1=, 0{{$}} +; CHECK-NEXT: i32.store 42($pop0), $pop1{{$}} +define void @store_i32_to_numeric_address() { + %s = inttoptr i32 42 to i32* + store i32 0, i32* %s + ret void +} + +; CHECK-LABEL: store_i32_to_global_address: +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.const $push1=, 0{{$}} +; CHECK: i32.store gv($pop0), $pop1{{$}} +define void @store_i32_to_global_address() { + store i32 0, i32* @gv + ret void +} + +;===---------------------------------------------------------------------------- +; Stores: 64-bit +;===---------------------------------------------------------------------------- + +; With an nuw add, we can fold an offset. ; CHECK-LABEL: store_i64_with_folded_offset: ; CHECK: i64.store 24($0), $pop0{{$}} @@ -208,7 +285,7 @@ ret void } -; Same as above but with store with i64. +; With an inbounds gep, we can fold an offset. 
; CHECK-LABEL: store_i64_with_folded_gep_offset: ; CHECK: i64.store 24($0), $pop0{{$}} @@ -218,7 +295,7 @@ ret void } -; Same as above but with store with i64. +; We can't fold a negative offset though, even with an inbounds gep. ; CHECK-LABEL: store_i64_with_unfolded_gep_negative_offset: ; CHECK: i32.const $push0=, -24{{$}} @@ -230,7 +307,7 @@ ret void } -; Same as above but with store with i64. +; Without nuw, and even with nsw, we can't fold an offset. ; CHECK-LABEL: store_i64_with_unfolded_offset: ; CHECK: i32.const $push0=, 24{{$}} @@ -244,7 +321,7 @@ ret void } -; Same as above but with store with i64. +; Without inbounds, we can't fold a gep offset. ; CHECK-LABEL: store_i64_with_unfolded_gep_offset: ; CHECK: i32.const $push0=, 24{{$}} @@ -256,6 +333,8 @@ ret void } +; An 'or' whose or'ed bits are known to be zero is treated as an 'add', so its offset folds too. + ; CHECK-LABEL: store_i32_with_folded_or_offset: ; CHECK: i32.store8 2($pop{{[0-9]+}}), $pop{{[0-9]+}}{{$}} define void @store_i32_with_folded_or_offset(i32 %x) { @@ -266,50 +345,15 @@ ret void } -; When loading from a fixed address, materialize a zero. 
- -; CHECK-LABEL: load_i32_from_numeric_address -; CHECK: i32.const $push0=, 0{{$}} -; CHECK: i32.load $push1=, 42($pop0){{$}} -define i32 @load_i32_from_numeric_address() { - %s = inttoptr i32 42 to i32* - %t = load i32, i32* %s - ret i32 %t -} - -; CHECK-LABEL: load_i32_from_global_address -; CHECK: i32.const $push0=, 0{{$}} -; CHECK: i32.load $push1=, gv($pop0){{$}} -@gv = global i32 0 -define i32 @load_i32_from_global_address() { - %t = load i32, i32* @gv - ret i32 %t -} - -; CHECK-LABEL: store_i32_to_numeric_address: -; CHECK-NEXT: i32.const $push0=, 0{{$}} -; CHECK-NEXT: i32.const $push1=, 0{{$}} -; CHECK-NEXT: i32.store 42($pop0), $pop1{{$}} -define void @store_i32_to_numeric_address() { - %s = inttoptr i32 42 to i32* - store i32 0, i32* %s - ret void -} - -; CHECK-LABEL: store_i32_to_global_address: -; CHECK: i32.const $push0=, 0{{$}} -; CHECK: i32.const $push1=, 0{{$}} -; CHECK: i32.store gv($pop0), $pop1{{$}} -define void @store_i32_to_global_address() { - store i32 0, i32* @gv - ret void -} +;===---------------------------------------------------------------------------- +; Sign-extending loads +;===---------------------------------------------------------------------------- ; Fold an offset into a sign-extending load. -; CHECK-LABEL: load_i8_s_with_folded_offset: +; CHECK-LABEL: load_i8_i32_s_with_folded_offset: ; CHECK: i32.load8_s $push0=, 24($0){{$}} -define i32 @load_i8_s_with_folded_offset(i8* %p) { +define i32 @load_i8_i32_s_with_folded_offset(i8* %p) { %q = ptrtoint i8* %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to i8* @@ -318,22 +362,102 @@ ret i32 %u } +; CHECK-LABEL: load_i32_i64_s_with_folded_offset: +; CHECK: i64.load32_s $push0=, 24($0){{$}} +define i64 @load_i32_i64_s_with_folded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = load i32, i32* %s + %u = sext i32 %t to i64 + ret i64 %u +} + ; Fold a gep offset into a sign-extending load. 
-; CHECK-LABEL: load_i8_s_with_folded_gep_offset: +; CHECK-LABEL: load_i8_i32_s_with_folded_gep_offset: ; CHECK: i32.load8_s $push0=, 24($0){{$}} -define i32 @load_i8_s_with_folded_gep_offset(i8* %p) { +define i32 @load_i8_i32_s_with_folded_gep_offset(i8* %p) { %s = getelementptr inbounds i8, i8* %p, i32 24 %t = load i8, i8* %s %u = sext i8 %t to i32 ret i32 %u } +; CHECK-LABEL: load_i16_i32_s_with_folded_gep_offset: +; CHECK: i32.load16_s $push0=, 48($0){{$}} +define i32 @load_i16_i32_s_with_folded_gep_offset(i16* %p) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %t = load i16, i16* %s + %u = sext i16 %t to i32 + ret i32 %u +} + +; CHECK-LABEL: load_i16_i64_s_with_folded_gep_offset: +; CHECK: i64.load16_s $push0=, 48($0){{$}} +define i64 @load_i16_i64_s_with_folded_gep_offset(i16* %p) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %t = load i16, i16* %s + %u = sext i16 %t to i64 + ret i64 %u +} + +; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as +; an 'add' if the or'ed bits are known to be zero. + +; CHECK-LABEL: load_i8_i32_s_with_folded_or_offset: +; CHECK: i32.load8_s $push{{[0-9]+}}=, 2($pop{{[0-9]+}}){{$}} +define i32 @load_i8_i32_s_with_folded_or_offset(i32 %x) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %t1 = load i8, i8* %arrayidx + %conv = sext i8 %t1 to i32 + ret i32 %conv +} + +; CHECK-LABEL: load_i8_i64_s_with_folded_or_offset: +; CHECK: i64.load8_s $push{{[0-9]+}}=, 2($pop{{[0-9]+}}){{$}} +define i64 @load_i8_i64_s_with_folded_or_offset(i32 %x) { + %and = and i32 %x, -4 + %t0 = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %t0, i32 2 + %t1 = load i8, i8* %arrayidx + %conv = sext i8 %t1 to i64 + ret i64 %conv +} + +; When loading from a fixed address, materialize a zero. 
+ +; CHECK-LABEL: load_i16_i32_s_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.load16_s $push1=, 42($pop0){{$}} +define i32 @load_i16_i32_s_from_numeric_address() { + %s = inttoptr i32 42 to i16* + %t = load i16, i16* %s + %u = sext i16 %t to i32 + ret i32 %u +} + +; CHECK-LABEL: load_i8_i32_s_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.load8_s $push1=, gv8($pop0){{$}} +@gv8 = global i8 0 +define i32 @load_i8_i32_s_from_global_address() { + %t = load i8, i8* @gv8 + %u = sext i8 %t to i32 + ret i32 %u +} + +;===---------------------------------------------------------------------------- +; Zero-extending loads +;===---------------------------------------------------------------------------- + ; Fold an offset into a zero-extending load. -; CHECK-LABEL: load_i8_u_with_folded_offset: +; CHECK-LABEL: load_i8_i32_z_with_folded_offset: ; CHECK: i32.load8_u $push0=, 24($0){{$}} -define i32 @load_i8_u_with_folded_offset(i8* %p) { +define i32 @load_i8_i32_z_with_folded_offset(i8* %p) { %q = ptrtoint i8* %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to i8* @@ -342,39 +466,162 @@ ret i32 %u } +; CHECK-LABEL: load_i32_i64_z_with_folded_offset: +; CHECK: i64.load32_u $push0=, 24($0){{$}} +define i64 @load_i32_i64_z_with_folded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = load i32, i32* %s + %u = zext i32 %t to i64 + ret i64 %u +} + ; Fold a gep offset into a zero-extending load. 
-; CHECK-LABEL: load_i8_u_with_folded_gep_offset: +; CHECK-LABEL: load_i8_i32_z_with_folded_gep_offset: ; CHECK: i32.load8_u $push0=, 24($0){{$}} -define i32 @load_i8_u_with_folded_gep_offset(i8* %p) { +define i32 @load_i8_i32_z_with_folded_gep_offset(i8* %p) { %s = getelementptr inbounds i8, i8* %p, i32 24 %t = load i8, i8* %s %u = zext i8 %t to i32 ret i32 %u } +; CHECK-LABEL: load_i16_i32_z_with_folded_gep_offset: +; CHECK: i32.load16_u $push0=, 48($0){{$}} +define i32 @load_i16_i32_z_with_folded_gep_offset(i16* %p) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %t = load i16, i16* %s + %u = zext i16 %t to i32 + ret i32 %u +} + +; CHECK-LABEL: load_i16_i64_z_with_folded_gep_offset: +; CHECK: i64.load16_u $push0=, 48($0){{$}} +define i64 @load_i16_i64_z_with_folded_gep_offset(i16* %p) { + %s = getelementptr inbounds i16, i16* %p, i64 24 + %t = load i16, i16* %s + %u = zext i16 %t to i64 + ret i64 %u +} + +; When loading from a fixed address, materialize a zero. + +; CHECK-LABEL: load_i16_i32_z_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.load16_u $push1=, 42($pop0){{$}} +define i32 @load_i16_i32_z_from_numeric_address() { + %s = inttoptr i32 42 to i16* + %t = load i16, i16* %s + %u = zext i16 %t to i32 + ret i32 %u +} + +; CHECK-LABEL: load_i8_i32_z_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.load8_u $push1=, gv8($pop0){{$}} +define i32 @load_i8_i32_z_from_global_address() { + %t = load i8, i8* @gv8 + %u = zext i8 %t to i32 + ret i32 %u +} + +; i8 return value should test anyext loads +; CHECK-LABEL: load_i8_i32_retvalue: +; CHECK: i32.load8_u $push[[NUM:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i8 @load_i8_i32_retvalue(i8 *%p) { + %v = load i8, i8* %p + ret i8 %v +} + +;===---------------------------------------------------------------------------- +; Truncating stores +;===---------------------------------------------------------------------------- + ; Fold an offset into a 
truncating store. -; CHECK-LABEL: store_i8_with_folded_offset: -; CHECK: i32.store8 24($0), $pop0{{$}} -define void @store_i8_with_folded_offset(i8* %p) { +; CHECK-LABEL: store_i8_i32_with_folded_offset: +; CHECK: i32.store8 24($0), $1{{$}} +define void @store_i8_i32_with_folded_offset(i8* %p, i32 %v) { %q = ptrtoint i8* %p to i32 %r = add nuw i32 %q, 24 %s = inttoptr i32 %r to i8* - store i8 0, i8* %s + %t = trunc i32 %v to i8 + store i8 %t, i8* %s + ret void +} + +; CHECK-LABEL: store_i32_i64_with_folded_offset: +; CHECK: i64.store32 24($0), $1{{$}} +define void @store_i32_i64_with_folded_offset(i32* %p, i64 %v) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = trunc i64 %v to i32 + store i32 %t, i32* %s ret void } ; Fold a gep offset into a truncating store. -; CHECK-LABEL: store_i8_with_folded_gep_offset: -; CHECK: i32.store8 24($0), $pop0{{$}} -define void @store_i8_with_folded_gep_offset(i8* %p) { +; CHECK-LABEL: store_i8_i32_with_folded_gep_offset: +; CHECK: i32.store8 24($0), $1{{$}} +define void @store_i8_i32_with_folded_gep_offset(i8* %p, i32 %v) { %s = getelementptr inbounds i8, i8* %p, i32 24 - store i8 0, i8* %s + %t = trunc i32 %v to i8 + store i8 %t, i8* %s + ret void +} + +; CHECK-LABEL: store_i16_i32_with_folded_gep_offset: +; CHECK: i32.store16 48($0), $1{{$}} +define void @store_i16_i32_with_folded_gep_offset(i16* %p, i32 %v) { + %s = getelementptr inbounds i16, i16* %p, i32 24 + %t = trunc i32 %v to i16 + store i16 %t, i16* %s + ret void +} + +; CHECK-LABEL: store_i16_i64_with_folded_gep_offset: +; CHECK: i64.store16 48($0), $1{{$}} +define void @store_i16_i64_with_folded_gep_offset(i16* %p, i64 %v) { + %s = getelementptr inbounds i16, i16* %p, i64 24 + %t = trunc i64 %v to i16 + store i16 %t, i16* %s + ret void +} + +; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as +; an 'add' if the or'ed bits are known to be zero. 
+ +; CHECK-LABEL: store_i8_i32_with_folded_or_offset: +; CHECK: i32.store8 2($pop{{[0-9]+}}), $1{{$}} +define void @store_i8_i32_with_folded_or_offset(i32 %x, i32 %v) { + %and = and i32 %x, -4 + %p = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %p, i32 2 + %t = trunc i32 %v to i8 + store i8 %t, i8* %arrayidx + ret void +} + +; CHECK-LABEL: store_i8_i64_with_folded_or_offset: +; CHECK: i64.store8 2($pop{{[0-9]+}}), $1{{$}} +define void @store_i8_i64_with_folded_or_offset(i32 %x, i64 %v) { + %and = and i32 %x, -4 + %p = inttoptr i32 %and to i8* + %arrayidx = getelementptr inbounds i8, i8* %p, i32 2 + %t = trunc i64 %v to i8 + store i8 %t, i8* %arrayidx ret void } +;===---------------------------------------------------------------------------- +; Aggregate values +;===---------------------------------------------------------------------------- + ; Fold the offsets when lowering aggregate loads and stores. ; CHECK-LABEL: aggregate_load_store: