diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -26,5 +26,6 @@ HANDLE_NODETYPE(VEC_SHR_U) HANDLE_NODETYPE(THROW) HANDLE_NODETYPE(MEMORY_COPY) +HANDLE_NODETYPE(MEMORY_FILL) // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here... diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -245,11 +245,13 @@ setMaxAtomicSizeInBitsSupported(64); if (Subtarget->hasBulkMemory()) { - // Using memory.copy is always better than using multiple loads and stores + // Use memory.copy and friends over multiple loads and stores MaxStoresPerMemcpy = 1; MaxStoresPerMemcpyOptSize = 1; MaxStoresPerMemmove = 1; MaxStoresPerMemmoveOptSize = 1; + MaxStoresPerMemset = 1; + MaxStoresPerMemsetOptSize = 1; } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrBulkMemory.td @@ -21,17 +21,19 @@ } // Bespoke types and nodes for bulk memory ops -def wasm_memcpy_t : SDTypeProfile<0, 3, - [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisInt<2>] +def wasm_memcpy_t : SDTypeProfile<0, 5, + [SDTCisInt<0>, SDTCisInt<1>, SDTCisPtrTy<2>, SDTCisPtrTy<3>, SDTCisInt<4>] >; def wasm_memcpy : SDNode<"WebAssemblyISD::MEMORY_COPY", wasm_memcpy_t, [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; -//===----------------------------------------------------------------------===// -// memory.init -//===----------------------------------------------------------------------===// +def wasm_memset_t : SDTypeProfile<0, 4, + [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>, 
SDTCisInt<3>] +>; +def wasm_memset : SDNode<"WebAssemblyISD::MEMORY_FILL", wasm_memset_t, + [SDNPHasChain, SDNPMayStore]>; -let mayStore = 1 in +let mayLoad =1, mayStore = 1 in defm MEMORY_INIT : BULK_I<(outs), (ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest, @@ -43,22 +45,27 @@ "memory.init\t$seg, $idx, $dest, $offset, $size", "memory.init\t$seg, $idx", 0x08>; -//===----------------------------------------------------------------------===// -// data.drop -//===----------------------------------------------------------------------===// - +let mayStore = 1 in defm DATA_DROP : BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg), [(int_wasm_data_drop (i32 imm:$seg))], "data.drop\t$seg", "data.drop\t$seg", 0x09>; -//===----------------------------------------------------------------------===// -// memory.copy -//===----------------------------------------------------------------------===// - let mayLoad = 1, mayStore = 1 in -defm MEMORY_COPY : BULK_I<(outs), (ins I32:$dst, I32:$src, I32:$len), - (outs), (ins), - [(wasm_memcpy I32:$dst, I32:$src, I32:$len)], - "memory.copy\t$dst, $src, $len", - "memory.copy", 0x0a>; +defm MEMORY_COPY : + BULK_I<(outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx, + I32:$dst, I32:$src, I32:$len), + (outs), (ins i32imm_op:$src_idx, i32imm_op:$dst_idx), + [(wasm_memcpy (i32 imm:$src_idx), (i32 imm:$dst_idx), + I32:$dst, I32:$src, I32:$len + )], + "memory.copy\t$src_idx, $dst_idx, $dst, $src, $len", + "memory.copy\t$src_idx, $dst_idx", 0x0a>; + +let mayStore = 1 in +defm MEMORY_FILL : + BULK_I<(outs), (ins i32imm_op:$idx, I32:$dst, I32:$value, I32:$size), + (outs), (ins i32imm_op:$idx), + [(wasm_memset (i32 imm:$idx), I32:$dst, I32:$value, I32:$size)], + "memory.fill\t$idx, $dst, $value, $size", + "memory.fill\t$idx", 0x0b>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h --- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h +++ 
b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h @@ -33,6 +33,10 @@ SDValue Op3, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &DL, + SDValue Chain, SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo) const override; }; } // end namespace llvm diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp @@ -19,16 +19,18 @@ WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() = default; // anchor SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemcpy( - SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, bool IsVolatile, bool AlwaysInline, + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { if (!DAG.getMachineFunction() .getSubtarget() .hasBulkMemory()) return SDValue(); - return DAG.getNode(WebAssemblyISD::MEMORY_COPY, DL, MVT::Other, Chain, Op1, - Op2, Op3); + SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32); + return DAG.getNode(WebAssemblyISD::MEMORY_COPY, DL, MVT::Other, + {Chain, MemIdx, MemIdx, Dst, Src, + DAG.getZExtOrTrunc(Size, DL, MVT::i32)}); } SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemmove( @@ -39,3 +41,18 @@ IsVolatile, false, DstPtrInfo, SrcPtrInfo); } + +SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Val, + SDValue Size, unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo) const { + if 
(!DAG.getMachineFunction() + .getSubtarget() + .hasBulkMemory()) + return SDValue(); + + SDValue MemIdx = DAG.getConstant(0, DL, MVT::i32); + return DAG.getNode(WebAssemblyISD::MEMORY_FILL, DL, MVT::Other, Chain, MemIdx, + Dst, DAG.getAnyExtOrTrunc(Val, DL, MVT::i32), + DAG.getZExtOrTrunc(Size, DL, MVT::i32)); +} diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory-intrinsics.ll --- a/llvm/test/CodeGen/WebAssembly/bulk-memory-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/bulk-memory-intrinsics.ll @@ -7,12 +7,12 @@ ; CHECK-LABEL: memory_init: ; CHECK-NEXT: .functype memory_init (i32, i32, i32) -> () -; CHECK-NEXT: memory.init 0, 3, $0, $1, $2 +; CHECK-NEXT: memory.init 3, 0, $0, $1, $2 ; CHECK-NEXT: return declare void @llvm.wasm.memory.init.i32.i32.i8(i32, i32, i8*, i32, i32) define void @memory_init(i8* %dest, i32 %offset, i32 %size) { call void @llvm.wasm.memory.init.i32.i32.i8( - i32 0, i32 3, i8* %dest, i32 %offset, i32 %size + i32 3, i32 0, i8* %dest, i32 %offset, i32 %size ) ret void } diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll --- a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll +++ b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll @@ -6,40 +6,53 @@ target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" +declare void @llvm.memcpy.p0i8.p0i8.i8(i8*, i8*, i8, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) +declare void @llvm.memcpy.p0i32.p0i32.i32(i32*, i32*, i32, i1) + +declare void @llvm.memmove.p0i8.p0i8.i8(i8*, i8*, i8, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1) +declare void @llvm.memmove.p0i32.p0i32.i32(i32*, i32*, i32, i1) + +declare void @llvm.memset.p0i8.i8(i8*, i8, i8, i1) +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1) +declare void @llvm.memset.p0i32.i32(i32*, i8, i32, i1) + ; CHECK-LABEL: memcpy_i8: ; NO-BULK-MEM-NOT: memory.copy ; 
BULK-MEM-NEXT: .functype memcpy_i8 (i32, i32, i32) -> () -; BULK-MEM-NEXT: memory.copy $0, $1, $2 +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2 ; BULK-MEM-NEXT: return -declare void @llvm.memcpy.p0i8.p0i8.i32( - i8* %dest, i8* %src, i32 %len, i1 %volatile -) -define void @memcpy_i8(i8* %dest, i8* %src, i32 %len) { - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 0) +define void @memcpy_i8(i8* %dest, i8* %src, i8 zeroext %len) { + call void @llvm.memcpy.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0) ret void } ; CHECK-LABEL: memmove_i8: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memmove_i8 (i32, i32, i32) -> () -; BULK-MEM-NEXT: memory.copy $0, $1, $2 +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2 +; BULK-MEM-NEXT: return +define void @memmove_i8(i8* %dest, i8* %src, i8 zeroext %len) { + call void @llvm.memmove.p0i8.p0i8.i8(i8* %dest, i8* %src, i8 %len, i1 0) + ret void +} + +; CHECK-LABEL: memset_i8: +; NO-BULK-MEM-NOT: memory.fill +; BULK-MEM-NEXT: .functype memset_i8 (i32, i32, i32) -> () +; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2 ; BULK-MEM-NEXT: return -declare void @llvm.memmove.p0i8.p0i8.i32( - i8* %dest, i8* %src, i32 %len, i1 %volatile -) -define void @memmove_i8(i8* %dest, i8* %src, i32 %len) { - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 0) +define void @memset_i8(i8* %dest, i8 zeroext %val, i8 zeroext %len) { + call void @llvm.memset.p0i8.i8(i8* %dest, i8 %val, i8 %len, i1 0) ret void } ; CHECK-LABEL: memcpy_i32: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memcpy_i32 (i32, i32, i32) -> () -; BULK-MEM-NEXT: memory.copy $0, $1, $2 +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2 ; BULK-MEM-NEXT: return -declare void @llvm.memcpy.p0i32.p0i32.i32( - i32* %dest, i32* %src, i32 %len, i1 %volatile -) define void @memcpy_i32(i32* %dest, i32* %src, i32 %len) { call void @llvm.memcpy.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 0) ret void @@ -48,16 +61,23 @@ ; CHECK-LABEL: 
memmove_i32: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memmove_i32 (i32, i32, i32) -> () -; BULK-MEM-NEXT: memory.copy $0, $1, $2 +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2 ; BULK-MEM-NEXT: return -declare void @llvm.memmove.p0i32.p0i32.i32( - i32* %dest, i32* %src, i32 %len, i1 %volatile -) define void @memmove_i32(i32* %dest, i32* %src, i32 %len) { call void @llvm.memmove.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 0) ret void } +; CHECK-LABEL: memset_i32: +; NO-BULK-MEM-NOT: memory.fill +; BULK-MEM-NEXT: .functype memset_i32 (i32, i32, i32) -> () +; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2 +; BULK-MEM-NEXT: return +define void @memset_i32(i32* %dest, i8 %val, i32 %len) { + call void @llvm.memset.p0i32.i32(i32* %dest, i8 %val, i32 %len, i1 0) + ret void +} + ; CHECK-LABEL: memcpy_1: ; CHECK-NEXT: .functype memcpy_1 (i32, i32) -> () ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1) @@ -78,11 +98,21 @@ ret void } +; CHECK-LABEL: memset_1: +; NO-BULK-MEM-NOT: memory.fill +; BULK-MEM-NEXT: .functype memset_1 (i32, i32) -> () +; BULK-MEM-NEXT: i32.store8 0($0), $1 +; BULK-MEM-NEXT: return +define void @memset_1(i8* %dest, i8 %val) { + call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1, i1 0) + ret void +} + ; CHECK-LABEL: memcpy_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> () ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: memory.copy $0, $1, $pop[[L0]] +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] ; BULK-MEM-NEXT: return define void @memcpy_1024(i8* %dest, i8* %src) { call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0) @@ -93,9 +123,20 @@ ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> () ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: memory.copy $0, $1, $pop[[L0]] +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] ; BULK-MEM-NEXT: return define void @memmove_1024(i8* 
%dest, i8* %src) { call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0) ret void } + +; CHECK-LABEL: memset_1024: +; NO-BULK-MEM-NOT: memory.fill +; BULK-MEM-NEXT: .functype memset_1024 (i32, i32) -> () +; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024 +; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]] +; BULK-MEM-NEXT: return +define void @memset_1024(i8* %dest, i8 %val) { + call void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, i32 1024, i1 0) + ret void +} diff --git a/llvm/test/MC/WebAssembly/bulk-memory-encodings.s b/llvm/test/MC/WebAssembly/bulk-memory-encodings.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/WebAssembly/bulk-memory-encodings.s @@ -0,0 +1,18 @@ +# RUN: llvm-mc -show-encoding -triple=wasm32-unknown-unknown -mattr=+bulk-memory < %s | FileCheck %s + +main: + .functype main () -> () + + # CHECK: memory.init 3, 0 # encoding: [0xfc,0x08,0x03,0x00] + memory.init 3, 0 + + # CHECK: data.drop 3 # encoding: [0xfc,0x09,0x03] + data.drop 3 + + # CHECK: memory.copy 0, 0 # encoding: [0xfc,0x0a,0x00,0x00] + memory.copy 0, 0 + + # CHECK: memory.fill 0 # encoding: [0xfc,0x0b,0x00] + memory.fill 0 + + end_function