Index: llvm/trunk/lib/Target/WebAssembly/README.txt =================================================================== --- llvm/trunk/lib/Target/WebAssembly/README.txt +++ llvm/trunk/lib/Target/WebAssembly/README.txt @@ -38,11 +38,6 @@ //===---------------------------------------------------------------------===// -Load and store instructions can have a constant offset. We should (a) model -this, and (b) do address-mode folding with it. - -//===---------------------------------------------------------------------===// - Br, br_if, and tableswitch instructions can support having a value on the expression stack across the jump (sometimes). We should (a) model this, and (b) extend the stackifier to utilize it. Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -54,6 +54,8 @@ StringRef Constraint, MVT VT) const override; bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS) const override; SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -261,6 +261,24 @@ return true; } +bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, + Type *Ty, + unsigned AS) const { + // WebAssembly offsets are added as unsigned without wrapping. The + // isLegalAddressingMode gives us no way to determine if wrapping could be + // happening, so we approximate this by accepting only non-negative offsets. + if (AM.BaseOffs < 0) + return false; + + // WebAssembly has no scale register operands. + if (AM.Scale != 0) + return false; + + // Everything else is legal. + return true; +} + //===----------------------------------------------------------------------===// // WebAssembly Lowering private implementation. //===----------------------------------------------------------------------===// @@ -408,7 +426,8 @@ if (In.Flags.isInConsecutiveRegs()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values"); if (In.Flags.isInConsecutiveRegsLast()) - fail(DL, DAG, "WebAssembly hasn't implemented cons regs last return values"); + fail(DL, DAG, + "WebAssembly hasn't implemented cons regs last return values"); // Ignore In.getOrigAlign() because all our arguments are passed in // registers. Tys.push_back(In.VT); @@ -551,9 +570,9 @@ assert(GA->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); if (GA->getAddressSpace() != 0) fail(DL, DAG, "WebAssembly only expects the 0 address space"); - return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, - DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, - GA->getOffset())); + return DAG.getNode( + WebAssemblyISD::Wrapper, DL, VT, + DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset())); } SDValue Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -14,14 +14,20 @@ // TODO: // - HasAddr64 -// - WebAssemblyTargetLowering::isLegalAddressingMode // - WebAssemblyTargetLowering having to do with atomics -// - Each has optional alignment and immediate byte offset. +// - Each has optional alignment. // WebAssembly has i8/i16/i32/i64/f32/f64 memory types, but doesn't have i8/i16 // local types. These memory-only types instead zero- or sign-extend into local // types when loading, and truncate when storing. +// WebAssembly constant offsets are performed as unsigned with infinite +// precision, so we need to check for NoUnsignedWrap so that we don't fold an +// offset for an add that needs wrapping. +def regPlusImm : PatFrag<(ops node:$off, node:$addr), + (add node:$addr, node:$off), + [{ return N->getFlags()->hasNoUnsignedWrap(); }]>; + let Defs = [ARGUMENTS] in { // Basic load. @@ -34,6 +40,64 @@ def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr), [], "f64.load\t$dst, $off($addr)">; +} // Defs = [ARGUMENTS] + +// Select loads with no constant offset. +def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>; +def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>; +def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>; +def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>; + +// Select loads with a constant offset. +def : Pat<(i32 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_I32 imm:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_I64 imm:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_F32 imm:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_F64 imm:$off, $addr)>; +def : Pat<(i32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_I64 tglobaladdr:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_F32 tglobaladdr:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_F64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_I64 texternalsym:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_F32 texternalsym:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_F64 texternalsym:$off, $addr)>; + +// Select loads with just a constant offset. +def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0))>; +def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_F32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_F64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(f32 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_F32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_F64 texternalsym:$off, (CONST_I32 0))>; + +let Defs = [ARGUMENTS] in { + // Extending load. def LOAD8_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], "i32.load8_s\t$dst, $off($addr)">; @@ -58,12 +122,6 @@ } // Defs = [ARGUMENTS] -// Select loads with no constant offset. -def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>; -def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>; -def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>; -def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>; - // Select extending loads with no constant offset. def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr)>; def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; @@ -76,13 +134,189 @@ def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>; def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; -// "Don't care" extending load become zero-extending load. +// Select extending loads with a constant offset. +def : Pat<(i32 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_S_I32 imm:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I32 imm:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_S_I32 imm:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I32 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_U_I64 imm:$off, $addr)>; +def : Pat<(i32 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_S_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_S_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_S_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_S_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_U_I64 texternalsym:$off, $addr)>; + +// Select extending loads with just a constant offset. +def : Pat<(i32 (sextloadi8 imm:$off)), (LOAD8_S_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 imm:$off)), (LOAD16_S_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 imm:$off)), (LOAD8_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 imm:$off)), (LOAD16_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 imm:$off)), (LOAD32_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))), + (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))), + (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>; + +// Resolve "don't care" extending loads to zero-extending loads. This is +// somewhat arbitrary, but zero-extending is conceptually simpler. + +// Select "don't care" extending loads with no constant offset. def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; +// Select "don't care" extending loads with a constant offset. +def : Pat<(i32 (extloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I32 imm:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I32 imm:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_U_I64 imm:$off, $addr)>; +def : Pat<(i32 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_U_I64 texternalsym:$off, $addr)>; + +// Select "don't care" extending loads with just a constant offset. +def : Pat<(i32 (extloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (extloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))), + (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + let Defs = [ARGUMENTS] in { // Basic store. @@ -102,17 +336,63 @@ } // Defs = [ARGUMENTS] +// Select stores with no constant offset. def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, I32:$val)>; def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, I64:$val)>; def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>; def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>; -// FIXME: This pattern matches an immediate to actually use the offset field -// in the store instruction; however only unsigned offsets are supported in -// wasm, so we need to constrain the immediate we match. This may require -// custom code rather than a simple pattern. -// def : Pat<(store I32:$val, (add I32:$addr, (i32 imm:$off))), -// (STORE_I32 imm:$off, I32:$addr, I32:$val)>; +// Select stores with a constant offset. +def : Pat<(store I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_F32 imm:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_F64 imm:$off, I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>; + +// Select stores with just a constant offset. +def : Pat<(store I32:$val, imm:$off), + (STORE_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, imm:$off), + (STORE_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, imm:$off), + (STORE_F32 imm:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, imm:$off), + (STORE_F64 imm:$off, (CONST_I32 0), F64:$val)>; +def : Pat<(store I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_F32 tglobaladdr:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_F64 tglobaladdr:$off, (CONST_I32 0), F64:$val)>; +def : Pat<(store I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_F32 texternalsym:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_F64 texternalsym:$off, (CONST_I32 0), F64:$val)>; let Defs = [ARGUMENTS] in { @@ -130,6 +410,7 @@ } // Defs = [ARGUMENTS] +// Select truncating stores with no constant offset. def : Pat<(truncstorei8 I32:$val, I32:$addr), (STORE8_I32 0, I32:$addr, I32:$val)>; def : Pat<(truncstorei16 I32:$val, I32:$addr), @@ -141,6 +422,70 @@ def : Pat<(truncstorei32 I64:$val, I32:$addr), (STORE32_I64 0, I32:$addr, I64:$val)>; +// Select truncating stores with a constant offset. +def : Pat<(truncstorei8 I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE8_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE16_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE8_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE16_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE32_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE8_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>; + +// Select truncating stores with just a constant offset. +def : Pat<(truncstorei8 I32:$val, imm:$off), + (STORE8_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, imm:$off), + (STORE16_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, imm:$off), + (STORE8_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, imm:$off), + (STORE16_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, imm:$off), + (STORE32_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE8_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE16_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE8_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE16_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE32_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; + let Defs = [ARGUMENTS] in { // Memory size. Index: llvm/trunk/test/CodeGen/WebAssembly/global.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/global.ll +++ llvm/trunk/test/CodeGen/WebAssembly/global.ll @@ -10,8 +10,8 @@ @llvm.used = appending global [1 x i32*] [i32* @g], section "llvm.metadata" ; CHECK: foo: -; CHECK: i32.const $push0=, answer{{$}} -; CHECK-NEXT: i32.load $push1=, 0($pop0){{$}} +; CHECK: i32.const $push0=, 0{{$}} +; CHECK-NEXT: i32.load $push1=, answer($pop0){{$}} ; CHECK-NEXT: return $pop1{{$}} define i32 @foo() { %a = load i32, i32* @answer Index: llvm/trunk/test/CodeGen/WebAssembly/offset.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/offset.ll +++ llvm/trunk/test/CodeGen/WebAssembly/offset.ll @@ -0,0 +1,185 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +; Test constant load and store address offsets. + +target datalayout = "e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; With an nuw add, we can fold an offset. + +; CHECK-LABEL: load_i32_with_folded_offset: +; CHECK: i32.load $push0=, 24($0){{$}} +define i32 @load_i32_with_folded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = load i32, i32* %s + ret i32 %t +} + +; Without nuw, and even with nsw, we can't fold an offset. + +; CHECK-LABEL: load_i32_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.load $push2=, 0($pop1){{$}} +define i32 @load_i32_with_unfolded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i32* + %t = load i32, i32* %s + ret i32 %t +} + +; Same as above but with i64. + +; CHECK-LABEL: load_i64_with_folded_offset: +; CHECK: i64.load $push0=, 24($0){{$}} +define i64 @load_i64_with_folded_offset(i64* %p) { + %q = ptrtoint i64* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i64* + %t = load i64, i64* %s + ret i64 %t +} + +; Same as above but with i64. + +; CHECK-LABEL: load_i64_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.load $push2=, 0($pop1){{$}} +define i64 @load_i64_with_unfolded_offset(i64* %p) { + %q = ptrtoint i64* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i64* + %t = load i64, i64* %s + ret i64 %t +} + +; Same as above but with store. + +; CHECK-LABEL: store_i32_with_folded_offset: +; CHECK: i32.store $discard=, 24($0), $pop0{{$}} +define void @store_i32_with_folded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i32* + store i32 0, i32* %s + ret void +} + +; Same as above but with store. + +; CHECK-LABEL: store_i32_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.store $discard=, 0($pop1), $pop2{{$}} +define void @store_i32_with_unfolded_offset(i32* %p) { + %q = ptrtoint i32* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i32* + store i32 0, i32* %s + ret void +} + +; Same as above but with store with i64. + +; CHECK-LABEL: store_i64_with_folded_offset: +; CHECK: i64.store $discard=, 24($0), $pop0{{$}} +define void @store_i64_with_folded_offset(i64* %p) { + %q = ptrtoint i64* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i64* + store i64 0, i64* %s + ret void +} + +; Same as above but with store with i64. + +; CHECK-LABEL: store_i64_with_unfolded_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.store $discard=, 0($pop1), $pop2{{$}} +define void @store_i64_with_unfolded_offset(i64* %p) { + %q = ptrtoint i64* %p to i32 + %r = add nsw i32 %q, 24 + %s = inttoptr i32 %r to i64* + store i64 0, i64* %s + ret void +} + +; When loading from a fixed address, materialize a zero. + +; CHECK-LABEL: load_i32_from_numeric_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.load $push1=, 42($pop0){{$}} +define i32 @load_i32_from_numeric_address() { + %s = inttoptr i32 42 to i32* + %t = load i32, i32* %s + ret i32 %t +} + +; CHECK-LABEL: load_i32_from_global_address +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.load $push1=, gv($pop0){{$}} +@gv = global i32 0 +define i32 @load_i32_from_global_address() { + %t = load i32, i32* @gv + ret i32 %t +} + +; CHECK-LABEL: store_i32_to_numeric_address: +; CHECK: i32.const $0=, 0{{$}} +; CHECK: i32.store $discard=, 42($0), $0{{$}} +define void @store_i32_to_numeric_address() { + %s = inttoptr i32 42 to i32* + store i32 0, i32* %s + ret void +} + +; CHECK-LABEL: store_i32_to_global_address: +; CHECK: i32.const $0=, 0{{$}} +; CHECK: i32.store $discard=, gv($0), $0{{$}} +define void @store_i32_to_global_address() { + store i32 0, i32* @gv + ret void +} + +; Fold an offset into a sign-extending load. + +; CHECK-LABEL: load_i8_s_with_folded_offset: +; CHECK: i32.load8_s $push0=, 24($0){{$}} +define i32 @load_i8_s_with_folded_offset(i8* %p) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i8* + %t = load i8, i8* %s + %u = sext i8 %t to i32 + ret i32 %u +} + +; Fold an offset into a zero-extending load. + +; CHECK-LABEL: load_i8_u_with_folded_offset: +; CHECK: i32.load8_u $push0=, 24($0){{$}} +define i32 @load_i8_u_with_folded_offset(i8* %p) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i8* + %t = load i8, i8* %s + %u = zext i8 %t to i32 + ret i32 %u +} + +; Fold an offset into a truncating store. + +; CHECK-LABEL: store_i8_with_folded_offset: +; CHECK: i32.store8 $discard=, 24($0), $pop0{{$}} +define void @store_i8_with_folded_offset(i8* %p) { + %q = ptrtoint i8* %p to i32 + %r = add nuw i32 %q, 24 + %s = inttoptr i32 %r to i8* + store i8 0, i8* %s + ret void +} Index: llvm/trunk/test/CodeGen/WebAssembly/store-results.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/store-results.ll +++ llvm/trunk/test/CodeGen/WebAssembly/store-results.ll @@ -26,7 +26,7 @@ @pos = global %class.Vec3 zeroinitializer, align 4 ; CHECK-LABEL: foo: -; CHECK: i32.store $discard=, 0($pop0), $0{{$}} +; CHECK: i32.store $discard=, pos($0), $0{{$}} define void @foo() { for.body.i: br label %for.body5.i @@ -44,7 +44,7 @@ } ; CHECK-LABEL: bar: -; CHECK: i32.store $discard=, 0($0), $pop0{{$}} +; CHECK: i32.store $discard=, pos($0), $0{{$}} define void @bar() { for.body.i: br label %for.body5.i