diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -43,8 +43,7 @@
 public:
   WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &TM,
                           CodeGenOpt::Level OptLevel)
-      : SelectionDAGISel(TM, OptLevel), Subtarget(nullptr) {
-  }
+      : SelectionDAGISel(TM, OptLevel), Subtarget(nullptr) {}
 
   StringRef getPassName() const override {
     return "WebAssembly Instruction Selection";
@@ -67,11 +66,19 @@
   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
 
+  bool SelectLoadOperands32(SDValue Op, SDValue &Offset, SDValue &Addr);
+  bool SelectLoadOperands64(SDValue Op, SDValue &Offset, SDValue &Addr);
+
   // Include the pieces autogenerated from the target description.
 #include "WebAssemblyGenDAGISel.inc"
 
 private:
   // add select functions here...
+
+  bool SelectLoadOperands(MVT AddrType, unsigned ConstOpc, SDValue Op,
+                          SDValue &Offset, SDValue &Addr);
+  bool SelectLoadAddOperands(MVT OffsetType, SDValue N, SDValue &Offset,
+                             SDValue &Addr);
 };
 
 } // end anonymous namespace
@@ -281,6 +288,118 @@
   return true;
 }
 
+bool WebAssemblyDAGToDAGISel::SelectLoadAddOperands(MVT OffsetType, SDValue N,
+                                                    SDValue &Offset,
+                                                    SDValue &Addr) {
+  assert(N.getNumOperands() == 2 && "Attempting to fold in a non-binary op");
+
+  // Fold target global addresses in an add into the offset.
+  if (!TM.isPositionIndependent()) {
+    for (size_t i = 0; i < 2; ++i) {
+      SDValue Op = N.getOperand(i);
+      SDValue OtherOp = N.getOperand(i == 0 ? 1 : 0);
+
+      if (Op.getOpcode() == WebAssemblyISD::Wrapper)
+        Op = Op.getOperand(0);
+
+      if (Op.getOpcode() == ISD::TargetGlobalAddress) {
+        Offset = Op;
+        Addr = OtherOp;
+        return true;
+      }
+    }
+  }
+
+  // WebAssembly constant offsets are performed as unsigned with
+  // infinite precision, so we need to check for NoUnsignedWrap so
+  // that we don't fold an offset for an add that needs wrapping.
+  if (N.getOpcode() == ISD::ADD && !N.getNode()->getFlags().hasNoUnsignedWrap())
+    return false;
+
+  // Folds constants in an add into the offset.
+  for (size_t i = 0; i < 2; ++i) {
+    SDValue Op = N.getOperand(i);
+    SDValue OtherOp = N.getOperand(i == 0 ? 1 : 0);
+
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) {
+      Offset =
+          CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), OffsetType);
+      Addr = OtherOp;
+      return true;
+    }
+  }
+  return false;
+}
+
+bool WebAssemblyDAGToDAGISel::SelectLoadOperands(MVT AddrTyp, unsigned ConstOpc,
+                                                 SDValue N, SDValue &Offset,
+                                                 SDValue &Addr) {
+  SDLoc DL(N);
+
+  // Fold target global addresses into the offset.
+  if (!TM.isPositionIndependent()) {
+    SDValue Op(N);
+    if (Op.getOpcode() == WebAssemblyISD::Wrapper)
+      Op = Op.getOperand(0);
+
+    if (Op.getOpcode() == ISD::TargetGlobalAddress) {
+      Offset = Op;
+      Addr = SDValue(
+          CurDAG->getMachineNode(ConstOpc, DL, AddrTyp,
+                                 CurDAG->getTargetConstant(0, DL, AddrTyp)),
+          0);
+      return true;
+    }
+  }
+
+  // Fold anything inside an add into the offset.
+  if (N.getOpcode() == ISD::ADD &&
+      SelectLoadAddOperands(AddrTyp, N, Offset, Addr))
+    return true;
+
+  // Likewise, treat an 'or' node as an 'add' if the or'ed bits are
+  // known to be zero and fold them into the offset too.
+  if (N.getOpcode() == ISD::OR) {
+    bool OrIsAdd;
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      OrIsAdd =
+          CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+    } else {
+      KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
+      KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
+      OrIsAdd = (~Known0.Zero & ~Known1.Zero) == 0;
+    }
+
+    if (OrIsAdd && SelectLoadAddOperands(AddrTyp, N, Offset, Addr))
+      return true;
+  }
+
+  // Fold constant addresses into the offset.
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+    Offset = CurDAG->getTargetConstant(CN->getZExtValue(), DL, AddrTyp);
+    Addr = SDValue(
+        CurDAG->getMachineNode(ConstOpc, DL, AddrTyp,
+                               CurDAG->getTargetConstant(0, DL, AddrTyp)),
+        0);
+    return true;
+  }
+
+  // Else it's a plain old load with no offset.
+  Offset = CurDAG->getTargetConstant(0, DL, AddrTyp);
+  Addr = N;
+  return true;
+}
+
+bool WebAssemblyDAGToDAGISel::SelectLoadOperands32(SDValue Op, SDValue &Offset,
+                                                   SDValue &Addr) {
+  return SelectLoadOperands(MVT::i32, WebAssembly::CONST_I32, Op, Offset, Addr);
+}
+
+bool WebAssemblyDAGToDAGISel::SelectLoadOperands64(SDValue Op, SDValue &Offset,
+                                                   SDValue &Addr) {
+  return SelectLoadOperands(MVT::i64, WebAssembly::CONST_I64, Op, Offset, Addr);
+}
+
 /// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
 /// for instruction scheduling.
 FunctionPass *llvm::createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -226,25 +226,9 @@
 defm ATOMIC_LOAD_I32 : AtomicLoad<I32, "i32.atomic.load", 0x10>;
 defm ATOMIC_LOAD_I64 : AtomicLoad<I64, "i64.atomic.load", 0x11>;
 
-// Select loads with no constant offset.
-defm : LoadPatNoOffset<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
-defm : LoadPatNoOffset<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
-
-// Select loads with a constant offset.
-
-// Pattern with address + immediate offset
-defm : LoadPatImmOff<i32, atomic_load_32, regPlusImm, "ATOMIC_LOAD_I32">;
-defm : LoadPatImmOff<i64, atomic_load_64, regPlusImm, "ATOMIC_LOAD_I64">;
-defm : LoadPatImmOff<i32, atomic_load_32, or_is_add, "ATOMIC_LOAD_I32">;
-defm : LoadPatImmOff<i64, atomic_load_64, or_is_add, "ATOMIC_LOAD_I64">;
-
-// Select loads with just a constant offset.
-defm : LoadPatOffsetOnly<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
-defm : LoadPatOffsetOnly<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
-
-defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
-defm : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
-
+// Select loads
+defm : LoadPat<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
+defm : LoadPat<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
 
 // Extending loads. Note that there are only zero-extending atomic loads, no
 // sign-extending loads.
@@ -283,54 +267,18 @@
 def sext_aload_16_64 :
   PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_16 node:$addr)))>;
 
-// Select zero-extending loads with no constant offset.
-defm : LoadPatNoOffset<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatNoOffset<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
-defm : LoadPatNoOffset<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
+// Select zero-extending loads
+defm : LoadPat<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPat<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPat<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
 
-// Select sign-extending loads with no constant offset
-defm : LoadPatNoOffset<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
-defm : LoadPatNoOffset<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
-defm : LoadPatNoOffset<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatNoOffset<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
+// Select sign-extending loads
+defm : LoadPat<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPat<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPat<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPat<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
 // 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s
 
-// Zero-extending loads with constant offset
-defm : LoadPatImmOff<i64, zext_aload_8_64, regPlusImm, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, zext_aload_16_64, regPlusImm, "ATOMIC_LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, zext_aload_32_64, regPlusImm, "ATOMIC_LOAD32_U_I64">;
-defm : LoadPatImmOff<i64, zext_aload_8_64, or_is_add, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, zext_aload_16_64, or_is_add, "ATOMIC_LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, zext_aload_32_64, or_is_add, "ATOMIC_LOAD32_U_I64">;
-
-// Sign-extending loads with constant offset
-defm : LoadPatImmOff<i32, atomic_load_8, regPlusImm, "ATOMIC_LOAD8_U_I32">;
-defm : LoadPatImmOff<i32, atomic_load_16, regPlusImm, "ATOMIC_LOAD16_U_I32">;
-defm : LoadPatImmOff<i32, atomic_load_8, or_is_add, "ATOMIC_LOAD8_U_I32">;
-defm : LoadPatImmOff<i32, atomic_load_16, or_is_add, "ATOMIC_LOAD16_U_I32">;
-defm : LoadPatImmOff<i64, sext_aload_8_64, regPlusImm, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, sext_aload_16_64, regPlusImm, "ATOMIC_LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, sext_aload_8_64, or_is_add, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, sext_aload_16_64, or_is_add, "ATOMIC_LOAD16_U_I64">;
-// No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i64
-
-// Extending loads with just a constant offset
-defm : LoadPatOffsetOnly<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatOffsetOnly<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
-defm : LoadPatOffsetOnly<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
-defm : LoadPatOffsetOnly<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
-defm : LoadPatOffsetOnly<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
-defm : LoadPatOffsetOnly<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatOffsetOnly<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
-
-defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
-defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
-defm : LoadPatGlobalAddrOffOnly<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
-
 //===----------------------------------------------------------------------===//
 // Atomic stores
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -66,70 +66,6 @@
 defm LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a, []>;
 defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b, []>;
 
-// Select loads with no constant offset.
-multiclass LoadPatNoOffset<ValueType ty, SDPatternOperator kind, string inst> {
-  def : Pat<(ty (kind I32:$addr)), (!cast<NI>(inst # "_A32") 0, 0, I32:$addr)>,
-        Requires<[HasAddr32]>;
-  def : Pat<(ty (kind (i64 I64:$addr))), (!cast<NI>(inst # "_A64") 0, 0, I64:$addr)>,
-        Requires<[HasAddr64]>;
-}
-
-defm : LoadPatNoOffset<i32, load, "LOAD_I32">;
-defm : LoadPatNoOffset<i64, load, "LOAD_I64">;
-defm : LoadPatNoOffset<f32, load, "LOAD_F32">;
-defm : LoadPatNoOffset<f64, load, "LOAD_F64">;
-
-// Select loads with a constant offset.
-
-// Pattern with address + immediate offset
-multiclass LoadPatImmOff<ValueType ty, SDPatternOperator kind, PatFrag operand,
-                         string inst> {
-  def : Pat<(ty (kind (operand I32:$addr, imm:$off))),
-            (!cast<NI>(inst # "_A32") 0, imm:$off, I32:$addr)>,
-        Requires<[HasAddr32]>;
-  def : Pat<(ty (kind (operand I64:$addr, imm:$off))),
-            (!cast<NI>(inst # "_A64") 0, imm:$off, I64:$addr)>,
-        Requires<[HasAddr64]>;
-}
-
-defm : LoadPatImmOff<i32, load, regPlusImm, "LOAD_I32">;
-defm : LoadPatImmOff<i64, load, regPlusImm, "LOAD_I64">;
-defm : LoadPatImmOff<f32, load, regPlusImm, "LOAD_F32">;
-defm : LoadPatImmOff<f64, load, regPlusImm, "LOAD_F64">;
-defm : LoadPatImmOff<i32, load, or_is_add, "LOAD_I32">;
-defm : LoadPatImmOff<i64, load, or_is_add, "LOAD_I64">;
-defm : LoadPatImmOff<f32, load, or_is_add, "LOAD_F32">;
-defm : LoadPatImmOff<f64, load, or_is_add, "LOAD_F64">;
-
-// Select loads with just a constant offset.
-multiclass LoadPatOffsetOnly<ValueType ty, SDPatternOperator kind, string inst> {
-  def : Pat<(ty (kind imm:$off)),
-            (!cast<NI>(inst # "_A32") 0, imm:$off, (CONST_I32 0))>,
-        Requires<[HasAddr32]>;
-  def : Pat<(ty (kind imm:$off)),
-            (!cast<NI>(inst # "_A64") 0, imm:$off, (CONST_I64 0))>,
-        Requires<[HasAddr64]>;
-}
-
-defm : LoadPatOffsetOnly<i32, load, "LOAD_I32">;
-defm : LoadPatOffsetOnly<i64, load, "LOAD_I64">;
-defm : LoadPatOffsetOnly<f32, load, "LOAD_F32">;
-defm : LoadPatOffsetOnly<f64, load, "LOAD_F64">;
-
-multiclass LoadPatGlobalAddrOffOnly<ValueType ty, SDPatternOperator kind, string inst> {
-  def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off))),
-            (!cast<NI>(inst # "_A32") 0, tglobaladdr:$off, (CONST_I32 0))>,
-        Requires<[IsNotPIC, HasAddr32]>;
-  def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off))),
-            (!cast<NI>(inst # "_A64") 0, tglobaladdr:$off, (CONST_I64 0))>,
-        Requires<[IsNotPIC, HasAddr64]>;
-}
-
-defm : LoadPatGlobalAddrOffOnly<i32, load, "LOAD_I32">;
-defm : LoadPatGlobalAddrOffOnly<i64, load, "LOAD_I64">;
-defm : LoadPatGlobalAddrOffOnly<f32, load, "LOAD_F32">;
-defm : LoadPatGlobalAddrOffOnly<f64, load, "LOAD_F64">;
-
 // Extending load.
 defm LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c, []>;
 defm LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d, []>;
@@ -142,98 +78,57 @@
 defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34, []>;
 defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35, []>;
 
-// Select extending loads with no constant offset.
-defm : LoadPatNoOffset<i32, sextloadi8, "LOAD8_S_I32">;
-defm : LoadPatNoOffset<i32, zextloadi8, "LOAD8_U_I32">;
-defm : LoadPatNoOffset<i32, sextloadi16, "LOAD16_S_I32">;
-defm : LoadPatNoOffset<i32, zextloadi16, "LOAD16_U_I32">;
-defm : LoadPatNoOffset<i64, sextloadi8, "LOAD8_S_I64">;
-defm : LoadPatNoOffset<i64, zextloadi8, "LOAD8_U_I64">;
-defm : LoadPatNoOffset<i64, sextloadi16, "LOAD16_S_I64">;
-defm : LoadPatNoOffset<i64, zextloadi16, "LOAD16_U_I64">;
-defm : LoadPatNoOffset<i64, sextloadi32, "LOAD32_S_I64">;
-defm : LoadPatNoOffset<i64, zextloadi32, "LOAD32_U_I64">;
-
-// Select extending loads with a constant offset.
-defm : LoadPatImmOff<i32, sextloadi8, regPlusImm, "LOAD8_S_I32">;
-defm : LoadPatImmOff<i32, zextloadi8, regPlusImm, "LOAD8_U_I32">;
-defm : LoadPatImmOff<i32, sextloadi16, regPlusImm, "LOAD16_S_I32">;
-defm : LoadPatImmOff<i32, zextloadi16, regPlusImm, "LOAD16_U_I32">;
-defm : LoadPatImmOff<i64, sextloadi8, regPlusImm, "LOAD8_S_I64">;
-defm : LoadPatImmOff<i64, zextloadi8, regPlusImm, "LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, sextloadi16, regPlusImm, "LOAD16_S_I64">;
-defm : LoadPatImmOff<i64, zextloadi16, regPlusImm, "LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, sextloadi32, regPlusImm, "LOAD32_S_I64">;
-defm : LoadPatImmOff<i64, zextloadi32, regPlusImm, "LOAD32_U_I64">;
-
-defm : LoadPatImmOff<i32, sextloadi8, or_is_add, "LOAD8_S_I32">;
-defm : LoadPatImmOff<i32, zextloadi8, or_is_add, "LOAD8_U_I32">;
-defm : LoadPatImmOff<i32, sextloadi16, or_is_add, "LOAD16_S_I32">;
-defm : LoadPatImmOff<i32, zextloadi16, or_is_add, "LOAD16_U_I32">;
-defm : LoadPatImmOff<i64, sextloadi8, or_is_add, "LOAD8_S_I64">;
-defm : LoadPatImmOff<i64, zextloadi8, or_is_add, "LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, sextloadi16, or_is_add, "LOAD16_S_I64">;
-defm : LoadPatImmOff<i64, zextloadi16, or_is_add, "LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, sextloadi32, or_is_add, "LOAD32_S_I64">;
-defm : LoadPatImmOff<i64, zextloadi32, or_is_add, "LOAD32_U_I64">;
-
-// Select extending loads with just a constant offset.
-defm : LoadPatOffsetOnly<i32, sextloadi8, "LOAD8_S_I32">;
-defm : LoadPatOffsetOnly<i32, zextloadi8, "LOAD8_U_I32">;
-defm : LoadPatOffsetOnly<i32, sextloadi16, "LOAD16_S_I32">;
-defm : LoadPatOffsetOnly<i32, zextloadi16, "LOAD16_U_I32">;
-
-defm : LoadPatOffsetOnly<i64, sextloadi8, "LOAD8_S_I64">;
-defm : LoadPatOffsetOnly<i64, zextloadi8, "LOAD8_U_I64">;
-defm : LoadPatOffsetOnly<i64, sextloadi16, "LOAD16_S_I64">;
-defm : LoadPatOffsetOnly<i64, zextloadi16, "LOAD16_U_I64">;
-defm : LoadPatOffsetOnly<i64, sextloadi32, "LOAD32_S_I64">;
-defm : LoadPatOffsetOnly<i64, zextloadi32, "LOAD32_U_I64">;
-
-defm : LoadPatGlobalAddrOffOnly<i32, sextloadi8, "LOAD8_S_I32">;
-defm : LoadPatGlobalAddrOffOnly<i32, zextloadi8, "LOAD8_U_I32">;
-defm : LoadPatGlobalAddrOffOnly<i32, sextloadi16, "LOAD16_S_I32">;
-defm : LoadPatGlobalAddrOffOnly<i32, zextloadi16, "LOAD16_U_I32">;
-defm : LoadPatGlobalAddrOffOnly<i64, sextloadi8, "LOAD8_S_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, zextloadi8, "LOAD8_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, sextloadi16, "LOAD16_S_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, zextloadi16, "LOAD16_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, sextloadi32, "LOAD32_S_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, zextloadi32, "LOAD32_U_I64">;
-
-// Resolve "don't care" extending loads to zero-extending loads. This is
-// somewhat arbitrary, but zero-extending is conceptually simpler.
-
-// Select "don't care" extending loads with no constant offset.
-defm : LoadPatNoOffset<i32, extloadi8, "LOAD8_U_I32">;
-defm : LoadPatNoOffset<i32, extloadi16, "LOAD16_U_I32">;
-defm : LoadPatNoOffset<i64, extloadi8, "LOAD8_U_I64">;
-defm : LoadPatNoOffset<i64, extloadi16, "LOAD16_U_I64">;
-defm : LoadPatNoOffset<i64, extloadi32, "LOAD32_U_I64">;
-
-// Select "don't care" extending loads with a constant offset.
-defm : LoadPatImmOff<i32, extloadi8, regPlusImm, "LOAD8_U_I32">;
-defm : LoadPatImmOff<i32, extloadi16, regPlusImm, "LOAD16_U_I32">;
-defm : LoadPatImmOff<i64, extloadi8, regPlusImm, "LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, extloadi16, regPlusImm, "LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, extloadi32, regPlusImm, "LOAD32_U_I64">;
-defm : LoadPatImmOff<i32, extloadi8, or_is_add, "LOAD8_U_I32">;
-defm : LoadPatImmOff<i32, extloadi16, or_is_add, "LOAD16_U_I32">;
-defm : LoadPatImmOff<i64, extloadi8, or_is_add, "LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, extloadi16, or_is_add, "LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, extloadi32, or_is_add, "LOAD32_U_I64">;
-
-// Select "don't care" extending loads with just a constant offset.
-defm : LoadPatOffsetOnly<i32, extloadi8, "LOAD8_U_I32">;
-defm : LoadPatOffsetOnly<i32, extloadi16, "LOAD16_U_I32">;
-defm : LoadPatOffsetOnly<i64, extloadi8, "LOAD8_U_I64">;
-defm : LoadPatOffsetOnly<i64, extloadi16, "LOAD16_U_I64">;
-defm : LoadPatOffsetOnly<i64, extloadi32, "LOAD32_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i32, extloadi8, "LOAD8_U_I32">;
-defm : LoadPatGlobalAddrOffOnly<i32, extloadi16, "LOAD16_U_I32">;
-defm : LoadPatGlobalAddrOffOnly<i64, extloadi8, "LOAD8_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, extloadi16, "LOAD16_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, extloadi32, "LOAD32_U_I64">;
+// Pattern matching
+
+// Patterns that match the static (offset) and dynamic (stack operand)
+// operands for loads, based on a combination of target global
+// addresses and constants.
+// For example,
+// (add tga x) -> load tga(x)
+// tga -> load tga(0)
+// (add const x) -> load const(x)
+// const -> load const(0)
+// x -> load 0(x)
+def LoadOps32 : ComplexPattern<i32, 2, "SelectLoadOperands32">;
+def LoadOps64 : ComplexPattern<i64, 2, "SelectLoadOperands64">;
+
+multiclass LoadPat<ValueType ty, SDPatternOperator kind, string Name> {
+  def : Pat<(ty (kind (LoadOps32 I32:$offset, I32:$dynamic))),
+            (!cast<NI>(Name # "_A32") 0,
+             I32:$offset,
+             I32:$dynamic)>,
+        Requires<[HasAddr32]>;
+
+  def : Pat<(ty (kind (LoadOps64 I64:$offset, I64:$dynamic))),
+            (!cast<NI>(Name # "_A64") 0,
+             I64:$offset,
+             I64:$dynamic)>,
+        Requires<[HasAddr64]>;
+
+}
+
+defm : LoadPat<i32, load, "LOAD_I32">;
+defm : LoadPat<i64, load, "LOAD_I64">;
+defm : LoadPat<f32, load, "LOAD_F32">;
+defm : LoadPat<f64, load, "LOAD_F64">;
+
+defm : LoadPat<i32, sextloadi8, "LOAD8_S_I32">;
+defm : LoadPat<i32, sextloadi16, "LOAD16_S_I32">;
+defm : LoadPat<i64, sextloadi8, "LOAD8_S_I64">;
+defm : LoadPat<i64, sextloadi16, "LOAD16_S_I64">;
+defm : LoadPat<i64, sextloadi32, "LOAD32_S_I64">;
+
+defm : LoadPat<i32, zextloadi8, "LOAD8_U_I32">;
+defm : LoadPat<i32, zextloadi16, "LOAD16_U_I32">;
+defm : LoadPat<i64, zextloadi8, "LOAD8_U_I64">;
+defm : LoadPat<i64, zextloadi16, "LOAD16_U_I64">;
+defm : LoadPat<i64, zextloadi32, "LOAD32_U_I64">;
+
+defm : LoadPat<i32, extloadi8, "LOAD8_U_I32">;
+defm : LoadPat<i32, extloadi16, "LOAD16_U_I32">;
+defm : LoadPat<i64, extloadi8, "LOAD8_U_I64">;
+defm : LoadPat<i64, extloadi16, "LOAD16_U_I64">;
+defm : LoadPat<i64, extloadi32, "LOAD32_U_I64">;
 
 // Defines atomic and non-atomic stores, regular and truncating
 multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -167,11 +167,7 @@
 // Def load patterns from WebAssemblyInstrMemory.td for vector types
 foreach vec = AllVecs in {
-defm : LoadPatNoOffset<vec.vt, load, "LOAD_V128">;
-defm : LoadPatImmOff<vec.vt, load, regPlusImm, "LOAD_V128">;
-defm : LoadPatImmOff<vec.vt, load, or_is_add, "LOAD_V128">;
-defm : LoadPatOffsetOnly<vec.vt, load, "LOAD_V128">;
-defm : LoadPatGlobalAddrOffOnly<vec.vt, load, "LOAD_V128">;
+defm : LoadPat<vec.vt, load, "LOAD_V128">;
 }
 
 // v128.loadX_splat
@@ -206,11 +202,7 @@
 foreach vec = AllVecs in {
 defvar inst = "LOAD"#vec.lane_bits#"_SPLAT";
-defm : LoadPatNoOffset<vec.vt, load_splat, inst>;
-defm : LoadPatImmOff<vec.vt, load_splat, regPlusImm, inst>;
-defm : LoadPatImmOff<vec.vt, load_splat, or_is_add, inst>;
-defm : LoadPatOffsetOnly<vec.vt, load_splat, inst>;
-defm : LoadPatGlobalAddrOffOnly<vec.vt, load_splat, inst>;
+defm : LoadPat<vec.vt, load_splat, inst>;
 }
 
 // Load and extend
@@ -255,11 +247,7 @@
 ["extloadvi", "_U"]] in {
 defvar loadpat = !cast<PatFrag>(exts[0]#vec.split.lane_bits);
 defvar inst = "LOAD_EXTEND"#exts[1]#"_"#vec;
-defm : LoadPatNoOffset<vec.vt, loadpat, inst>;
-defm : LoadPatImmOff<vec.vt, loadpat, regPlusImm, inst>;
-defm : LoadPatImmOff<vec.vt, loadpat, or_is_add, inst>;
-defm : LoadPatOffsetOnly<vec.vt, loadpat, inst>;
-defm : LoadPatGlobalAddrOffOnly<vec.vt, loadpat, inst>;
+defm : LoadPat<vec.vt, loadpat, inst>;
 }
 
 // Load lane into zero vector
@@ -289,11 +277,7 @@
 foreach vec = [I32x4, I64x2] in {
 defvar inst = "LOAD_ZERO_"#vec;
 defvar pat = PatFrag<(ops node:$addr), (scalar_to_vector (vec.lane_vt (load $addr)))>;
- defm : LoadPatNoOffset<vec.vt, pat, inst>;
- defm : LoadPatImmOff<vec.vt, pat, regPlusImm, inst>;
- defm : LoadPatImmOff<vec.vt, pat, or_is_add, inst>;
- defm : LoadPatOffsetOnly<vec.vt, pat, inst>;
- defm : LoadPatGlobalAddrOffOnly<vec.vt, pat, inst>;
+ defm : LoadPat<vec.vt, pat, inst>;
 }
 
 // TODO: f32x4 and f64x2 as well
@@ -301,11 +285,7 @@
 defvar inst = "LOAD_ZERO_"#vec;
 defvar pat = PatFrag<(ops node:$ptr), (vector_insert (vec.splat (vec.lane_vt 0)),
                                        (vec.lane_vt (load $ptr)), 0)>;
- defm : LoadPatNoOffset<vec.vt, pat, inst>;
- defm : LoadPatImmOff<vec.vt, pat, regPlusImm, inst>;
- defm : LoadPatImmOff<vec.vt, pat, or_is_add, inst>;
- defm : LoadPatOffsetOnly<vec.vt, pat, inst>;
- defm : LoadPatGlobalAddrOffOnly<vec.vt, pat, inst>;
+ defm : LoadPat<vec.vt, pat, inst>;
 }
 
 // Load lane
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -75,14 +75,19 @@
   if (AddrOperandNum == FIOperandNum) {
     unsigned OffsetOperandNum = WebAssembly::getNamedOperandIdx(
         MI.getOpcode(), WebAssembly::OpName::off);
-    assert(FrameOffset >= 0 && MI.getOperand(OffsetOperandNum).getImm() >= 0);
-    int64_t Offset = MI.getOperand(OffsetOperandNum).getImm() + FrameOffset;
-
-    if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) {
-      MI.getOperand(OffsetOperandNum).setImm(Offset);
-      MI.getOperand(FIOperandNum)
-          .ChangeToRegister(FrameRegister, /*isDef=*/false);
-      return false;
+    auto &OffsetOp = MI.getOperand(OffsetOperandNum);
+    // Don't fold offset in if offset is a global address to be resolved later
+    if (OffsetOp.isImm()) {
+      assert(FrameOffset >= 0 && OffsetOp.getImm() >= 0);
+      int64_t Offset = OffsetOp.getImm() + FrameOffset;
+
+      if (static_cast<uint64_t>(Offset) <=
+          std::numeric_limits<uint32_t>::max()) {
+        OffsetOp.setImm(Offset);
+        MI.getOperand(FIOperandNum)
+            .ChangeToRegister(FrameRegister, /*isDef=*/false);
+        return false;
+      }
     }
   }
diff --git a/llvm/test/CodeGen/WebAssembly/offset.ll b/llvm/test/CodeGen/WebAssembly/offset.ll
--- a/llvm/test/CodeGen/WebAssembly/offset.ll
+++ b/llvm/test/CodeGen/WebAssembly/offset.ll
@@ -666,3 +666,29 @@
 define {i64,i32,i16,i8} @aggregate_return_without_merge() {
   ret {i64,i32,i16,i8} zeroinitializer
 }
+
+;===----------------------------------------------------------------------------
+; Global address loads
+;===----------------------------------------------------------------------------
+
+@global_i32 = external global i32
+@global_i8 = external global i8
+
+; CHECK-LABEL: load_i32_global_address_with_folded_offset:
+; CHECK: i32.const $push0=, 2
+; CHECK: i32.shl $push1=, $0, $pop0
+; CHECK: i32.load $push2=, global_i32($pop1)
+define i32 @load_i32_global_address_with_folded_offset(i32 %n) {
+  %s = getelementptr inbounds i32, i32* @global_i32, i32 %n
+  %t = load i32, i32* %s
+  ret i32 %t
+}
+
+; CHECK-LABEL: load_i8_i32s_global_address_with_folded_offset:
+; CHECK: i32.load8_s $push0=, global_i8($0)
+define i32 @load_i8_i32s_global_address_with_folded_offset(i32 %n) {
+  %s = getelementptr inbounds i8, i8* @global_i8, i32 %n
+  %t = load i8, i8* %s
+  %u = sext i8 %t to i32
+  ret i32 %u
+}
diff --git a/llvm/test/CodeGen/WebAssembly/userstack.ll b/llvm/test/CodeGen/WebAssembly/userstack.ll
--- a/llvm/test/CodeGen/WebAssembly/userstack.ll
+++ b/llvm/test/CodeGen/WebAssembly/userstack.ll
@@ -334,7 +334,7 @@
 ; only possible when that operand is an immediate. In this example it is a
 ; global address, so we should not fold it.
 ; CHECK-LABEL: frame_offset_with_global_address
-; CHECK: i[[PTR]].const ${{.*}}=, str
+; CHECK: i32.load8_u ${{.*}}=, str
 @str = local_unnamed_addr global [3 x i8] c"abc", align 16
 define i8 @frame_offset_with_global_address() {
   %1 = alloca i8, align 4