diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -66,8 +66,8 @@ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; - bool SelectLoadOperands32(SDValue Op, SDValue &Offset, SDValue &Addr); - bool SelectLoadOperands64(SDValue Op, SDValue &Offset, SDValue &Addr); + bool SelectAddrOperands32(SDValue Op, SDValue &Offset, SDValue &Addr); + bool SelectAddrOperands64(SDValue Op, SDValue &Offset, SDValue &Addr); // Include the pieces autogenerated from the target description. #include "WebAssemblyGenDAGISel.inc" @@ -75,9 +75,9 @@ private: // add select functions here... - bool SelectLoadOperands(MVT AddrType, unsigned ConstOpc, SDValue Op, + bool SelectAddrOperands(MVT AddrType, unsigned ConstOpc, SDValue Op, SDValue &Offset, SDValue &Addr); - bool SelectLoadAddOperands(MVT OffsetType, SDValue N, SDValue &Offset, + bool SelectAddrAddOperands(MVT OffsetType, SDValue N, SDValue &Offset, SDValue &Addr); }; } // end anonymous namespace @@ -288,7 +288,7 @@ return true; } -bool WebAssemblyDAGToDAGISel::SelectLoadAddOperands(MVT OffsetType, SDValue N, +bool WebAssemblyDAGToDAGISel::SelectAddrAddOperands(MVT OffsetType, SDValue N, SDValue &Offset, SDValue &Addr) { assert(N.getNumOperands() == 2 && "Attempting to fold in a non-binary op"); @@ -314,7 +314,7 @@ return false; } -bool WebAssemblyDAGToDAGISel::SelectLoadOperands(MVT AddrType, +bool WebAssemblyDAGToDAGISel::SelectAddrOperands(MVT AddrType, unsigned ConstOpc, SDValue N, SDValue &Offset, SDValue &Addr) { @@ -338,7 +338,7 @@ // Fold anything inside an add into the offset. if (N.getOpcode() == ISD::ADD && - SelectLoadAddOperands(AddrType, N, Offset, Addr)) + SelectAddrAddOperands(AddrType, N, Offset, Addr)) return true; // Likewise, treat an 'or' node as an 'add' if the or'ed bits are known to be @@ -354,7 +354,7 @@ OrIsAdd = (~Known0.Zero & ~Known1.Zero) == 0; } - if (OrIsAdd && SelectLoadAddOperands(AddrType, N, Offset, Addr)) + if (OrIsAdd && SelectAddrAddOperands(AddrType, N, Offset, Addr)) return true; } @@ -368,20 +368,20 @@ return true; } - // Else it's a plain old load with no offset. + // Else it's a plain old load/store with no offset. Offset = CurDAG->getTargetConstant(0, DL, AddrType); Addr = N; return true; } -bool WebAssemblyDAGToDAGISel::SelectLoadOperands32(SDValue Op, SDValue &Offset, +bool WebAssemblyDAGToDAGISel::SelectAddrOperands32(SDValue Op, SDValue &Offset, SDValue &Addr) { - return SelectLoadOperands(MVT::i32, WebAssembly::CONST_I32, Op, Offset, Addr); + return SelectAddrOperands(MVT::i32, WebAssembly::CONST_I32, Op, Offset, Addr); } -bool WebAssemblyDAGToDAGISel::SelectLoadOperands64(SDValue Op, SDValue &Offset, +bool WebAssemblyDAGToDAGISel::SelectAddrOperands64(SDValue Op, SDValue &Offset, SDValue &Addr) { - return SelectLoadOperands(MVT::i64, WebAssembly::CONST_I64, Op, Offset, Addr); + return SelectAddrOperands(MVT::i64, WebAssembly::CONST_I64, Op, Offset, Addr); } /// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -77,129 +77,29 @@ } // mayLoad = 1 } // hasSideEffects = 1 -// Select notifys with no constant offset. -def NotifyPatNoOffset_A32 : - Pat<(i32 (int_wasm_memory_atomic_notify I32:$addr, I32:$count)), - (MEMORY_ATOMIC_NOTIFY_A32 0, 0, I32:$addr, I32:$count)>, +def NotifyPat_A32 : + Pat<(i32 (int_wasm_memory_atomic_notify (AddrOps32 offset32_op:$offset, I32:$addr), I32:$count)), + (MEMORY_ATOMIC_NOTIFY_A32 0, $offset, $addr, $count)>, Requires<[HasAddr32, HasAtomics]>; -def NotifyPatNoOffset_A64 : - Pat<(i32 (int_wasm_memory_atomic_notify I64:$addr, I32:$count)), - (MEMORY_ATOMIC_NOTIFY_A64 0, 0, I64:$addr, I32:$count)>, +def NotifyPat_A64 : + Pat<(i32 (int_wasm_memory_atomic_notify (AddrOps64 offset64_op:$offset, I64:$addr), I32:$count)), + (MEMORY_ATOMIC_NOTIFY_A64 0, $offset, $addr, $count)>, Requires<[HasAddr64, HasAtomics]>; -// Select notifys with a constant offset. -// Pattern with address + immediate offset -multiclass NotifyPatImmOff { - def : Pat<(i32 (int_wasm_memory_atomic_notify (operand I32:$addr, imm:$off), - I32:$count)), - (!cast(inst#_A32) 0, imm:$off, I32:$addr, I32:$count)>, - Requires<[HasAddr32, HasAtomics]>; - def : Pat<(i32 (int_wasm_memory_atomic_notify (operand I64:$addr, imm:$off), - I32:$count)), - (!cast(inst#_A64) 0, imm:$off, I64:$addr, I32:$count)>, - Requires<[HasAddr64, HasAtomics]>; +multiclass WaitPat { + def WaitPat_A32 : + Pat<(i32 (kind (AddrOps32 offset32_op:$offset, I32:$addr), ty:$exp, I64:$timeout)), + (!cast(inst#_A32) 0, $offset, $addr, $exp, $timeout)>, + Requires<[HasAddr32, HasAtomics]>; + def WaitPat_A64 : + Pat<(i32 (kind (AddrOps64 offset64_op:$offset, I64:$addr), ty:$exp, I64:$timeout)), + (!cast(inst#_A64) 0, $offset, $addr, $exp, $timeout)>, + Requires<[HasAddr64, HasAtomics]>; } -defm : NotifyPatImmOff; -defm : NotifyPatImmOff; - -// Select notifys with just a constant offset. -def NotifyPatOffsetOnly_A32 : - Pat<(i32 (int_wasm_memory_atomic_notify imm:$off, I32:$count)), - (MEMORY_ATOMIC_NOTIFY_A32 0, imm:$off, (CONST_I32 0), I32:$count)>, - Requires<[HasAddr32, HasAtomics]>; -def NotifyPatOffsetOnly_A64 : - Pat<(i32 (int_wasm_memory_atomic_notify imm:$off, I32:$count)), - (MEMORY_ATOMIC_NOTIFY_A64 0, imm:$off, (CONST_I64 0), I32:$count)>, - Requires<[HasAddr64, HasAtomics]>; -def NotifyPatGlobalAddrOffOnly_A32 : - Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblyWrapper tglobaladdr:$off), - I32:$count)), - (MEMORY_ATOMIC_NOTIFY_A32 0, tglobaladdr:$off, (CONST_I32 0), I32:$count) - >, - Requires<[HasAddr32, HasAtomics, IsNotPIC]>; -def NotifyPatGlobalAddrOffOnly_A64 : - Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblyWrapper tglobaladdr:$off), - I32:$count)), - (MEMORY_ATOMIC_NOTIFY_A64 0, tglobaladdr:$off, (CONST_I64 0), I32:$count) - >, - Requires<[HasAddr64, HasAtomics, IsNotPIC]>; - -// Select waits with no constant offset. -multiclass WaitPatNoOffset { - def : Pat<(i32 (kind I32:$addr, ty:$exp, I64:$timeout)), - (!cast(inst#_A32) 0, 0, I32:$addr, ty:$exp, I64:$timeout)>, - Requires<[HasAddr32, HasAtomics]>; - def : Pat<(i32 (kind I64:$addr, ty:$exp, I64:$timeout)), - (!cast(inst#_A64) 0, 0, I64:$addr, ty:$exp, I64:$timeout)>, - Requires<[HasAddr64, HasAtomics]>; -} -defm : WaitPatNoOffset; -defm : WaitPatNoOffset; -defm : WaitPatNoOffset; -defm : WaitPatNoOffset; - -// Select waits with a constant offset. - -// Pattern with address + immediate offset -multiclass WaitPatImmOff { - def : Pat<(i32 (kind (operand I32:$addr, imm:$off), ty:$exp, I64:$timeout)), - (!cast(inst#_A32) 0, imm:$off, I32:$addr, ty:$exp, - I64:$timeout)>, - Requires<[HasAddr32, HasAtomics]>; - def : Pat<(i32 (kind (operand I64:$addr, imm:$off), ty:$exp, I64:$timeout)), - (!cast(inst#_A64) 0, imm:$off, I64:$addr, ty:$exp, - I64:$timeout)>, - Requires<[HasAddr64, HasAtomics]>; -} -defm : WaitPatImmOff; -defm : WaitPatImmOff; -defm : WaitPatImmOff; -defm : WaitPatImmOff; - -// Select waits with just a constant offset. -multiclass WaitPatOffsetOnly { - def : Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)), - (!cast(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$exp, - I64:$timeout)>, - Requires<[HasAddr32, HasAtomics]>; - def : Pat<(i32 (kind imm:$off, ty:$exp, I64:$timeout)), - (!cast(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$exp, - I64:$timeout)>, - Requires<[HasAddr64, HasAtomics]>; -} -defm : WaitPatOffsetOnly; -defm : WaitPatOffsetOnly; - -multiclass WaitPatGlobalAddrOffOnly { - def : Pat<(i32 (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$exp, - I64:$timeout)), - (!cast(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, - I64:$timeout)>, - Requires<[HasAddr32, HasAtomics, IsNotPIC]>; - def : Pat<(i32 (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$exp, - I64:$timeout)), - (!cast(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp, - I64:$timeout)>, - Requires<[HasAddr64, HasAtomics, IsNotPIC]>; -} -defm : WaitPatGlobalAddrOffOnly; -defm : WaitPatGlobalAddrOffOnly; +defm : WaitPat; +defm : WaitPat; //===----------------------------------------------------------------------===// // Atomic fences @@ -297,57 +197,16 @@ // store: (store $val, $ptr) // atomic_store: (store $ptr, $val) - -// Select stores with no constant offset. -multiclass AStorePatNoOffset { - def : Pat<(kind I32:$addr, ty:$val), - (!cast(inst#_A32) 0, 0, I32:$addr, ty:$val)>, +multiclass AStorePat { + def : Pat<(kind (AddrOps32 offset32_op:$offset, I32:$addr), ty:$val), + (!cast(inst#_A32) 0, $offset, $addr, $val)>, Requires<[HasAddr32, HasAtomics]>; - def : Pat<(kind I64:$addr, ty:$val), - (!cast(inst#_A64) 0, 0, I64:$addr, ty:$val)>, + def : Pat<(kind (AddrOps64 offset64_op:$offset, I64:$addr), ty:$val), + (!cast(inst#_A64) 0, $offset, $addr, $val)>, Requires<[HasAddr64, HasAtomics]>; } -defm : AStorePatNoOffset; -defm : AStorePatNoOffset; - -// Select stores with a constant offset. - -// Pattern with address + immediate offset -multiclass AStorePatImmOff { - def : Pat<(kind (operand I32:$addr, imm:$off), ty:$val), - (!cast(inst#_A32) 0, imm:$off, I32:$addr, ty:$val)>, - Requires<[HasAddr32, HasAtomics]>; - def : Pat<(kind (operand I64:$addr, imm:$off), ty:$val), - (!cast(inst#_A64) 0, imm:$off, I64:$addr, ty:$val)>, - Requires<[HasAddr64, HasAtomics]>; -} -defm : AStorePatImmOff; -defm : AStorePatImmOff; - -// Select stores with just a constant offset. -multiclass AStorePatOffsetOnly { - def : Pat<(kind imm:$off, ty:$val), - (!cast(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$val)>, - Requires<[HasAddr32, HasAtomics]>; - def : Pat<(kind imm:$off, ty:$val), - (!cast(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$val)>, - Requires<[HasAddr64, HasAtomics]>; -} -defm : AStorePatOffsetOnly; -defm : AStorePatOffsetOnly; - -multiclass AStorePatGlobalAddrOffOnly { - def : Pat<(kind (WebAssemblyWrapper tglobaladdr:$off), ty:$val), - (!cast(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>, - Requires<[HasAddr32, HasAtomics, IsNotPIC]>; - def : Pat<(kind (WebAssemblyWrapper tglobaladdr:$off), ty:$val), - (!cast(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>, - Requires<[HasAddr64, HasAtomics, IsNotPIC]>; -} -defm : AStorePatGlobalAddrOffOnly; -defm : AStorePatGlobalAddrOffOnly; - +defm : AStorePat; +defm : AStorePat; // Truncating stores. defm ATOMIC_STORE8_I32 : AtomicStore; @@ -368,43 +227,12 @@ def trunc_astore_16_64 : trunc_astore_64; def trunc_astore_32_64 : trunc_astore_64; - // Truncating stores with no constant offset -defm : AStorePatNoOffset; -defm : AStorePatNoOffset; -defm : AStorePatNoOffset; -defm : AStorePatNoOffset; -defm : AStorePatNoOffset; - -// Truncating stores with a constant offset -defm : AStorePatImmOff; -defm : AStorePatImmOff; -defm : AStorePatImmOff; -defm : AStorePatImmOff; -defm : AStorePatImmOff; -defm : AStorePatImmOff; -defm : AStorePatImmOff; -defm : AStorePatImmOff; -defm : AStorePatImmOff; -defm : AStorePatImmOff; - -// Truncating stores with just a constant offset -defm : AStorePatOffsetOnly; -defm : AStorePatOffsetOnly; -defm : AStorePatOffsetOnly; -defm : AStorePatOffsetOnly; -defm : AStorePatOffsetOnly; - -defm : AStorePatGlobalAddrOffOnly; -defm : AStorePatGlobalAddrOffOnly; -defm : AStorePatGlobalAddrOffOnly; -defm : AStorePatGlobalAddrOffOnly; -defm : AStorePatGlobalAddrOffOnly; - +defm : AStorePat; +defm : AStorePat; +defm : AStorePat; +defm : AStorePat; +defm : AStorePat; //===----------------------------------------------------------------------===// // Atomic binary read-modify-writes @@ -506,64 +334,20 @@ defm ATOMIC_RMW32_U_XCHG_I64 : WebAssemblyBinRMW; -// Select binary RMWs with no constant offset. -multiclass BinRMWPatNoOffset { - def : Pat<(ty (kind I32:$addr, ty:$val)), - (!cast(inst#_A32) 0, 0, I32:$addr, ty:$val)>, +multiclass BinRMWPat { + def : Pat<(ty (kind (AddrOps32 offset32_op:$offset, I32:$addr), ty:$val)), + (!cast(inst#_A32) 0, $offset, $addr, $val)>, Requires<[HasAddr32, HasAtomics]>; - def : Pat<(ty (kind I64:$addr, ty:$val)), - (!cast(inst#_A64) 0, 0, I64:$addr, ty:$val)>, + def : Pat<(ty (kind (AddrOps64 offset64_op:$offset, I64:$addr), ty:$val)), + (!cast(inst#_A64) 0, $offset, $addr, $val)>, Requires<[HasAddr64, HasAtomics]>; } -// Select binary RMWs with a constant offset. - -// Pattern with address + immediate offset -multiclass BinRMWPatImmOff { - def : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$val)), - (!cast(inst#_A32) 0, imm:$off, I32:$addr, ty:$val)>, - Requires<[HasAddr32, HasAtomics]>; - def : Pat<(ty (kind (operand I64:$addr, imm:$off), ty:$val)), - (!cast(inst#_A64) 0, imm:$off, I64:$addr, ty:$val)>, - Requires<[HasAddr64, HasAtomics]>; -} - -// Select binary RMWs with just a constant offset. -multiclass BinRMWPatOffsetOnly { - def : Pat<(ty (kind imm:$off, ty:$val)), - (!cast(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$val)>, - Requires<[HasAddr32, HasAtomics]>; - def : Pat<(ty (kind imm:$off, ty:$val)), - (!cast(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$val)>, - Requires<[HasAddr64, HasAtomics]>; -} - -multiclass BinRMWPatGlobalAddrOffOnly { - def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$val)), - (!cast(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>, - Requires<[HasAddr32, HasAtomics, IsNotPIC]>; - def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$val)), - (!cast(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>, - Requires<[HasAddr64, HasAtomics, IsNotPIC]>; -} - // Patterns for various addressing modes. multiclass BinRMWPattern { - defm : BinRMWPatNoOffset; - defm : BinRMWPatNoOffset; - - defm : BinRMWPatImmOff; - defm : BinRMWPatImmOff; - defm : BinRMWPatImmOff; - defm : BinRMWPatImmOff; - - defm : BinRMWPatOffsetOnly; - defm : BinRMWPatOffsetOnly; - - defm : BinRMWPatGlobalAddrOffOnly; - defm : BinRMWPatGlobalAddrOffOnly; + defm : BinRMWPat; + defm : BinRMWPat; } defm : BinRMWPattern { - // Truncating-extending binary RMWs with no constant offset - defm : BinRMWPatNoOffset, inst8_32>; - defm : BinRMWPatNoOffset, inst16_32>; - defm : BinRMWPatNoOffset, inst8_64>; - defm : BinRMWPatNoOffset, inst16_64>; - defm : BinRMWPatNoOffset, inst32_64>; - - defm : BinRMWPatNoOffset, inst8_32>; - defm : BinRMWPatNoOffset, inst16_32>; - defm : BinRMWPatNoOffset, inst8_64>; - defm : BinRMWPatNoOffset, inst16_64>; - - // Truncating-extending binary RMWs with a constant offset - defm : BinRMWPatImmOff, regPlusImm, inst8_32>; - defm : BinRMWPatImmOff, regPlusImm, - inst16_32>; - defm : BinRMWPatImmOff, regPlusImm, inst8_64>; - defm : BinRMWPatImmOff, regPlusImm, - inst16_64>; - defm : BinRMWPatImmOff, regPlusImm, - inst32_64>; - defm : BinRMWPatImmOff, or_is_add, inst8_32>; - defm : BinRMWPatImmOff, or_is_add, inst16_32>; - defm : BinRMWPatImmOff, or_is_add, inst8_64>; - defm : BinRMWPatImmOff, or_is_add, inst16_64>; - defm : BinRMWPatImmOff, or_is_add, inst32_64>; - - defm : BinRMWPatImmOff, regPlusImm, inst8_32>; - defm : BinRMWPatImmOff, regPlusImm, - inst16_32>; - defm : BinRMWPatImmOff, regPlusImm, inst8_64>; - defm : BinRMWPatImmOff, regPlusImm, - inst16_64>; - defm : BinRMWPatImmOff, or_is_add, inst8_32>; - defm : BinRMWPatImmOff, or_is_add, inst16_32>; - defm : BinRMWPatImmOff, or_is_add, inst8_64>; - defm : BinRMWPatImmOff, or_is_add, inst16_64>; - - // Truncating-extending binary RMWs with just a constant offset - defm : BinRMWPatOffsetOnly, inst8_32>; - defm : BinRMWPatOffsetOnly, inst16_32>; - defm : BinRMWPatOffsetOnly, inst8_64>; - defm : BinRMWPatOffsetOnly, inst16_64>; - defm : BinRMWPatOffsetOnly, inst32_64>; - - defm : BinRMWPatOffsetOnly, inst8_32>; - defm : BinRMWPatOffsetOnly, inst16_32>; - defm : BinRMWPatOffsetOnly, inst8_64>; - defm : BinRMWPatOffsetOnly, inst16_64>; - - defm : BinRMWPatGlobalAddrOffOnly, inst8_32>; - defm : BinRMWPatGlobalAddrOffOnly, inst16_32>; - defm : BinRMWPatGlobalAddrOffOnly, inst8_64>; - defm : BinRMWPatGlobalAddrOffOnly, inst16_64>; - defm : BinRMWPatGlobalAddrOffOnly, inst32_64>; - - defm : BinRMWPatGlobalAddrOffOnly, inst8_32>; - defm : BinRMWPatGlobalAddrOffOnly, inst16_32>; - defm : BinRMWPatGlobalAddrOffOnly, inst8_64>; - defm : BinRMWPatGlobalAddrOffOnly, inst16_64>; + // Truncating-extending binary RMWs + defm : BinRMWPat, inst8_32>; + defm : BinRMWPat, inst16_32>; + defm : BinRMWPat, inst8_64>; + defm : BinRMWPat, inst16_64>; + defm : BinRMWPat, inst32_64>; + + defm : BinRMWPat, inst8_32>; + defm : BinRMWPat, inst16_32>; + defm : BinRMWPat, inst8_64>; + defm : BinRMWPat, inst16_64>; } defm : BinRMWTruncExtPattern< @@ -740,70 +475,17 @@ defm ATOMIC_RMW32_U_CMPXCHG_I64 : WebAssemblyTerRMW; -// Select ternary RMWs with no constant offset. -multiclass TerRMWPatNoOffset { - def : Pat<(ty (kind I32:$addr, ty:$exp, ty:$new)), - (!cast(inst#_A32) 0, 0, I32:$addr, ty:$exp, ty:$new)>, - Requires<[HasAddr32, HasAtomics]>; - def : Pat<(ty (kind I64:$addr, ty:$exp, ty:$new)), - (!cast(inst#_A64) 0, 0, I64:$addr, ty:$exp, ty:$new)>, - Requires<[HasAddr64, HasAtomics]>; -} - -// Select ternary RMWs with a constant offset. - -// Pattern with address + immediate offset -multiclass TerRMWPatImmOff { - def : Pat<(ty (kind (operand I32:$addr, imm:$off), ty:$exp, ty:$new)), - (!cast(inst#_A32) 0, imm:$off, I32:$addr, ty:$exp, ty:$new)>, +multiclass TerRMWPat { + def : Pat<(ty (kind (AddrOps32 offset32_op:$offset, I32:$addr), ty:$exp, ty:$new)), + (!cast(inst#_A32) 0, $offset, $addr, $exp, $new)>, Requires<[HasAddr32, HasAtomics]>; - def : Pat<(ty (kind (operand I64:$addr, imm:$off), ty:$exp, ty:$new)), - (!cast(inst#_A64) 0, imm:$off, I64:$addr, ty:$exp, ty:$new)>, + def : Pat<(ty (kind (AddrOps64 offset64_op:$offset, I64:$addr), ty:$exp, ty:$new)), + (!cast(inst#_A64) 0, $offset, $addr, $exp, $new)>, Requires<[HasAddr64, HasAtomics]>; } -// Select ternary RMWs with just a constant offset. -multiclass TerRMWPatOffsetOnly { - def : Pat<(ty (kind imm:$off, ty:$exp, ty:$new)), - (!cast(inst#_A32) 0, imm:$off, (CONST_I32 0), ty:$exp, - ty:$new)>; - def : Pat<(ty (kind imm:$off, ty:$exp, ty:$new)), - (!cast(inst#_A64) 0, imm:$off, (CONST_I64 0), ty:$exp, - ty:$new)>; -} - -multiclass TerRMWPatGlobalAddrOffOnly { - def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$exp, ty:$new)), - (!cast(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp, - ty:$new)>, - Requires<[HasAddr32, HasAtomics, IsNotPIC]>; - def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$exp, ty:$new)), - (!cast(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp, - ty:$new)>, - Requires<[HasAddr64, HasAtomics, IsNotPIC]>; -} - -// Patterns for various addressing modes. -multiclass TerRMWPattern { - defm : TerRMWPatNoOffset; - defm : TerRMWPatNoOffset; - - defm : TerRMWPatImmOff; - defm : TerRMWPatImmOff; - defm : TerRMWPatImmOff; - defm : TerRMWPatImmOff; - - defm : TerRMWPatOffsetOnly; - defm : TerRMWPatOffsetOnly; - - defm : TerRMWPatGlobalAddrOffOnly; - defm : TerRMWPatGlobalAddrOffOnly; -} - -defm : TerRMWPattern; +defm : TerRMWPat; +defm : TerRMWPat; // Truncating & zero-extending ternary RMW patterns. // DAG legalization & optimization before instruction selection may introduce @@ -841,75 +523,13 @@ class sext_ter_rmw_16_64 : sext_ter_rmw_8_64; // 32->64 sext RMW gets selected as i32.atomic.rmw.***, i64.extend_i32_s -// Patterns for various addressing modes for truncating-extending ternary RMWs. -multiclass TerRMWTruncExtPattern< - PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, - string inst8_32, string inst16_32, string inst8_64, string inst16_64, - string inst32_64> { - // Truncating-extending ternary RMWs with no constant offset - defm : TerRMWPatNoOffset, inst8_32>; - defm : TerRMWPatNoOffset, inst16_32>; - defm : TerRMWPatNoOffset, inst8_64>; - defm : TerRMWPatNoOffset, inst16_64>; - defm : TerRMWPatNoOffset, inst32_64>; - - defm : TerRMWPatNoOffset, inst8_32>; - defm : TerRMWPatNoOffset, inst16_32>; - defm : TerRMWPatNoOffset, inst8_64>; - defm : TerRMWPatNoOffset, inst16_64>; - - // Truncating-extending ternary RMWs with a constant offset - defm : TerRMWPatImmOff, regPlusImm, inst8_32>; - defm : TerRMWPatImmOff, regPlusImm, - inst16_32>; - defm : TerRMWPatImmOff, regPlusImm, inst8_64>; - defm : TerRMWPatImmOff, regPlusImm, - inst16_64>; - defm : TerRMWPatImmOff, regPlusImm, - inst32_64>; - defm : TerRMWPatImmOff, or_is_add, inst8_32>; - defm : TerRMWPatImmOff, or_is_add, inst16_32>; - defm : TerRMWPatImmOff, or_is_add, inst8_64>; - defm : TerRMWPatImmOff, or_is_add, inst16_64>; - defm : TerRMWPatImmOff, or_is_add, inst32_64>; - - defm : TerRMWPatImmOff, regPlusImm, inst8_32>; - defm : TerRMWPatImmOff, regPlusImm, - inst16_32>; - defm : TerRMWPatImmOff, regPlusImm, inst8_64>; - defm : TerRMWPatImmOff, regPlusImm, - inst16_64>; - defm : TerRMWPatImmOff, or_is_add, inst8_32>; - defm : TerRMWPatImmOff, or_is_add, inst16_32>; - defm : TerRMWPatImmOff, or_is_add, inst8_64>; - defm : TerRMWPatImmOff, or_is_add, inst16_64>; - - // Truncating-extending ternary RMWs with just a constant offset - defm : TerRMWPatOffsetOnly, inst8_32>; - defm : TerRMWPatOffsetOnly, inst16_32>; - defm : TerRMWPatOffsetOnly, inst8_64>; - defm : TerRMWPatOffsetOnly, inst16_64>; - defm : TerRMWPatOffsetOnly, inst32_64>; - - defm : TerRMWPatOffsetOnly, inst8_32>; - defm : TerRMWPatOffsetOnly, inst16_32>; - defm : TerRMWPatOffsetOnly, inst8_64>; - defm : TerRMWPatOffsetOnly, inst16_64>; - - defm : TerRMWPatGlobalAddrOffOnly, inst8_32>; - defm : TerRMWPatGlobalAddrOffOnly, inst16_32>; - defm : TerRMWPatGlobalAddrOffOnly, inst8_64>; - defm : TerRMWPatGlobalAddrOffOnly, inst16_64>; - defm : TerRMWPatGlobalAddrOffOnly, inst32_64>; - - defm : TerRMWPatGlobalAddrOffOnly, inst8_32>; - defm : TerRMWPatGlobalAddrOffOnly, inst16_32>; - defm : TerRMWPatGlobalAddrOffOnly, inst8_64>; - defm : TerRMWPatGlobalAddrOffOnly, inst16_64>; -} +defm : TerRMWPat, "ATOMIC_RMW8_U_CMPXCHG_I32">; +defm : TerRMWPat, "ATOMIC_RMW16_U_CMPXCHG_I32">; +defm : TerRMWPat, "ATOMIC_RMW8_U_CMPXCHG_I64">; +defm : TerRMWPat, "ATOMIC_RMW16_U_CMPXCHG_I64">; +defm : TerRMWPat, "ATOMIC_RMW32_U_CMPXCHG_I64">; -defm : TerRMWTruncExtPattern< - atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, - "ATOMIC_RMW8_U_CMPXCHG_I32", "ATOMIC_RMW16_U_CMPXCHG_I32", - "ATOMIC_RMW8_U_CMPXCHG_I64", "ATOMIC_RMW16_U_CMPXCHG_I64", - "ATOMIC_RMW32_U_CMPXCHG_I64">; +defm : TerRMWPat, "ATOMIC_RMW8_U_CMPXCHG_I32">; +defm : TerRMWPat, "ATOMIC_RMW16_U_CMPXCHG_I32">; +defm : TerRMWPat, "ATOMIC_RMW8_U_CMPXCHG_I64">; +defm : TerRMWPat, "ATOMIC_RMW16_U_CMPXCHG_I64">; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -19,25 +19,19 @@ // local types. These memory-only types instead zero- or sign-extend into local // types when loading, and truncate when storing. -// WebAssembly constant offsets are performed as unsigned with infinite -// precision, so we need to check for NoUnsignedWrap so that we don't fold an -// offset for an add that needs wrapping. -def regPlusImm : PatFrag<(ops node:$addr, node:$off), - (add node:$addr, node:$off), - [{ return N->getFlags().hasNoUnsignedWrap(); }]>; +// Address Operands -// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero. -def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ - if (ConstantSDNode *CN = dyn_cast(N->getOperand(1))) - return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); - - KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0); - KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0); - return (~Known0.Zero & ~Known1.Zero) == 0; -}]>; - -// We don't need a regPlusES because external symbols never have constant -// offsets folded into them, so we can just use add. +// These patterns match the static (offset) and dynamic (address stack operand) +// operands for loads and stores, based on a combination of target global +// addresses and constants. +// For example, +// (load (add tga, x)) -> load offset=tga, addr=x +// (store v, tga) -> store v, offset=tga, addr=0 +// (load (add const, x)) -> load offset=const, addr=x +// (store v, const) -> store v, offset=const, addr=0 +// (load x) -> load offset=0, addr=x +def AddrOps32 : ComplexPattern; +def AddrOps64 : ComplexPattern; // Defines atomic and non-atomic loads, regular and extending. multiclass WebAssemblyLoad load tga(x) -// tga -> load tga(0) -// (add const x) -> load const(x) -// const -> load const(0) -// x -> load 0(x) -def LoadOps32 : ComplexPattern; -def LoadOps64 : ComplexPattern; - multiclass LoadPat { - def : Pat<(ty (kind (LoadOps32 offset32_op:$offset, I32:$addr))), + def : Pat<(ty (kind (AddrOps32 offset32_op:$offset, I32:$addr))), (!cast(Name # "_A32") 0, offset32_op:$offset, I32:$addr)>, Requires<[HasAddr32]>; - def : Pat<(ty (kind (LoadOps64 offset64_op:$offset, I64:$addr))), + def : Pat<(ty (kind (AddrOps64 offset64_op:$offset, I64:$addr))), (!cast(Name # "_A64") 0, offset64_op:$offset, I64:$addr)>, Requires<[HasAddr64]>; - } defm : LoadPat; @@ -159,69 +139,25 @@ defm STORE_F32 : WebAssemblyStore; defm STORE_F64 : WebAssemblyStore; -// Select stores with no constant offset. -multiclass StorePatNoOffset { - def : Pat<(node ty:$val, I32:$addr), - (!cast(inst # "_A32") 0, 0, I32:$addr, ty:$val)>, - Requires<[HasAddr32]>; - def : Pat<(node ty:$val, I64:$addr), - (!cast(inst # "_A64") 0, 0, I64:$addr, ty:$val)>, - Requires<[HasAddr64]>; +multiclass StorePat { + def : Pat<(kind ty:$val, (AddrOps32 offset32_op:$offset, I32:$addr)), + (!cast(Name # "_A32") 0, + offset32_op:$offset, + I32:$addr, + ty:$val)>, + Requires<[HasAddr32]>; + def : Pat<(kind ty:$val, (AddrOps64 offset64_op:$offset, I64:$addr)), + (!cast(Name # "_A64") 0, + offset64_op:$offset, + I64:$addr, + ty:$val)>, + Requires<[HasAddr64]>; } -defm : StorePatNoOffset; -defm : StorePatNoOffset; -defm : StorePatNoOffset; -defm : StorePatNoOffset; - -// Select stores with a constant offset. -multiclass StorePatImmOff { - def : Pat<(kind ty:$val, (operand I32:$addr, imm:$off)), - (!cast(inst # "_A32") 0, imm:$off, I32:$addr, ty:$val)>, - Requires<[HasAddr32]>; - def : Pat<(kind ty:$val, (operand I64:$addr, imm:$off)), - (!cast(inst # "_A64") 0, imm:$off, I64:$addr, ty:$val)>, - Requires<[HasAddr64]>; -} - -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; - -// Select stores with just a constant offset. -multiclass StorePatOffsetOnly { - def : Pat<(kind ty:$val, imm:$off), - (!cast(inst # "_A32") 0, imm:$off, (CONST_I32 0), ty:$val)>, - Requires<[HasAddr32]>; - def : Pat<(kind ty:$val, imm:$off), - (!cast(inst # "_A64") 0, imm:$off, (CONST_I64 0), ty:$val)>, - Requires<[HasAddr64]>; -} -defm : StorePatOffsetOnly; -defm : StorePatOffsetOnly; -defm : StorePatOffsetOnly; -defm : StorePatOffsetOnly; - -multiclass StorePatGlobalAddrOffOnly { - def : Pat<(kind ty:$val, (WebAssemblyWrapper tglobaladdr:$off)), - (!cast(inst # "_A32") 0, tglobaladdr:$off, (CONST_I32 0), - ty:$val)>, - Requires<[IsNotPIC, HasAddr32]>; - def : Pat<(kind ty:$val, (WebAssemblyWrapper tglobaladdr:$off)), - (!cast(inst # "_A64") 0, tglobaladdr:$off, (CONST_I64 0), - ty:$val)>, - Requires<[IsNotPIC, HasAddr64]>; -} -defm : StorePatGlobalAddrOffOnly; -defm : StorePatGlobalAddrOffOnly; -defm : StorePatGlobalAddrOffOnly; -defm : StorePatGlobalAddrOffOnly; +defm : StorePat; +defm : StorePat; +defm : StorePat; +defm : StorePat; // Truncating store. defm STORE8_I32 : WebAssemblyStore; @@ -230,36 +166,11 @@ defm STORE16_I64 : WebAssemblyStore; defm STORE32_I64 : WebAssemblyStore; -// Select truncating stores with no constant offset. -defm : StorePatNoOffset; -defm : StorePatNoOffset; -defm : StorePatNoOffset; -defm : StorePatNoOffset; -defm : StorePatNoOffset; - -// Select truncating stores with a constant offset. -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatImmOff; - -// Select truncating stores with just a constant offset. -defm : StorePatOffsetOnly; -defm : StorePatOffsetOnly; -defm : StorePatOffsetOnly; -defm : StorePatOffsetOnly; -defm : StorePatOffsetOnly; -defm : StorePatGlobalAddrOffOnly; -defm : StorePatGlobalAddrOffOnly; -defm : StorePatGlobalAddrOffOnly; -defm : StorePatGlobalAddrOffOnly; -defm : StorePatGlobalAddrOffOnly; +defm : StorePat; +defm : StorePat; +defm : StorePat; +defm : StorePat; +defm : StorePat; multiclass MemoryOps { // Current memory size. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -366,11 +366,7 @@ // Def store patterns from WebAssemblyInstrMemory.td for vector types foreach vec = AllVecs in { -defm : StorePatNoOffset; -defm : StorePatImmOff; -defm : StorePatImmOff; -defm : StorePatOffsetOnly; -defm : StorePatGlobalAddrOffOnly; +defm : StorePat; } // Store lane @@ -399,13 +395,16 @@ defm "" : SIMDStoreLane; defm "" : SIMDStoreLane; -// Select stores with no constant offset. -multiclass StoreLanePatNoOffset { - def : Pat<(kind (i32 I32:$addr), (vec.vt V128:$vec), (i32 vec.lane_idx:$idx)), - (!cast("STORE_LANE_"#vec#"_A32") 0, 0, imm:$idx, $addr, $vec)>, +multiclass StoreLanePat { + def : Pat<(kind (AddrOps32 offset32_op:$offset, I32:$addr), + (vec.vt V128:$vec), + (i32 vec.lane_idx:$idx)), + (!cast("STORE_LANE_"#vec#"_A32") 0, $offset, imm:$idx, $addr, $vec)>, Requires<[HasAddr32]>; - def : Pat<(kind (i64 I64:$addr), (vec.vt V128:$vec), (i32 vec.lane_idx:$idx)), - (!cast("STORE_LANE_"#vec#"_A64") 0, 0, imm:$idx, $addr, $vec)>, + def : Pat<(kind (AddrOps64 offset64_op:$offset, I64:$addr), + (vec.vt V128:$vec), + (i32 vec.lane_idx:$idx)), + (!cast("STORE_LANE_"#vec#"_A64") 0, $offset, imm:$idx, $addr, $vec)>, Requires<[HasAddr64]>; } @@ -424,10 +423,10 @@ // TODO: floating point lanes as well let AddedComplexity = 1 in { -defm : StoreLanePatNoOffset; -defm : StoreLanePatNoOffset; -defm : StoreLanePatNoOffset; -defm : StoreLanePatNoOffset; +defm : StoreLanePat; +defm : StoreLanePat; +defm : StoreLanePat; +defm : StoreLanePat; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll --- a/llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-load-lane-offset.ll @@ -155,10 +155,8 @@ ; CHECK: .functype store_lane_i8_with_folded_offset (v128, i32) -> () ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 24 -; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store8_lane 0, 0 +; CHECK-NEXT: v128.store8_lane 24, 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i8* %p to i32 %r = add nuw i32 %q, 24 @@ -173,10 +171,8 @@ ; CHECK: .functype store_lane_i8_with_folded_gep_offset (v128, i32) -> () ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 6 -; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store8_lane 0, 0 +; CHECK-NEXT: v128.store8_lane 6, 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i8, i8* %p, i32 6 %x = extractelement <16 x i8> %v, i32 0 @@ -238,9 +234,9 @@ ; CHECK-LABEL: store_lane_i8_to_numeric_address: ; CHECK: .functype store_lane_i8_to_numeric_address (v128) -> () ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const 42 +; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store8_lane 0, 0 +; CHECK-NEXT: v128.store8_lane 42, 0 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 42 to i8* %x = extractelement <16 x i8> %v, i32 0 @@ -252,9 +248,9 @@ ; CHECK-LABEL: store_lane_i8_from_global_address: ; CHECK: .functype store_lane_i8_from_global_address (v128) -> () ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const gv_i8 +; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store8_lane 0, 0 +; CHECK-NEXT: v128.store8_lane gv_i8, 0 ; CHECK-NEXT: # fallthrough-return %x = extractelement <16 x i8> %v, i32 0 store i8 %x, i8* @gv_i8 @@ -408,10 +404,8 @@ ; CHECK: .functype store_lane_i16_with_folded_offset (v128, i32) -> () ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 24 -; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store16_lane 0, 0 +; CHECK-NEXT: v128.store16_lane 24, 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i16* %p to i32 %r = add nuw i32 %q, 24 @@ -426,10 +420,8 @@ ; CHECK: .functype store_lane_i16_with_folded_gep_offset (v128, i32) -> () ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 12 -; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store16_lane 0, 0 +; CHECK-NEXT: v128.store16_lane 12, 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i16, i16* %p, i32 6 %x = extractelement <8 x i16> %v, i32 0 @@ -491,9 +483,9 @@ ; CHECK-LABEL: store_lane_i16_to_numeric_address: ; CHECK: .functype store_lane_i16_to_numeric_address (v128) -> () ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const 42 +; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store16_lane 0, 0 +; CHECK-NEXT: v128.store16_lane 42, 0 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 42 to i16* %x = extractelement <8 x i16> %v, i32 0 @@ -505,9 +497,9 @@ ; CHECK-LABEL: store_lane_i16_from_global_address: ; CHECK: .functype store_lane_i16_from_global_address (v128) -> () ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const gv_i16 +; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store16_lane 0, 0 +; CHECK-NEXT: v128.store16_lane gv_i16, 0 ; CHECK-NEXT: # fallthrough-return %x = extractelement <8 x i16> %v, i32 0 store i16 %x, i16* @gv_i16 @@ -661,10 +653,8 @@ ; CHECK: .functype store_lane_i32_with_folded_offset (v128, i32) -> () ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 24 -; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store32_lane 0, 0 +; CHECK-NEXT: v128.store32_lane 24, 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i32* %p to i32 %r = add nuw i32 %q, 24 @@ -679,10 +669,8 @@ ; CHECK: .functype store_lane_i32_with_folded_gep_offset (v128, i32) -> () ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 24 -; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store32_lane 0, 0 +; CHECK-NEXT: v128.store32_lane 24, 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i32, i32* %p, i32 6 %x = extractelement <4 x i32> %v, i32 0 @@ -744,9 +732,9 @@ ; CHECK-LABEL: store_lane_i32_to_numeric_address: ; CHECK: .functype store_lane_i32_to_numeric_address (v128) -> () ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const 42 +; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store32_lane 0, 0 +; CHECK-NEXT: v128.store32_lane 42, 0 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 42 to i32* %x = extractelement <4 x i32> %v, i32 0 @@ -758,9 +746,9 @@ ; CHECK-LABEL: store_lane_i32_from_global_address: ; CHECK: .functype store_lane_i32_from_global_address (v128) -> () ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const gv_i32 +; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store32_lane 0, 0 +; CHECK-NEXT: v128.store32_lane gv_i32, 0 ; CHECK-NEXT: # fallthrough-return %x = extractelement <4 x i32> %v, i32 0 store i32 %x, i32* @gv_i32 @@ -914,10 +902,8 @@ ; CHECK: .functype store_lane_i64_with_folded_offset (v128, i32) -> () ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 24 -; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store64_lane 0, 0 +; CHECK-NEXT: v128.store64_lane 24, 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint i64* %p to i32 %r = add nuw i32 %q, 24 @@ -932,10 +918,8 @@ ; CHECK: .functype store_lane_i64_with_folded_gep_offset (v128, i32) -> () ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 48 -; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store64_lane 0, 0 +; CHECK-NEXT: v128.store64_lane 48, 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds i64, i64* %p, i32 6 %x = extractelement <2 x i64> %v, i32 0 @@ -997,9 +981,9 @@ ; CHECK-LABEL: store_lane_i64_to_numeric_address: ; CHECK: .functype store_lane_i64_to_numeric_address (v128) -> () ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const 42 +; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store64_lane 0, 0 +; CHECK-NEXT: v128.store64_lane 42, 0 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 42 to i64* %x = extractelement <2 x i64> %v, i32 0 @@ -1011,9 +995,9 @@ ; CHECK-LABEL: store_lane_i64_from_global_address: ; CHECK: .functype store_lane_i64_from_global_address (v128) -> () ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const gv_i64 +; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store64_lane 0, 0 +; CHECK-NEXT: v128.store64_lane gv_i64, 0 ; CHECK-NEXT: # fallthrough-return %x = extractelement <2 x i64> %v, i32 0 store i64 %x, i64* @gv_i64 diff --git a/llvm/test/CodeGen/WebAssembly/simd-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-offset.ll --- a/llvm/test/CodeGen/WebAssembly/simd-offset.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-offset.ll @@ -949,10 +949,8 @@ ; CHECK: .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> () ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store64_lane 0, 0 +; CHECK-NEXT: v128.store64_lane 16, 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint <8 x i8>* %p to i32 %r = add nuw i32 %q, 16 @@ -979,10 +977,8 @@ ; CHECK: .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> () ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store64_lane 0, 0 +; CHECK-NEXT: v128.store64_lane 8, 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <8 x i8>, <8 x i8>* %p, i32 1 store <8 x i8> %v , <8 x i8>* %s @@ -1100,9 +1096,9 @@ ; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address: ; CHECK: .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> () ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const 32 +; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store64_lane 0, 0 +; CHECK-NEXT: v128.store64_lane 32, 0 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <8 x i8>* store <8 x i8> %v , <8 x i8>* %s @@ -1125,9 +1121,9 @@ ; CHECK-LABEL: store_narrowing_v8i16_to_global_address: ; CHECK: .functype store_narrowing_v8i16_to_global_address (v128) -> () ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const gv_v8i8 +; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store64_lane 0, 0 +; CHECK-NEXT: v128.store64_lane gv_v8i8, 0 ; CHECK-NEXT: # fallthrough-return store <8 x i8> %v , <8 x i8>* @gv_v8i8 ret void @@ -1417,11 +1413,11 @@ ; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_folded_gep_offset: ; CHECK: .functype load_zext_v4i8_to_v4i32_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.load32_zero 4 -; CHECK-NEXT: i8x16.shuffle 16, 1, 2, 3, 17, 5, 6, 7, 18, 9, 10, 11, 19, 13, 14, 15 -; CHECK-NEXT: # fallthrough-return +; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load32_zero 4 +; CHECK-NEXT: i8x16.shuffle 16, 1, 2, 3, 17, 5, 6, 7, 18, 9, 10, 11, 19, 13, 14, 15 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x i8>, <4 x i8>* %p, i32 1 %v = load <4 x i8>, <4 x i8>* %s %v2 = zext <4 x i8> %v to <4 x i32> @@ -2032,10 +2028,8 @@ ; CHECK: .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> () ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store64_lane 0, 0 +; CHECK-NEXT: v128.store64_lane 16, 0 ; CHECK-NEXT: # fallthrough-return %q = ptrtoint <4 x i16>* %p to i32 %r = add nuw i32 %q, 16 @@ -2062,10 +2056,8 @@ ; CHECK: .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> () ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store64_lane 0, 0 +; CHECK-NEXT: v128.store64_lane 8, 0 ; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x i16>, <4 x i16>* %p, i32 1 store <4 x i16> %v , <4 x i16>* %s @@ -2183,9 +2175,9 @@ ; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address: ; CHECK: .functype store_narrowing_v4i32_to_numeric_address (v128) -> () ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const 32 +; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store64_lane 0, 0 +; CHECK-NEXT: v128.store64_lane 32, 0 ; CHECK-NEXT: # fallthrough-return %s = inttoptr i32 32 to <4 x i16>* store <4 x i16> %v , <4 x i16>* %s @@ -2208,9 +2200,9 @@ ; CHECK-LABEL: store_narrowing_v4i32_to_global_address: ; CHECK: .functype store_narrowing_v4i32_to_global_address (v128) -> () ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: i32.const gv_v4i16 +; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.store64_lane 0, 0 +; CHECK-NEXT: v128.store64_lane gv_v4i16, 0 ; CHECK-NEXT: # fallthrough-return store <4 x i16> %v , <4 x i16>* @gv_v4i16 ret void