Index: llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt +++ llvm/trunk/lib/Target/WebAssembly/CMakeLists.txt @@ -29,6 +29,7 @@ WebAssemblyRegNumbering.cpp WebAssemblyRegStackify.cpp WebAssemblySelectionDAGInfo.cpp + WebAssemblySetP2AlignOperands.cpp WebAssemblyStoreResults.cpp WebAssemblySubtarget.cpp WebAssemblyTargetMachine.cpp Index: llvm/trunk/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ llvm/trunk/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -93,6 +93,7 @@ const MCOperandInfo &Info = Desc.OpInfo[i]; switch (Info.OperandType) { case MCOI::OPERAND_IMMEDIATE: + case WebAssembly::OPERAND_P2ALIGN: case WebAssembly::OPERAND_BASIC_BLOCK: { if (Pos + sizeof(uint64_t) > Bytes.size()) return MCDisassembler::Fail; Index: llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h =================================================================== --- llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -36,6 +36,8 @@ // Used by tblegen code. void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printWebAssemblyP2AlignOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); Index: llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ llvm/trunk/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -172,6 +172,16 @@ } } +void +WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + int64_t Imm = MI->getOperand(OpNo).getImm(); + if (Imm == WebAssembly::GetDefaultP2Align(MI->getOpcode())) + return; + O << ":p2align=" << Imm; +} + const char *llvm::WebAssembly::TypeToString(MVT Ty) { switch (Ty.SimpleTy) { case MVT::i32: Index: llvm/trunk/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h =================================================================== --- llvm/trunk/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ llvm/trunk/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -45,7 +45,9 @@ /// Basic block label in a branch construct. OPERAND_BASIC_BLOCK = MCOI::OPERAND_FIRST_TARGET, /// Floating-point immediate. - OPERAND_FPIMM + OPERAND_FPIMM, + /// p2align immediate for load and store address alignment. + OPERAND_P2ALIGN }; /// WebAssembly-specific directive identifiers. @@ -86,4 +88,47 @@ #define GET_SUBTARGETINFO_ENUM #include "WebAssemblyGenSubtargetInfo.inc" +namespace llvm { +namespace WebAssembly { + +/// Return the default p2align value for a load or store with the given opcode. 
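+/// This is the natural alignment for the access width: log2 of the access +/// size in bytes, i.e. 0 for 8-bit, 1 for 16-bit, 2 for 32-bit, and 3 for +/// 64-bit loads and stores.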
+inline unsigned GetDefaultP2Align(unsigned Opcode) { + switch (Opcode) { + case WebAssembly::LOAD8_S_I32: + case WebAssembly::LOAD8_U_I32: + case WebAssembly::LOAD8_S_I64: + case WebAssembly::LOAD8_U_I64: + case WebAssembly::STORE8_I32: + case WebAssembly::STORE8_I64: + return 0; + case WebAssembly::LOAD16_S_I32: + case WebAssembly::LOAD16_U_I32: + case WebAssembly::LOAD16_S_I64: + case WebAssembly::LOAD16_U_I64: + case WebAssembly::STORE16_I32: + case WebAssembly::STORE16_I64: + return 1; + case WebAssembly::LOAD_I32: + case WebAssembly::LOAD_F32: + case WebAssembly::STORE_I32: + case WebAssembly::STORE_F32: + case WebAssembly::LOAD32_S_I64: + case WebAssembly::LOAD32_U_I64: + case WebAssembly::STORE32_I64: + return 2; + case WebAssembly::LOAD_I64: + case WebAssembly::LOAD_F64: + case WebAssembly::STORE_I64: + case WebAssembly::STORE_F64: + return 3; + default: llvm_unreachable("Only loads and stores have p2align values"); + } +} + +/// The operand number of the stored value in a store instruction. +static const unsigned StoreValueOperandNo = 4; + +} // end namespace WebAssembly +} // end namespace llvm + #endif Index: llvm/trunk/lib/Target/WebAssembly/WebAssembly.h =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssembly.h +++ llvm/trunk/lib/Target/WebAssembly/WebAssembly.h @@ -28,6 +28,7 @@ FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createWebAssemblyArgumentMove(); +FunctionPass *createWebAssemblySetP2AlignOperands(); FunctionPass *createWebAssemblyStoreResults(); FunctionPass *createWebAssemblyRegStackify(); Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -84,6 +84,7 @@ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg) .addImm(0) .addReg(SPReg) + .addImm(2) // p2align .addMemOperand(LoadMMO); // Add/Subtract the frame size unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); @@ -102,6 +103,7 @@ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32) .addImm(0) .addReg(OffsetReg) + .addImm(2) // p2align .addReg(WebAssembly::SP32) .addMemOperand(MMO); } @@ -169,6 +171,7 @@ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32) .addImm(0) .addReg(OffsetReg) + .addImm(2) // p2align .addReg(WebAssembly::SP32) .addMemOperand(MMO); } Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -56,6 +56,8 @@ bool isCheapToSpeculateCtlz() const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align, + bool *Fast) const override; SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const override; Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -285,6 +285,20 @@ return true; } +bool
WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses( + EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, + bool *Fast) const { + // WebAssembly supports unaligned accesses, though it should be declared + // with the p2align attribute on loads and stores which do so, and there + // may be a performance impact. We tell LLVM they're "fast" because + // for the kinds of things that LLVM uses this for (merging adjacent stores + // of constants, etc.), WebAssembly implementations will either want the + // unaligned access or they'll split anyway. + if (Fast) + *Fast = true; + return true; +} + //===----------------------------------------------------------------------===// // WebAssembly Lowering private implementation. //===----------------------------------------------------------------------===// Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -76,6 +76,12 @@ def f64imm_op : Operand<f64>; } // OperandType = "OPERAND_FPIMM" +let OperandType = "OPERAND_P2ALIGN" in { +def P2Align : Operand<i32> { + let PrintMethod = "printWebAssemblyP2AlignOperand"; +} +} // OperandType = "OPERAND_P2ALIGN" + } // OperandNamespace = "WebAssembly" //===----------------------------------------------------------------------===// Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -46,325 +46,354 @@ let Defs = [ARGUMENTS] in { // Basic load. -def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], - "i32.load\t$dst, ${off}(${addr})">; -def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load\t$dst, ${off}(${addr})">; -def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr), [], - "f32.load\t$dst, ${off}(${addr})">; -def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr), [], - "f64.load\t$dst, ${off}(${addr})">; +def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i32.load\t$dst, ${off}(${addr})${p2align}">; +def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load\t$dst, ${off}(${addr})${p2align}">; +def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "f32.load\t$dst, ${off}(${addr})${p2align}">; +def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "f64.load\t$dst, ${off}(${addr})${p2align}">; } // Defs = [ARGUMENTS] // Select loads with no constant offset. -def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>; -def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>; -def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>; -def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>; +def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr, 0)>; +def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr, 0)>; +def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr, 0)>; +def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr, 0)>; // Select loads with a constant offset.
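// In these selection patterns, the trailing 0 is the initial value of the // new p2align operand: ISel always emits 0 here, and the SetP2AlignOperands // pass later rewrites it to the actual alignment recorded in the // instruction's memory operand.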
def : Pat<(i32 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_I32 imm:$off, $addr)>; + (LOAD_I32 imm:$off, $addr, 0)>; def : Pat<(i64 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_I64 imm:$off, $addr)>; + (LOAD_I64 imm:$off, $addr, 0)>; def : Pat<(f32 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_F32 imm:$off, $addr)>; + (LOAD_F32 imm:$off, $addr, 0)>; def : Pat<(f64 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_F64 imm:$off, $addr)>; + (LOAD_F64 imm:$off, $addr, 0)>; def : Pat<(i32 (load (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_I32 tglobaladdr:$off, $addr)>; + (LOAD_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (load (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_I64 tglobaladdr:$off, $addr)>; + (LOAD_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(f32 (load (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_F32 tglobaladdr:$off, $addr)>; + (LOAD_F32 tglobaladdr:$off, $addr, 0)>; def : Pat<(f64 (load (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_F64 tglobaladdr:$off, $addr)>; + (LOAD_F64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_I32 texternalsym:$off, $addr)>; + (LOAD_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_I64 texternalsym:$off, $addr)>; + (LOAD_I64 texternalsym:$off, $addr, 0)>; def : Pat<(f32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_F32 texternalsym:$off, $addr)>; + (LOAD_F32 texternalsym:$off, $addr, 0)>; def : Pat<(f64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_F64 texternalsym:$off, $addr)>; + (LOAD_F64 texternalsym:$off, $addr, 0)>; // Select loads with just a constant offset. 
-def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0))>; -def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_F32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD_F32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_F64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD_F64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD_I32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(f32 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_F32 texternalsym:$off, (CONST_I32 0))>; + (LOAD_F32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_F64 texternalsym:$off, (CONST_I32 0))>; + (LOAD_F64 texternalsym:$off, (CONST_I32 0), 0)>; let Defs = [ARGUMENTS] in { // Extending load. 
-def LOAD8_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], - "i32.load8_s\t$dst, ${off}(${addr})">; -def LOAD8_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], - "i32.load8_u\t$dst, ${off}(${addr})">; -def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], - "i32.load16_s\t$dst, ${off}(${addr})">; -def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], - "i32.load16_u\t$dst, ${off}(${addr})">; -def LOAD8_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load8_s\t$dst, ${off}(${addr})">; -def LOAD8_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load8_u\t$dst, ${off}(${addr})">; -def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load16_s\t$dst, ${off}(${addr})">; -def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load16_u\t$dst, ${off}(${addr})">; -def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load32_s\t$dst, ${off}(${addr})">; -def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load32_u\t$dst, ${off}(${addr})">; +def LOAD8_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i32.load8_s\t$dst, ${off}(${addr})${p2align}">; +def LOAD8_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i32.load8_u\t$dst, ${off}(${addr})${p2align}">; +def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i32.load16_s\t$dst, ${off}(${addr})${p2align}">; +def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i32.load16_u\t$dst, ${off}(${addr})${p2align}">; +def LOAD8_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load8_s\t$dst, ${off}(${addr})${p2align}">; +def LOAD8_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load8_u\t$dst, ${off}(${addr})${p2align}">; +def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load16_s\t$dst, ${off}(${addr})${p2align}">; +def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load16_u\t$dst, ${off}(${addr})${p2align}">; +def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load32_s\t$dst, ${off}(${addr})${p2align}">; +def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load32_u\t$dst, ${off}(${addr})${p2align}">; } // Defs = [ARGUMENTS] // Select extending loads with no constant offset. 
-def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr)>; -def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; -def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr)>; -def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; -def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr)>; -def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; -def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr)>; -def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; -def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>; -def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; +def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr, 0)>; +def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr, 0)>; +def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr, 0)>; +def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr, 0)>; +def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr, 0)>; +def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr, 0)>; +def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr, 0)>; +def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr, 0)>; +def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr, 0)>; +def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr, 0)>; // Select extending loads with a constant offset. def : Pat<(i32 (sextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_S_I32 imm:$off, $addr)>; + (LOAD8_S_I32 imm:$off, $addr, 0)>; def : Pat<(i32 (zextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I32 imm:$off, $addr)>; + (LOAD8_U_I32 imm:$off, $addr, 0)>; def : Pat<(i32 (sextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_S_I32 imm:$off, $addr)>; + (LOAD16_S_I32 imm:$off, $addr, 0)>; def : Pat<(i32 (zextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I32 imm:$off, $addr)>; + (LOAD16_U_I32 imm:$off, $addr, 0)>; def : Pat<(i64 (sextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_S_I64 imm:$off, $addr)>; + (LOAD8_S_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (zextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I64 imm:$off, $addr)>; + (LOAD8_U_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (sextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_S_I64 imm:$off, $addr)>; + (LOAD16_S_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (zextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I64 imm:$off, $addr)>; + (LOAD16_U_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (sextloadi32 (regPlusImm I32:$addr, imm:$off))), - (LOAD32_S_I64 imm:$off, $addr)>; + (LOAD32_S_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (zextloadi32 (regPlusImm I32:$addr, imm:$off))), - (LOAD32_U_I64 imm:$off, $addr)>; + (LOAD32_U_I64 imm:$off, $addr, 0)>; def : Pat<(i32 (sextloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_S_I32 tglobaladdr:$off, $addr)>; + (LOAD8_S_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (zextloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I32 tglobaladdr:$off, $addr)>; + (LOAD8_U_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (sextloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_S_I32 tglobaladdr:$off, $addr)>; + (LOAD16_S_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (zextloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I32 tglobaladdr:$off, $addr)>; + (LOAD16_U_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (sextloadi8 (regPlusGA I32:$addr, 
(WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_S_I64 tglobaladdr:$off, $addr)>; + (LOAD8_S_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (zextloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I64 tglobaladdr:$off, $addr)>; + (LOAD8_U_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (sextloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_S_I64 tglobaladdr:$off, $addr)>; + (LOAD16_S_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (zextloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I64 tglobaladdr:$off, $addr)>; + (LOAD16_U_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (sextloadi32 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD32_S_I64 tglobaladdr:$off, $addr)>; + (LOAD32_S_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (zextloadi32 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD32_U_I64 tglobaladdr:$off, $addr)>; + (LOAD32_U_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (sextloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_S_I32 texternalsym:$off, $addr)>; + (LOAD8_S_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i32 (zextloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I32 texternalsym:$off, $addr)>; + (LOAD8_U_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i32 (sextloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_S_I32 texternalsym:$off, $addr)>; + (LOAD16_S_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i32 (zextloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I32 texternalsym:$off, $addr)>; + (LOAD16_U_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (sextloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_S_I64 texternalsym:$off, $addr)>; + (LOAD8_S_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (zextloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I64 texternalsym:$off, $addr)>; + (LOAD8_U_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (sextloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_S_I64 texternalsym:$off, $addr)>; + (LOAD16_S_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (zextloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I64 texternalsym:$off, $addr)>; + (LOAD16_U_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (sextloadi32 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD32_S_I64 texternalsym:$off, $addr)>; + (LOAD32_S_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (zextloadi32 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD32_U_I64 texternalsym:$off, $addr)>; + (LOAD32_U_I64 texternalsym:$off, $addr, 0)>; // Select extending loads with just a constant offset. 
-def : Pat<(i32 (sextloadi8 imm:$off)), (LOAD8_S_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (zextloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (sextloadi16 imm:$off)), (LOAD16_S_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (zextloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi8 imm:$off)), (LOAD8_S_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi16 imm:$off)), (LOAD16_S_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi32 imm:$off)), (LOAD32_S_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi8 imm:$off)), + (LOAD8_S_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i32 (zextloadi8 imm:$off)), + (LOAD8_U_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i32 (sextloadi16 imm:$off)), + (LOAD16_S_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i32 (zextloadi16 imm:$off)), + (LOAD16_U_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (sextloadi8 imm:$off)), + (LOAD8_S_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (zextloadi8 imm:$off)), + (LOAD8_U_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (sextloadi16 imm:$off)), + (LOAD16_S_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (zextloadi16 imm:$off)), + (LOAD16_U_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (sextloadi32 imm:$off)), + (LOAD32_S_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (zextloadi32 imm:$off)), + (LOAD32_U_I64 imm:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (zextloadi32 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD8_S_I32 
texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))), - (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))), - (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; // Resolve "don't care" extending loads to zero-extending loads. This is // somewhat arbitrary, but zero-extending is conceptually simpler. // Select "don't care" extending loads with no constant offset. -def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; -def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; -def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; -def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; -def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; +def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr, 0)>; +def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr, 0)>; +def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr, 0)>; +def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr, 0)>; +def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr, 0)>; // Select "don't care" extending loads with a constant offset. 
def : Pat<(i32 (extloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I32 imm:$off, $addr)>; + (LOAD8_U_I32 imm:$off, $addr, 0)>; def : Pat<(i32 (extloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I32 imm:$off, $addr)>; + (LOAD16_U_I32 imm:$off, $addr, 0)>; def : Pat<(i64 (extloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I64 imm:$off, $addr)>; + (LOAD8_U_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (extloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I64 imm:$off, $addr)>; + (LOAD16_U_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (extloadi32 (regPlusImm I32:$addr, imm:$off))), - (LOAD32_U_I64 imm:$off, $addr)>; + (LOAD32_U_I64 imm:$off, $addr, 0)>; def : Pat<(i32 (extloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I32 tglobaladdr:$off, $addr)>; + (LOAD8_U_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (extloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I32 tglobaladdr:$off, $addr)>; + (LOAD16_U_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (extloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I64 tglobaladdr:$off, $addr)>; + (LOAD8_U_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (extloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I64 tglobaladdr:$off, $addr)>; + (LOAD16_U_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (extloadi32 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD32_U_I64 tglobaladdr:$off, $addr)>; + (LOAD32_U_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (extloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I32 texternalsym:$off, $addr)>; + (LOAD8_U_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i32 (extloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I32 texternalsym:$off, $addr)>; + (LOAD16_U_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (extloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I64 texternalsym:$off, $addr)>; + (LOAD8_U_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (extloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I64 texternalsym:$off, $addr)>; + (LOAD16_U_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (extloadi32 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD32_U_I64 texternalsym:$off, $addr)>; + (LOAD32_U_I64 texternalsym:$off, $addr, 0)>; // Select "don't care" extending loads with just a constant offset. 
-def : Pat<(i32 (extloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (extloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (extloadi8 imm:$off)), + (LOAD8_U_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i32 (extloadi16 imm:$off)), + (LOAD16_U_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (extloadi8 imm:$off)), + (LOAD8_U_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (extloadi16 imm:$off)), + (LOAD16_U_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (extloadi32 imm:$off)), + (LOAD32_U_I64 imm:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))), - (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; let Defs = [ARGUMENTS] in { @@ -374,193 +403,202 @@ // instruction definition patterns that don't reference all of the output // operands. // Note: WebAssembly inverts SelectionDAG's usual operand order.
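// With the p2align operand inserted before $val, a store's operands are // (dst, off, addr, p2align, val), making the stored value machine operand 4; // WebAssembly::StoreValueOperandNo records this.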
-def STORE_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [], - "i32.store\t$dst, ${off}(${addr}), $val">; -def STORE_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], - "i64.store\t$dst, ${off}(${addr}), $val">; -def STORE_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, F32:$val), [], - "f32.store\t$dst, ${off}(${addr}), $val">; -def STORE_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, F64:$val), [], - "f64.store\t$dst, ${off}(${addr}), $val">; +def STORE_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I32:$val), [], + "i32.store\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I64:$val), [], + "i64.store\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, F32:$val), [], + "f32.store\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, F64:$val), [], + "f64.store\t$dst, ${off}(${addr})${p2align}, $val">; } // Defs = [ARGUMENTS] // Select stores with no constant offset. -def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, I32:$val)>; -def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, I64:$val)>; -def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>; -def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, 0, I32:$val)>; +def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, 0, I64:$val)>; +def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, 0, F32:$val)>; +def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, 0, F64:$val)>; // Select stores with a constant offset. 
def : Pat<(store I32:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE_I32 imm:$off, I32:$addr, I32:$val)>; + (STORE_I32 imm:$off, I32:$addr, 0, I32:$val)>; def : Pat<(store I64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE_I64 imm:$off, I32:$addr, I64:$val)>; + (STORE_I64 imm:$off, I32:$addr, 0, I64:$val)>; def : Pat<(store F32:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE_F32 imm:$off, I32:$addr, F32:$val)>; + (STORE_F32 imm:$off, I32:$addr, 0, F32:$val)>; def : Pat<(store F64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE_F64 imm:$off, I32:$addr, F64:$val)>; + (STORE_F64 imm:$off, I32:$addr, 0, F64:$val)>; def : Pat<(store I32:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; + (STORE_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>; def : Pat<(store I64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; + (STORE_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>; def : Pat<(store F32:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>; + (STORE_F32 tglobaladdr:$off, I32:$addr, 0, F32:$val)>; def : Pat<(store F64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>; + (STORE_F64 tglobaladdr:$off, I32:$addr, 0, F64:$val)>; def : Pat<(store I32:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>; + (STORE_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>; def : Pat<(store I64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>; + (STORE_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>; def : Pat<(store F32:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>; + (STORE_F32 texternalsym:$off, I32:$addr, 0, F32:$val)>; def : Pat<(store F64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>; + (STORE_F64 texternalsym:$off, I32:$addr, 0, F64:$val)>; // Select stores with just a constant offset. 
def : Pat<(store I32:$val, imm:$off), - (STORE_I32 imm:$off, (CONST_I32 0), I32:$val)>; + (STORE_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(store I64:$val, imm:$off), - (STORE_I64 imm:$off, (CONST_I32 0), I64:$val)>; + (STORE_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(store F32:$val, imm:$off), - (STORE_F32 imm:$off, (CONST_I32 0), F32:$val)>; + (STORE_F32 imm:$off, (CONST_I32 0), 0, F32:$val)>; def : Pat<(store F64:$val, imm:$off), - (STORE_F64 imm:$off, (CONST_I32 0), F64:$val)>; + (STORE_F64 imm:$off, (CONST_I32 0), 0, F64:$val)>; def : Pat<(store I32:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; + (STORE_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(store I64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; + (STORE_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(store F32:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE_F32 tglobaladdr:$off, (CONST_I32 0), F32:$val)>; + (STORE_F32 tglobaladdr:$off, (CONST_I32 0), 0, F32:$val)>; def : Pat<(store F64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE_F64 tglobaladdr:$off, (CONST_I32 0), F64:$val)>; + (STORE_F64 tglobaladdr:$off, (CONST_I32 0), 0, F64:$val)>; def : Pat<(store I32:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; + (STORE_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(store I64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; + (STORE_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(store F32:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE_F32 texternalsym:$off, (CONST_I32 0), F32:$val)>; + (STORE_F32 texternalsym:$off, (CONST_I32 0), 0, F32:$val)>; def : Pat<(store F64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE_F64 texternalsym:$off, (CONST_I32 0), F64:$val)>; + (STORE_F64 texternalsym:$off, (CONST_I32 0), 0, F64:$val)>; let Defs = [ARGUMENTS] in { // Truncating store. 
-def STORE8_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [], - "i32.store8\t$dst, ${off}(${addr}), $val">; -def STORE16_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [], - "i32.store16\t$dst, ${off}(${addr}), $val">; -def STORE8_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], - "i64.store8\t$dst, ${off}(${addr}), $val">; -def STORE16_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], - "i64.store16\t$dst, ${off}(${addr}), $val">; -def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], - "i64.store32\t$dst, ${off}(${addr}), $val">; +def STORE8_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I32:$val), [], + "i32.store8\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE16_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I32:$val), [], + "i32.store16\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE8_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I64:$val), [], + "i64.store8\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE16_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I64:$val), [], + "i64.store16\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I64:$val), [], + "i64.store32\t$dst, ${off}(${addr})${p2align}, $val">; } // Defs = [ARGUMENTS] // Select truncating stores with no constant offset. def : Pat<(truncstorei8 I32:$val, I32:$addr), - (STORE8_I32 0, I32:$addr, I32:$val)>; + (STORE8_I32 0, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, I32:$addr), - (STORE16_I32 0, I32:$addr, I32:$val)>; + (STORE16_I32 0, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, I32:$addr), - (STORE8_I64 0, I32:$addr, I64:$val)>; + (STORE8_I64 0, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, I32:$addr), - (STORE16_I64 0, I32:$addr, I64:$val)>; + (STORE16_I64 0, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, I32:$addr), - (STORE32_I64 0, I32:$addr, I64:$val)>; + (STORE32_I64 0, I32:$addr, 0, I64:$val)>; // Select truncating stores with a constant offset. 
def : Pat<(truncstorei8 I32:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE8_I32 imm:$off, I32:$addr, I32:$val)>; + (STORE8_I32 imm:$off, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE16_I32 imm:$off, I32:$addr, I32:$val)>; + (STORE16_I32 imm:$off, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE8_I64 imm:$off, I32:$addr, I64:$val)>; + (STORE8_I64 imm:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE16_I64 imm:$off, I32:$addr, I64:$val)>; + (STORE16_I64 imm:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE32_I64 imm:$off, I32:$addr, I64:$val)>; + (STORE32_I64 imm:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei8 I32:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; + (STORE8_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; + (STORE16_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; + (STORE8_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; + (STORE16_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; + (STORE32_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei8 I32:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>; + (STORE8_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>; + (STORE16_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE8_I64 texternalsym:$off, I32:$addr, I64:$val)>; + (STORE8_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>; + (STORE16_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>; + (STORE32_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>; // Select truncating stores with just a constant offset. 
def : Pat<(truncstorei8 I32:$val, imm:$off), - (STORE8_I32 imm:$off, (CONST_I32 0), I32:$val)>; + (STORE8_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, imm:$off), - (STORE16_I32 imm:$off, (CONST_I32 0), I32:$val)>; + (STORE16_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, imm:$off), - (STORE8_I64 imm:$off, (CONST_I32 0), I64:$val)>; + (STORE8_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, imm:$off), - (STORE16_I64 imm:$off, (CONST_I32 0), I64:$val)>; + (STORE16_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, imm:$off), - (STORE32_I64 imm:$off, (CONST_I32 0), I64:$val)>; + (STORE32_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; + (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; + (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; + (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; + (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; + (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE8_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; + (STORE8_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE16_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; + (STORE16_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE8_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; + (STORE8_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE16_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; + (STORE16_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE32_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; + (STORE32_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>; let Defs = [ARGUMENTS] in { Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -58,6 +58,11 @@ VRegStackified.resize(TargetRegisterInfo::virtReg2Index(VReg) + 1); VRegStackified.set(TargetRegisterInfo::virtReg2Index(VReg)); } + void unstackifyVReg(unsigned VReg) { + if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size()) + return; + VRegStackified.reset(TargetRegisterInfo::virtReg2Index(VReg)); + } bool isVRegStackified(unsigned VReg) const { if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size()) return false; Index: 
llvm/trunk/lib/Target/WebAssembly/WebAssemblyPeephole.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -70,8 +70,10 @@ MachineOperand &MO = MI.getOperand(0); unsigned OldReg = MO.getReg(); // TODO: Handle SP/physregs - if (OldReg == MI.getOperand(3).getReg() && - TargetRegisterInfo::isVirtualRegister(MI.getOperand(3).getReg())) { + if (OldReg == + MI.getOperand(WebAssembly::StoreValueOperandNo).getReg() && + TargetRegisterInfo::isVirtualRegister( + MI.getOperand(WebAssembly::StoreValueOperandNo).getReg())) { Changed = true; unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); MO.setReg(NewReg); Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -0,0 +1,108 @@ +//=- WebAssemblySetP2AlignOperands.cpp - Set alignments on loads and stores -=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file sets the p2align operands on load and store instructions. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-set-p2align-operands" + +namespace { +class WebAssemblySetP2AlignOperands final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblySetP2AlignOperands() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { + return "WebAssembly Set p2align Operands"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblySetP2AlignOperands::ID = 0; +FunctionPass *llvm::createWebAssemblySetP2AlignOperands() { + return new WebAssemblySetP2AlignOperands(); +} + +bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Set p2align Operands **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + bool Changed = false; + + for (auto &MBB : MF) { + for (auto &MI : MBB) { + switch (MI.getOpcode()) { + case WebAssembly::LOAD_I32: + case WebAssembly::LOAD_I64: + case WebAssembly::LOAD_F32: + case WebAssembly::LOAD_F64: + case WebAssembly::LOAD8_S_I32: + case WebAssembly::LOAD8_U_I32: + case WebAssembly::LOAD16_S_I32: + case WebAssembly::LOAD16_U_I32: + case WebAssembly::LOAD8_S_I64: + case WebAssembly::LOAD8_U_I64: + case WebAssembly::LOAD16_S_I64: + case WebAssembly::LOAD16_U_I64: + case WebAssembly::LOAD32_S_I64: + case WebAssembly::LOAD32_U_I64: + case
WebAssembly::STORE_I32: + case WebAssembly::STORE_I64: + case WebAssembly::STORE_F32: + case WebAssembly::STORE_F64: + case WebAssembly::STORE8_I32: + case WebAssembly::STORE16_I32: + case WebAssembly::STORE8_I64: + case WebAssembly::STORE16_I64: + case WebAssembly::STORE32_I64: + assert(MI.getOperand(3).getImm() == 0 && + "ISel should set p2align operands to 0"); + assert(MI.hasOneMemOperand() && + "Load and store instructions have exactly one mem operand"); + assert((*MI.memoperands_begin())->getSize() == + (UINT64_C(1) + << WebAssembly::GetDefaultP2Align(MI.getOpcode())) && + "Default p2align value should be natural"); + assert(MI.getDesc().OpInfo[3].OperandType == + WebAssembly::OPERAND_P2ALIGN && + "Load and store instructions should have a p2align operand"); + MI.getOperand(3).setImm( + Log2_64((*MI.memoperands_begin())->getAlignment())); + break; + default: + break; + } + } + } + + return Changed; +} Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -91,7 +91,8 @@ case WebAssembly::STORE_I32: case WebAssembly::STORE_I64: unsigned ToReg = MI.getOperand(0).getReg(); - unsigned FromReg = MI.getOperand(3).getReg(); + unsigned FromReg = + MI.getOperand(WebAssembly::StoreValueOperandNo).getReg(); for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { MachineOperand &O = *I++; MachineInstr *Where = O.getParent(); Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -154,6 +154,10 @@ // so that we can fix up the ARGUMENT instructions before anything else // sees them in the wrong place. addPass(createWebAssemblyArgumentMove()); + // Set the p2align operands. This information is present during ISel, but + // it's inconvenient to collect there. Collect it now, and update the + // immediate operands. + addPass(createWebAssemblySetP2AlignOperands()); return false; } Index: llvm/trunk/test/CodeGen/WebAssembly/i32-load-store-alignment.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/i32-load-store-alignment.ll +++ llvm/trunk/test/CodeGen/WebAssembly/i32-load-store-alignment.ll @@ -0,0 +1,210 @@ +; RUN: llc < %s -asm-verbose=false | FileCheck %s + +; Test loads and stores with custom alignment values. + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; CHECK-LABEL: ldi32_a1: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, 0($0):p2align=0{{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i32 @ldi32_a1(i32 *%p) { + %v = load i32, i32* %p, align 1 + ret i32 %v +} + +; CHECK-LABEL: ldi32_a2: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, 0($0):p2align=1{{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i32 @ldi32_a2(i32 *%p) { + %v = load i32, i32* %p, align 2 + ret i32 %v +} + +; 4 is the default alignment for i32, so no attribute is needed.
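+; The printer omits the :p2align suffix whenever the operand matches +; GetDefaultP2Align for the opcode, so naturally-aligned accesses like the +; ones below are printed without it.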
+ +; CHECK-LABEL: ldi32_a4: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i32 @ldi32_a4(i32 *%p) { + %v = load i32, i32* %p, align 4 + ret i32 %v +} + +; The default alignment in LLVM is the same as the default alignment in wasm. + +; CHECK-LABEL: ldi32: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i32 @ldi32(i32 *%p) { + %v = load i32, i32* %p + ret i32 %v +} + +; CHECK-LABEL: ldi32_a8: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, 0($0):p2align=3{{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i32 @ldi32_a8(i32 *%p) { + %v = load i32, i32* %p, align 8 + ret i32 %v +} + +; Extending loads. + +; CHECK-LABEL: ldi8_a1: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.load8_u $push[[NUM:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i8 @ldi8_a1(i8 *%p) { + %v = load i8, i8* %p, align 1 + ret i8 %v +} + +; CHECK-LABEL: ldi8_a2: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.load8_u $push[[NUM:[0-9]+]]=, 0($0):p2align=1{{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i8 @ldi8_a2(i8 *%p) { + %v = load i8, i8* %p, align 2 + ret i8 %v +} + +; CHECK-LABEL: ldi16_a1: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.load16_u $push[[NUM:[0-9]+]]=, 0($0):p2align=0{{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i16 @ldi16_a1(i16 *%p) { + %v = load i16, i16* %p, align 1 + ret i16 %v +} + +; CHECK-LABEL: ldi16_a2: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.load16_u $push[[NUM:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i16 @ldi16_a2(i16 *%p) { + %v = load i16, i16* %p, align 2 + ret i16 %v +} + +; CHECK-LABEL: ldi16_a4: +; CHECK-NEXT: .param i32{{$}} +; CHECK-NEXT: .result i32{{$}} +; CHECK-NEXT: i32.load16_u $push[[NUM:[0-9]+]]=, 0($0):p2align=2{{$}} +; CHECK-NEXT: return $pop[[NUM]]{{$}} +define i16 @ldi16_a4(i16 *%p) { + %v = load i16, i16* %p, align 4 + ret i16 %v +} + +; Stores. + +; CHECK-LABEL: sti32_a1: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK-NEXT: i32.store $discard=, 0($0):p2align=0, $1{{$}} +; CHECK-NEXT: return{{$}} +define void @sti32_a1(i32 *%p, i32 %v) { + store i32 %v, i32* %p, align 1 + ret void +} + +; CHECK-LABEL: sti32_a2: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK-NEXT: i32.store $discard=, 0($0):p2align=1, $1{{$}} +; CHECK-NEXT: return{{$}} +define void @sti32_a2(i32 *%p, i32 %v) { + store i32 %v, i32* %p, align 2 + ret void +} + +; 4 is the default alignment for i32, so no attribute is needed. + +; CHECK-LABEL: sti32_a4: +; CHECK-NEXT: .param i32, i32{{$}} +; CHECK-NEXT: i32.store $discard=, 0($0), $1{{$}} +; CHECK-NEXT: return{{$}} +define void @sti32_a4(i32 *%p, i32 %v) { + store i32 %v, i32* %p, align 4 + ret void +} + +; The default alignment in LLVM is the same as the default alignment in wasm.
Index: llvm/trunk/test/CodeGen/WebAssembly/i64-load-store-alignment.ll
===================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/i64-load-store-alignment.ll
+++ llvm/trunk/test/CodeGen/WebAssembly/i64-load-store-alignment.ll
@@ -0,0 +1,323 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test loads and stores with custom alignment values.
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: ldi64_a1:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0):p2align=0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi64_a1(i64 *%p) {
+  %v = load i64, i64* %p, align 1
+  ret i64 %v
+}
+
+; CHECK-LABEL: ldi64_a2:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0):p2align=1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi64_a2(i64 *%p) {
+  %v = load i64, i64* %p, align 2
+  ret i64 %v
+}
+
+; CHECK-LABEL: ldi64_a4:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0):p2align=2{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi64_a4(i64 *%p) {
+  %v = load i64, i64* %p, align 4
+  ret i64 %v
+}
+
+; 8 is the default alignment for i64 so no attribute is needed.
+
+; CHECK-LABEL: ldi64_a8:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi64_a8(i64 *%p) {
+  %v = load i64, i64* %p, align 8
+  ret i64 %v
+}
+
+; The default alignment in LLVM is the same as the default alignment in wasm.
+
+; CHECK-LABEL: ldi64:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi64(i64 *%p) {
+  %v = load i64, i64* %p
+  ret i64 %v
+}
+
+; CHECK-LABEL: ldi64_a16:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0):p2align=4{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi64_a16(i64 *%p) {
+  %v = load i64, i64* %p, align 16
+  ret i64 %v
+}
+
+; Extending loads.
+
+; CHECK-LABEL: ldi8_a1:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load8_u $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi8_a1(i8 *%p) {
+  %v = load i8, i8* %p, align 1
+  %w = zext i8 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi8_a2:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load8_u $push[[NUM:[0-9]+]]=, 0($0):p2align=1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi8_a2(i8 *%p) {
+  %v = load i8, i8* %p, align 2
+  %w = zext i8 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi16_a1:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load16_u $push[[NUM:[0-9]+]]=, 0($0):p2align=0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi16_a1(i16 *%p) {
+  %v = load i16, i16* %p, align 1
+  %w = zext i16 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi16_a2:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load16_u $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi16_a2(i16 *%p) {
+  %v = load i16, i16* %p, align 2
+  %w = zext i16 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi16_a4:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load16_u $push[[NUM:[0-9]+]]=, 0($0):p2align=2{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi16_a4(i16 *%p) {
+  %v = load i16, i16* %p, align 4
+  %w = zext i16 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi32_a1:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load32_u $push[[NUM:[0-9]+]]=, 0($0):p2align=0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi32_a1(i32 *%p) {
+  %v = load i32, i32* %p, align 1
+  %w = zext i32 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi32_a2:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load32_u $push[[NUM:[0-9]+]]=, 0($0):p2align=1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi32_a2(i32 *%p) {
+  %v = load i32, i32* %p, align 2
+  %w = zext i32 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi32_a4:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load32_u $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi32_a4(i32 *%p) {
+  %v = load i32, i32* %p, align 4
+  %w = zext i32 %v to i64
+  ret i64 %w
+}
+
+; CHECK-LABEL: ldi32_a8:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load32_u $push[[NUM:[0-9]+]]=, 0($0):p2align=3{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi32_a8(i32 *%p) {
+  %v = load i32, i32* %p, align 8
+  %w = zext i32 %v to i64
+  ret i64 %w
+}
+
+; Stores.
+
+; CHECK-LABEL: sti64_a1:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store $discard=, 0($0):p2align=0, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti64_a1(i64 *%p, i64 %v) {
+  store i64 %v, i64* %p, align 1
+  ret void
+}
+
+; CHECK-LABEL: sti64_a2:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store $discard=, 0($0):p2align=1, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti64_a2(i64 *%p, i64 %v) {
+  store i64 %v, i64* %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: sti64_a4:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store $discard=, 0($0):p2align=2, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti64_a4(i64 *%p, i64 %v) {
+  store i64 %v, i64* %p, align 4
+  ret void
+}
+
+; 8 is the default alignment for i64 so no attribute is needed.
+
+; CHECK-LABEL: sti64_a8:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti64_a8(i64 *%p, i64 %v) {
+  store i64 %v, i64* %p, align 8
+  ret void
+}
+
+; The default alignment in LLVM is the same as the default alignment in wasm.
+
+; CHECK-LABEL: sti64:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti64(i64 *%p, i64 %v) {
+  store i64 %v, i64* %p
+  ret void
+}
+
+; CHECK-LABEL: sti64_a16:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store $discard=, 0($0):p2align=4, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti64_a16(i64 *%p, i64 %v) {
+  store i64 %v, i64* %p, align 16
+  ret void
+}
+
+; Truncating stores.
+
+; CHECK-LABEL: sti8_a1:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store8 $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti8_a1(i8 *%p, i64 %w) {
+  %v = trunc i64 %w to i8
+  store i8 %v, i8* %p, align 1
+  ret void
+}
+
+; CHECK-LABEL: sti8_a2:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store8 $discard=, 0($0):p2align=1, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti8_a2(i8 *%p, i64 %w) {
+  %v = trunc i64 %w to i8
+  store i8 %v, i8* %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: sti16_a1:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store16 $discard=, 0($0):p2align=0, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti16_a1(i16 *%p, i64 %w) {
+  %v = trunc i64 %w to i16
+  store i16 %v, i16* %p, align 1
+  ret void
+}
+
+; CHECK-LABEL: sti16_a2:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store16 $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti16_a2(i16 *%p, i64 %w) {
+  %v = trunc i64 %w to i16
+  store i16 %v, i16* %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: sti16_a4:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store16 $discard=, 0($0):p2align=2, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti16_a4(i16 *%p, i64 %w) {
+  %v = trunc i64 %w to i16
+  store i16 %v, i16* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: sti32_a1:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store32 $discard=, 0($0):p2align=0, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32_a1(i32 *%p, i64 %w) {
+  %v = trunc i64 %w to i32
+  store i32 %v, i32* %p, align 1
+  ret void
+}
+
+; CHECK-LABEL: sti32_a2:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store32 $discard=, 0($0):p2align=1, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32_a2(i32 *%p, i64 %w) {
+  %v = trunc i64 %w to i32
+  store i32 %v, i32* %p, align 2
+  ret void
+}
+
+; CHECK-LABEL: sti32_a4:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store32 $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32_a4(i32 *%p, i64 %w) {
+  %v = trunc i64 %w to i32
+  store i32 %v, i32* %p, align 4
+  ret void
+}
+
+; CHECK-LABEL: sti32_a8:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store32 $discard=, 0($0):p2align=3, $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32_a8(i32 *%p, i64 %w) {
+  %v = trunc i64 %w to i32
+  store i32 %v, i32* %p, align 8
+  ret void
+}
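A note on the offset.ll update that follows (an observation about the patch, not part of it): when two adjacent 4-byte-aligned i32 stores are combined into a single i64 store, the combined access is still only known to be 4-byte aligned, which is below i64's natural alignment of 8, so the explicit :p2align=2 attribute is printed. A minimal illustrative IR sketch; the function name @merge_example is hypothetical and not from the test suite:

; Two adjacent i32 stores, each only 4-byte aligned; if they are merged
; into one i64 store, that store keeps align 4 and prints as :p2align=2.
define void @merge_example(i32* %p) {
  %q = getelementptr inbounds i32, i32* %p, i32 1
  store i32 0, i32* %p, align 4
  store i32 0, i32* %q, align 4
  ret void
}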
Index: llvm/trunk/test/CodeGen/WebAssembly/offset.ll
===================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/offset.ll
+++ llvm/trunk/test/CodeGen/WebAssembly/offset.ll
@@ -372,14 +372,27 @@
   ret void
 }
 
-; Fold the offsets when lowering aggregate return values.
+; Fold the offsets when lowering aggregate return values. The stores get
+; merged into i64 stores.
 
 ; CHECK-LABEL: aggregate_return:
-; CHECK: i32.const $push0=, 0{{$}}
-; CHECK: i32.store $push1=, 12($0), $pop0{{$}}
-; CHECK: i32.store $push2=, 8($0), $pop1{{$}}
-; CHECK: i32.store $push3=, 4($0), $pop2{{$}}
-; CHECK: i32.store $discard=, 0($0), $pop3{{$}}
+; CHECK: i64.const $push0=, 0{{$}}
+; CHECK: i64.store $push1=, 8($0):p2align=2, $pop0{{$}}
+; CHECK: i64.store $discard=, 0($0):p2align=2, $pop1{{$}}
 define {i32,i32,i32,i32} @aggregate_return() {
   ret {i32,i32,i32,i32} zeroinitializer
 }
+
+; Fold the offsets when lowering aggregate return values. The stores are not
+; merged.
+
+; CHECK-LABEL: aggregate_return_without_merge:
+; CHECK: i32.const $push0=, 0{{$}}
+; CHECK: i32.store8 $push1=, 14($0), $pop0{{$}}
+; CHECK: i32.store16 $push2=, 12($0), $pop1{{$}}
+; CHECK: i32.store $discard=, 8($0), $pop2{{$}}
+; CHECK: i64.const $push3=, 0{{$}}
+; CHECK: i64.store $discard=, 0($0), $pop3{{$}}
+define {i64,i32,i16,i8} @aggregate_return_without_merge() {
+  ret {i64,i32,i16,i8} zeroinitializer
+}
Index: llvm/trunk/test/CodeGen/WebAssembly/userstack.ll
===================================================================
--- llvm/trunk/test/CodeGen/WebAssembly/userstack.ll
+++ llvm/trunk/test/CodeGen/WebAssembly/userstack.ll
@@ -57,7 +57,7 @@
  ; CHECK-NEXT: i32.store [[SP]]=, 0([[L2]]), [[SP]]
  %r = alloca [5 x i32]
 
- ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 4
+ ; CHECK-NEXT: i32.const $push[[L4:.+]]=, 12
  ; CHECK-NEXT: i32.const [[L5:.+]]=, 12
  ; CHECK-NEXT: i32.add [[L5]]=, [[SP]], [[L5]]
  ; CHECK-NEXT: i32.add $push[[L6:.+]]=, [[L5]], $pop[[L4]]
@@ -66,7 +66,7 @@
  ; CHECK-NEXT: i32.store $discard=, 0($pop3), $pop[[L10]]{{$}}
  %p = getelementptr [5 x i32], [5 x i32]* %r, i32 0, i32 0
  store i32 1, i32* %p
- %p2 = getelementptr [5 x i32], [5 x i32]* %r, i32 0, i32 1
+ %p2 = getelementptr [5 x i32], [5 x i32]* %r, i32 0, i32 3
  store i32 1, i32* %p2
 
  ; CHECK-NEXT: i32.const [[L7:.+]]=, 32
@@ -89,8 +89,8 @@
  %p = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 0
  store i32 1, i32* %p
  ; This store should have both the GEP and the FI folded into it.
- ; CHECK-NEXT: i32.store {{.*}}=, 16([[SP]]), $pop
- %p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 1
+ ; CHECK-NEXT: i32.store {{.*}}=, 24([[SP]]), $pop
+ %p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 3
  store i32 1, i32* %p2
  ; CHECK: i32.const [[L7:.+]]=, 32
  ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L7]]