Index: lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp =================================================================== --- lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -210,6 +210,11 @@ return "f32"; case MVT::f64: return "f64"; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v4f32: + return "v128"; default: llvm_unreachable("unsupported type"); } Index: lib/Target/WebAssembly/WebAssembly.td =================================================================== --- lib/Target/WebAssembly/WebAssembly.td +++ lib/Target/WebAssembly/WebAssembly.td @@ -23,7 +23,7 @@ // WebAssembly Subtarget features. //===----------------------------------------------------------------------===// -def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "false", +def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "true", "Enable 128-bit SIMD">; //===----------------------------------------------------------------------===// Index: lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp +++ lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp @@ -26,9 +26,10 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -64,19 +65,6 @@ return new WebAssemblyArgumentMove(); } -/// Test whether the given instruction is an ARGUMENT. 
-static bool IsArgument(const MachineInstr &MI) { - switch (MI.getOpcode()) { - case WebAssembly::ARGUMENT_I32: - case WebAssembly::ARGUMENT_I64: - case WebAssembly::ARGUMENT_F32: - case WebAssembly::ARGUMENT_F64: - return true; - default: - return false; - } -} - bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) { DEBUG({ dbgs() << "********** Argument Move **********\n" @@ -87,9 +75,10 @@ MachineBasicBlock &EntryMBB = MF.front(); MachineBasicBlock::iterator InsertPt = EntryMBB.end(); + const auto &Subtarget = MF.getSubtarget(); // Look for the first NonArg instruction. for (MachineInstr &MI : EntryMBB) { - if (!IsArgument(MI)) { + if (!Subtarget.isArgument(MI)) { InsertPt = MI; break; } @@ -98,7 +87,7 @@ // Now move any argument instructions later in the block // to before our first NonArg instruction. for (MachineInstr &MI : llvm::make_range(InsertPt, EntryMBB.end())) { - if (IsArgument(MI)) { + if (Subtarget.isArgument(MI)) { EntryMBB.insert(InsertPt, MI.removeFromParent()); Changed = true; } Index: lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -43,6 +43,7 @@ class WebAssemblyAsmPrinter final : public AsmPrinter { const MachineRegisterInfo *MRI; const WebAssemblyFunctionInfo *MFI; + const WebAssemblySubtarget *Subtarget; public: WebAssemblyAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) @@ -60,6 +61,7 @@ bool runOnMachineFunction(MachineFunction &MF) override { MRI = &MF.getRegInfo(); MFI = MF.getInfo(); + Subtarget = &MF.getSubtarget(); return AsmPrinter::runOnMachineFunction(MF); } @@ -98,6 +100,11 @@ for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) if (TRC->hasType(T)) return T; + if (Subtarget->hasSIMD128()) { + for (MVT T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) + if (TRC->hasType(T)) + return T; + } DEBUG(errs() << "Unknown type for 
register number: " << RegNo);
   llvm_unreachable("Unknown register type");
   return MVT::Other;
@@ -221,6 +228,12 @@
   DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n');
 
   switch (MI->getOpcode()) {
+  case WebAssembly::ARGUMENT_v16i8:
+  case WebAssembly::ARGUMENT_v8i16:
+  case WebAssembly::ARGUMENT_v4i32:
+  case WebAssembly::ARGUMENT_v4f32:
+    assert(Subtarget->hasSIMD128() && "SIMD argument without SIMD128 support.");
+    // fallthrough intended.
   case WebAssembly::ARGUMENT_I32:
   case WebAssembly::ARGUMENT_I64:
   case WebAssembly::ARGUMENT_F32:
@@ -228,6 +241,12 @@
     // These represent values which are live into the function entry, so there's
     // no instruction to emit.
     break;
+  case WebAssembly::FALLTHROUGH_RETURN_v16i8:
+  case WebAssembly::FALLTHROUGH_RETURN_v8i16:
+  case WebAssembly::FALLTHROUGH_RETURN_v4i32:
+  case WebAssembly::FALLTHROUGH_RETURN_v4f32:
+    assert(Subtarget->hasSIMD128() && "SIMD return without SIMD128 support.");
+    // fallthrough intended.
   case WebAssembly::FALLTHROUGH_RETURN_I32:
   case WebAssembly::FALLTHROUGH_RETURN_I64:
   case WebAssembly::FALLTHROUGH_RETURN_F32:
Index: lib/Target/WebAssembly/WebAssemblyFastISel.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -113,6 +113,13 @@
     case MVT::f32:
     case MVT::f64:
       return VT;
+    case MVT::v16i8:
+    case MVT::v8i16:
+    case MVT::v4i32:
+    case MVT::v4f32:
+      if (Subtarget->hasSIMD128())
+        return VT;
+      break;
     default:
       break;
     }
@@ -575,7 +582,9 @@
       return false;
 
     Type *ArgTy = Arg.getType();
-    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+    if (ArgTy->isStructTy() || ArgTy->isArrayTy())
+      return false;
+    if (!Subtarget->hasSIMD128() && ArgTy->isVectorTy())
       return false;
 
     unsigned Opc;
@@ -600,6 +609,22 @@
       Opc = WebAssembly::ARGUMENT_F64;
       RC = &WebAssembly::F64RegClass;
       break;
+    case MVT::v16i8:
+      Opc = WebAssembly::ARGUMENT_v16i8;
+      RC = &WebAssembly::V128RegClass;
+      break;
+ 
case MVT::v8i16: + Opc = WebAssembly::ARGUMENT_v8i16; + RC = &WebAssembly::V128RegClass; + break; + case MVT::v4i32: + Opc = WebAssembly::ARGUMENT_v4i32; + RC = &WebAssembly::V128RegClass; + break; + case MVT::v4f32: + Opc = WebAssembly::ARGUMENT_v4f32; + RC = &WebAssembly::V128RegClass; + break; default: return false; } @@ -639,6 +664,9 @@ if (IsVoid) { Opc = IsDirect ? WebAssembly::CALL_VOID : WebAssembly::CALL_INDIRECT_VOID; } else { + if (!Subtarget->hasSIMD128() && Call->getType()->isVectorTy()) + return false; + MVT::SimpleValueType RetTy = getSimpleType(Call->getType()); switch (RetTy) { case MVT::i1: @@ -660,6 +688,26 @@ Opc = IsDirect ? WebAssembly::CALL_F64 : WebAssembly::CALL_INDIRECT_F64; ResultReg = createResultReg(&WebAssembly::F64RegClass); break; + case MVT::v16i8: + Opc = + IsDirect ? WebAssembly::CALL_v16i8 : WebAssembly::CALL_INDIRECT_v16i8; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v8i16: + Opc = + IsDirect ? WebAssembly::CALL_v8i16 : WebAssembly::CALL_INDIRECT_v8i16; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v4i32: + Opc = + IsDirect ? WebAssembly::CALL_v4i32 : WebAssembly::CALL_INDIRECT_v4i32; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; + case MVT::v4f32: + Opc = + IsDirect ? 
WebAssembly::CALL_v4f32 : WebAssembly::CALL_INDIRECT_v4f32; + ResultReg = createResultReg(&WebAssembly::V128RegClass); + break; default: return false; } @@ -972,6 +1020,8 @@ const LoadInst *Load = cast(I); if (Load->isAtomic()) return false; + if (!Subtarget->hasSIMD128() && Load->getType()->isVectorTy()) + return false; Address Addr; if (!computeAddress(Load->getPointerOperand(), Addr)) @@ -1027,6 +1077,9 @@ const StoreInst *Store = cast(I); if (Store->isAtomic()) return false; + if (!Subtarget->hasSIMD128() && + Store->getValueOperand()->getType()->isVectorTy()) + return false; Address Addr; if (!computeAddress(Store->getPointerOperand(), Addr)) @@ -1102,7 +1155,7 @@ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addMBB(TBB) .addReg(CondReg); - + finishCondBranch(Br->getParent(), TBB, FBB); return true; } @@ -1120,6 +1173,9 @@ } Value *RV = Ret->getOperand(0); + if (!Subtarget->hasSIMD128() && RV->getType()->isVectorTy()) + return false; + unsigned Opc; switch (getSimpleType(RV->getType())) { case MVT::i1: case MVT::i8: @@ -1129,8 +1185,24 @@ case MVT::i64: Opc = WebAssembly::RETURN_I64; break; - case MVT::f32: Opc = WebAssembly::RETURN_F32; break; - case MVT::f64: Opc = WebAssembly::RETURN_F64; break; + case MVT::f32: + Opc = WebAssembly::RETURN_F32; + break; + case MVT::f64: + Opc = WebAssembly::RETURN_F64; + break; + case MVT::v16i8: + Opc = WebAssembly::RETURN_v16i8; + break; + case MVT::v8i16: + Opc = WebAssembly::RETURN_v8i16; + break; + case MVT::v4i32: + Opc = WebAssembly::RETURN_v4i32; + break; + case MVT::v4f32: + Opc = WebAssembly::RETURN_v4f32; + break; default: return false; } Index: lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -54,6 +54,12 @@ addRegisterClass(MVT::i64, &WebAssembly::I64RegClass); addRegisterClass(MVT::f32, 
&WebAssembly::F32RegClass); addRegisterClass(MVT::f64, &WebAssembly::F64RegClass); + if (Subtarget->hasSIMD128()) { + addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass); + addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass); + addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass); + addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass); + } // Compute derived properties from the register classes. computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -190,6 +196,10 @@ switch (Constraint[0]) { case 'r': assert(VT != MVT::iPTR && "Pointer MVT not expected here"); + if (Subtarget->hasSIMD128() && VT.isVector()) { + if (VT.getSizeInBits() == 128) + return std::make_pair(0U, &WebAssembly::V128RegClass); + } if (VT.isInteger() && !VT.isVector()) { if (VT.getSizeInBits() <= 32) return std::make_pair(0U, &WebAssembly::I32RegClass); Index: lib/Target/WebAssembly/WebAssemblyInstrCall.td =================================================================== --- lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -33,11 +33,27 @@ [(set vt:$dst, (WebAssemblycall1 I32:$callee))], !strconcat(prefix, "call_indirect\t$dst, $callee")>; } + +multiclass SIMD_CALL { + let Predicates = [HasSIMD128] in { + def CALL_#vt : I<(outs V128:$dst), (ins i32imm:$callee, variable_ops), + [(set (vt V128:$dst), (WebAssemblycall1 (i32 imm:$callee)))], + !strconcat(prefix, "call\t$dst, $callee")>; + def CALL_INDIRECT_#vt : I<(outs V128:$dst), (ins I32:$callee, variable_ops), + [(set (vt V128:$dst), (WebAssemblycall1 I32:$callee))], + !strconcat(prefix, "call_indirect\t$dst, $callee")>; + } // Predicates = [HasSIMD128] +} + let Uses = [SP32, SP64], isCall = 1 in { defm : CALL; defm : CALL; defm : CALL; defm : CALL; + defm : SIMD_CALL; + defm : SIMD_CALL; + defm : SIMD_CALL; + defm : SIMD_CALL; def CALL_VOID : I<(outs), (ins i32imm:$callee, variable_ops), [(WebAssemblycall0 (i32 imm:$callee))], @@ -58,6 +74,14 @@ (CALL_F32 
tglobaladdr:$callee)>; def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), (CALL_F64 tglobaladdr:$callee)>; +def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v16i8 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v8i16 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v4i32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)), (CALL_VOID tglobaladdr:$callee)>; @@ -70,5 +94,13 @@ (CALL_F32 texternalsym:$callee)>; def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), (CALL_F64 texternalsym:$callee)>; +def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v16i8 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v8i16 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v4i32 texternalsym:$callee)>, Requires<[HasSIMD128]>; +def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>; def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)), (CALL_VOID texternalsym:$callee)>; Index: lib/Target/WebAssembly/WebAssemblyInstrControl.td =================================================================== --- lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -77,12 +77,28 @@ def FALLTHROUGH_RETURN_#vt : I<(outs), (ins vt:$val), []>; } +multiclass SIMD_RETURN { + let 
Predicates = [HasSIMD128] in { + def RETURN_#vt : I<(outs), (ins V128:$val), [(WebAssemblyreturn (vt V128:$val))], + "return \t$val">; + // Equivalent to RETURN_#vt, for use at the end of a function when wasm + // semantics return by falling off the end of the block. + let isCodeGenOnly = 1 in + def FALLTHROUGH_RETURN_#vt : I<(outs), (ins V128:$val), []>; + } // Predicates = [HasSIMD128] +} + let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { let isReturn = 1 in { defm : RETURN; defm : RETURN; defm : RETURN; defm : RETURN; + defm : SIMD_RETURN; + defm : SIMD_RETURN; + defm : SIMD_RETURN; + defm : SIMD_RETURN; + def RETURN_VOID : I<(outs), (ins), [(WebAssemblyreturn)], "return">; // This is to RETURN_VOID what FALLTHROUGH_RETURN_#vt is to RETURN_#vt. Index: lib/Target/WebAssembly/WebAssemblyInstrFormats.td =================================================================== --- lib/Target/WebAssembly/WebAssemblyInstrFormats.td +++ lib/Target/WebAssembly/WebAssemblyInstrFormats.td @@ -61,6 +61,21 @@ [(set F64:$dst, (node F64:$lhs, F64:$rhs))], !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; } +multiclass SIMDBinary { + def _I8x16 : I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + [(set (v16i8 V128:$dst), (node (v16i8 V128:$lhs), (v16i8 V128:$rhs)))], + !strconcat("i8x16.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + def _I16x8 : I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + [(set (v8i16 V128:$dst), (node V128:$lhs, V128:$rhs))], + !strconcat("i16x8.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + def _I32x4 : I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + [(set (v4i32 V128:$dst), (node V128:$lhs, V128:$rhs))], + !strconcat("i32x4.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + def _F32x4 : I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + [(set (v4f32 V128:$dst), (fnode V128:$lhs, V128:$rhs))], + !strconcat("f32x4.", !strconcat(name, "\t$dst, $lhs, $rhs"))>; + +} multiclass ComparisonInt { def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs), 
[(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))], Index: lib/Target/WebAssembly/WebAssemblyInstrInfo.td =================================================================== --- lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -100,10 +100,19 @@ def ARGUMENT_#vt : I<(outs vt:$res), (ins i32imm:$argno), [(set vt:$res, (WebAssemblyargument timm:$argno))]>; } +multiclass SIMD_ARGUMENT { + let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1, Predicates = [HasSIMD128] in + def ARGUMENT_#vt : I<(outs V128:$res), (ins i32imm:$argno), + [(set (vt V128:$res), (WebAssemblyargument timm:$argno))]>; +} defm : ARGUMENT; defm : ARGUMENT; defm : ARGUMENT; defm : ARGUMENT; +defm : SIMD_ARGUMENT; +defm : SIMD_ARGUMENT; +defm : SIMD_ARGUMENT; +defm : SIMD_ARGUMENT; let Defs = [ARGUMENTS] in { @@ -131,6 +140,7 @@ defm : LOCAL; defm : LOCAL; defm : LOCAL; +defm : LOCAL, Requires<[HasSIMD128]>; let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in { def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm), Index: lib/Target/WebAssembly/WebAssemblyInstrSIMD.td =================================================================== --- lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -12,5 +12,11 @@ /// //===----------------------------------------------------------------------===// -// TODO: Implement SIMD instructions. -// Note: use Requires<[HasSIMD128]>. 
+let Predicates = [HasSIMD128] in { +let isCommutable = 1 in { +defm ADD : SIMDBinary, Requires<[HasSIMD128]>; +defm MUL: SIMDBinary, Requires<[HasSIMD128]>; +} // isCommutable = 1 + +defm SUB: SIMDBinary, Requires<[HasSIMD128]>; +} // Predicates = [HasSIMD128] Index: lib/Target/WebAssembly/WebAssemblyPeephole.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -108,7 +108,8 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); WebAssemblyFunctionInfo &MFI = *MF.getInfo(); - const auto &TII = *MF.getSubtarget().getInstrInfo(); + const auto &Subtarget = MF.getSubtarget(); + const auto &TII = *Subtarget.getInstrInfo(); const WebAssemblyTargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); auto &LibInfo = getAnalysis().getTLI(); @@ -186,6 +187,34 @@ MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F64, WebAssembly::COPY_LOCAL_F64); break; + case WebAssembly::RETURN_v16i8: + Changed |= + Subtarget.hasSIMD128() && + MaybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII, + WebAssembly::FALLTHROUGH_RETURN_v16i8, + WebAssembly::COPY_LOCAL_V128); + break; + case WebAssembly::RETURN_v8i16: + Changed |= + Subtarget.hasSIMD128() && + MaybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII, + WebAssembly::FALLTHROUGH_RETURN_v8i16, + WebAssembly::COPY_LOCAL_V128); + break; + case WebAssembly::RETURN_v4i32: + Changed |= + Subtarget.hasSIMD128() && + MaybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII, + WebAssembly::FALLTHROUGH_RETURN_v4i32, + WebAssembly::COPY_LOCAL_V128); + break; + case WebAssembly::RETURN_v4f32: + Changed |= + Subtarget.hasSIMD128() && + MaybeRewriteToFallthrough(MI, MBB, MF, MFI, MRI, TII, + WebAssembly::FALLTHROUGH_RETURN_v4f32, + WebAssembly::COPY_LOCAL_V128); + break; case WebAssembly::RETURN_VOID: if (!DisableWebAssemblyFallthroughReturnOpt && &MBB == &MF.back() && &MI == &MBB.back()) Index: 
lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -58,23 +58,11 @@ return new WebAssemblyPrepareForLiveIntervals(); } -/// Test whether the given instruction is an ARGUMENT. -static bool IsArgument(const MachineInstr *MI) { - switch (MI->getOpcode()) { - case WebAssembly::ARGUMENT_I32: - case WebAssembly::ARGUMENT_I64: - case WebAssembly::ARGUMENT_F32: - case WebAssembly::ARGUMENT_F64: - return true; - default: - return false; - } -} - // Test whether the given register has an ARGUMENT def. -static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) { - for (auto &Def : MRI.def_instructions(Reg)) - if (IsArgument(&Def)) +static bool HasArgumentDef(const WebAssemblySubtarget &Subtarget, unsigned Reg, + const MachineRegisterInfo &MRI) { + for (const auto &Def : MRI.def_instructions(Reg)) + if (Subtarget.isArgument(Def)) return true; return false; } @@ -87,7 +75,8 @@ bool Changed = false; MachineRegisterInfo &MRI = MF.getRegInfo(); - const auto &TII = *MF.getSubtarget().getInstrInfo(); + const auto &Subtarget = MF.getSubtarget(); + const auto &TII = *Subtarget.getInstrInfo(); MachineBasicBlock &Entry = *MF.begin(); assert(!mustPreserveAnalysisID(LiveIntervalsID) && @@ -111,7 +100,7 @@ continue; // Skip registers that have an ARGUMENT definition. - if (HasArgumentDef(Reg, MRI)) + if (HasArgumentDef(Subtarget, Reg, MRI)) continue; BuildMI(Entry, Entry.begin(), DebugLoc(), @@ -123,7 +112,7 @@ // liveness reflects the fact that these really are live-in values. 
for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE; ) { MachineInstr *MI = &*MII++; - if (IsArgument(MI)) { + if (Subtarget.isArgument(*MI)) { MI->removeFromParent(); Entry.insert(Entry.begin(), MI); } Index: lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -61,6 +61,7 @@ WebAssemblyFunctionInfo &MFI = *MF.getInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); + const auto &Subtarget = MF.getSubtarget(); MFI.initWARegs(); @@ -69,6 +70,13 @@ MachineBasicBlock &EntryMBB = MF.front(); for (MachineInstr &MI : EntryMBB) { switch (MI.getOpcode()) { + case WebAssembly::ARGUMENT_v16i8: + case WebAssembly::ARGUMENT_v8i16: + case WebAssembly::ARGUMENT_v4i32: + case WebAssembly::ARGUMENT_v4f32: + if (!Subtarget.hasSIMD128()) + break; + // fallthrough intended. case WebAssembly::ARGUMENT_I32: case WebAssembly::ARGUMENT_I64: case WebAssembly::ARGUMENT_F32: Index: lib/Target/WebAssembly/WebAssemblyRegStackify.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -409,7 +409,8 @@ } /// Get the appropriate tee_local opcode for the given register class. 
-static unsigned GetTeeLocalOpcode(const TargetRegisterClass *RC) { +static unsigned GetTeeLocalOpcode(const WebAssemblySubtarget &Subtarget, + const TargetRegisterClass *RC) { if (RC == &WebAssembly::I32RegClass) return WebAssembly::TEE_LOCAL_I32; if (RC == &WebAssembly::I64RegClass) @@ -418,6 +419,8 @@ return WebAssembly::TEE_LOCAL_F32; if (RC == &WebAssembly::F64RegClass) return WebAssembly::TEE_LOCAL_F64; + if (Subtarget.hasSIMD128() && RC == &WebAssembly::V128RegClass) + return WebAssembly::TEE_LOCAL_V128; llvm_unreachable("Unexpected register class"); } @@ -534,9 +537,10 @@ /// with DefReg and TeeReg stackified. This eliminates a get_local from the /// resulting code. static MachineInstr *MoveAndTeeForMultiUse( - unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB, - MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI, - MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) { + unsigned Reg, const WebAssemblySubtarget &Subtarget, MachineOperand &Op, + MachineInstr *Def, MachineBasicBlock &MBB, MachineInstr *Insert, + LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI, + const WebAssemblyInstrInfo *TII) { DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump()); // Move Def into place. 
@@ -548,10 +552,11 @@ unsigned TeeReg = MRI.createVirtualRegister(RegClass); unsigned DefReg = MRI.createVirtualRegister(RegClass); MachineOperand &DefMO = Def->getOperand(0); - MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(), - TII->get(GetTeeLocalOpcode(RegClass)), TeeReg) - .addReg(Reg, RegState::Define) - .addReg(DefReg, getUndefRegState(DefMO.isDead())); + MachineInstr *Tee = + BuildMI(MBB, Insert, Insert->getDebugLoc(), + TII->get(GetTeeLocalOpcode(Subtarget, RegClass)), TeeReg) + .addReg(Reg, RegState::Define) + .addReg(DefReg, getUndefRegState(DefMO.isDead())); Op.setReg(TeeReg); DefMO.setReg(DefReg); SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot(); @@ -708,6 +713,7 @@ bool Changed = false; MachineRegisterInfo &MRI = MF.getRegInfo(); WebAssemblyFunctionInfo &MFI = *MF.getInfo(); + const auto &Subtarget = MF.getSubtarget(); const auto *TII = MF.getSubtarget().getInstrInfo(); const auto *TRI = MF.getSubtarget().getRegisterInfo(); AliasAnalysis &AA = getAnalysis().getAAResults(); @@ -765,7 +771,11 @@ if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 || Def->getOpcode() == WebAssembly::ARGUMENT_I64 || Def->getOpcode() == WebAssembly::ARGUMENT_F32 || - Def->getOpcode() == WebAssembly::ARGUMENT_F64) + Def->getOpcode() == WebAssembly::ARGUMENT_F64 || + Def->getOpcode() == WebAssembly::ARGUMENT_v16i8 || + Def->getOpcode() == WebAssembly::ARGUMENT_v8i16 || + Def->getOpcode() == WebAssembly::ARGUMENT_v4i32 || + Def->getOpcode() == WebAssembly::ARGUMENT_v4f32) continue; // Decide which strategy to take. Prefer to move a single-use value @@ -785,8 +795,8 @@ LIS, MFI, MRI, TII, TRI); } else if (CanMove && OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) { - Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI, - MRI, TII); + Insert = MoveAndTeeForMultiUse(Reg, Subtarget, Op, Def, MBB, Insert, + LIS, MFI, MRI, TII); } else { // We failed to stackify the operand. 
If the problem was ordering // constraints, Commuting may be able to help. Index: lib/Target/WebAssembly/WebAssemblyRegisterInfo.td =================================================================== --- lib/Target/WebAssembly/WebAssemblyRegisterInfo.td +++ lib/Target/WebAssembly/WebAssemblyRegisterInfo.td @@ -39,6 +39,8 @@ def F32_0 : WebAssemblyReg<"%f32.0">; def F64_0 : WebAssemblyReg<"%f64.0">; +def V128_0: WebAssemblyReg<"%v128">; + // The expression stack "register". This is an opaque entity which serves to // order uses and defs that must remain in LIFO order. def EXPR_STACK : WebAssemblyReg<"STACK">; @@ -56,3 +58,5 @@ def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64)>; def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>; def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>; +def V128 : WebAssemblyRegClass<[v4f32, v4i32, v16i8, v8i16], 128, (add V128_0)>; + Index: lib/Target/WebAssembly/WebAssemblySubtarget.h =================================================================== --- lib/Target/WebAssembly/WebAssemblySubtarget.h +++ lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -78,6 +78,9 @@ /// Parses features string setting specified subtarget options. Definition of /// function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + /// Test whether the given instruction is an ARGUMENT. 
+  bool isArgument(const MachineInstr &MI) const;
 };
 
 } // end namespace llvm
Index: lib/Target/WebAssembly/WebAssemblySubtarget.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -53,3 +53,24 @@
 }
 
 bool WebAssemblySubtarget::useAA() const { return true; }
+
+/// Test whether the given instruction is one of the ARGUMENT opcodes.
+/// The scalar ARGUMENT opcodes always qualify; the vector-typed ARGUMENT
+/// opcodes qualify only when this subtarget has SIMD128 enabled.
+bool WebAssemblySubtarget::isArgument(const MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  case WebAssembly::ARGUMENT_I32:
+  case WebAssembly::ARGUMENT_I64:
+  case WebAssembly::ARGUMENT_F32:
+  case WebAssembly::ARGUMENT_F64:
+    return true;
+  case WebAssembly::ARGUMENT_v16i8:
+  case WebAssembly::ARGUMENT_v8i16:
+  case WebAssembly::ARGUMENT_v4i32:
+  case WebAssembly::ARGUMENT_v4f32:
+    // Vector arguments count as arguments only with the SIMD128 feature.
+    return HasSIMD128;
+  default:
+    return false;
+  }
+}
Index: test/CodeGen/WebAssembly/simd-arith.ll
===================================================================
--- /dev/null
+++ test/CodeGen/WebAssembly/simd-arith.ll
@@ -0,0 +1,158 @@
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -mattr=-simd128 | FileCheck %s --check-prefixes CHECK,NO-SIMD128
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -mattr=-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128
+
+; Test that basic SIMD128 arithmetic operations assemble as expected.
+ +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +declare i32 @llvm.ctlz.i32(i32, i1) +declare i32 @llvm.cttz.i32(i32, i1) +declare i32 @llvm.ctpop.i32(i32) + +; ============================================================================== +; 16 x i8 +; ============================================================================== +; CHECK-LABEL: add_v16i8 +; NO-SIMD128-NOT: i8x16 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i8x16.add $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) { + %a = add <16 x i8> %x, %y + ret <16 x i8> %a +} + +; CHECK-LABEL: sub_v16i8 +; NO-SIMD128-NOT: i8x16 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i8x16.sub $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) { + %a = sub <16 x i8> %x, %y + ret <16 x i8> %a +} + +; CHECK-LABEL: mul_v16i8 +; NO-SIMD128-NOT: i8x16 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i8x16.mul $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) { + %a = mul <16 x i8> %x, %y + ret <16 x i8> %a +} + +; ============================================================================== +; 8 x i16 +; ============================================================================== +; CHECK-LABEL: add_v8i16 +; NO-SIMD128-NOT: i16x8 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i16x8.add $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) { + %a = add <8 x i16> %x, %y + ret <8 x i16> %a +} + +; CHECK-LABEL: sub_v8i16 +; NO-SIMD128-NOT: i16x8 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i16x8.sub $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x 
i16> %y) { + %a = sub <8 x i16> %x, %y + ret <8 x i16> %a +} + +; CHECK-LABEL: mul_v8i16 +; NO-SIMD128-NOT: i16x8 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i16x8.mul $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) { + %a = mul <8 x i16> %x, %y + ret <8 x i16> %a +} + +; ============================================================================== +; 4 x i32 +; ============================================================================== +; CHECK-LABEL: add_v4i32 +; NO-SIMD128-NOT: i32x4 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i32x4.add $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) { + %a = add <4 x i32> %x, %y + ret <4 x i32> %a +} + +; CHECK-LABEL: sub_v4i32 +; NO-SIMD128-NOT: i32x4 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i32x4.sub $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) { + %a = sub <4 x i32> %x, %y + ret <4 x i32> %a +} + +; CHECK-LABEL: mul_v4i32 +; NO-SIMD128-NOT: i32x4 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: i32x4.mul $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) { + %a = mul <4 x i32> %x, %y + ret <4 x i32> %a +} + +; ============================================================================== +; 4 x float +; ============================================================================== +; CHECK-LABEL: add_v4f32 +; NO-SIMD128-NOT: f32x4 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: f32x4.add $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) { + %a = fadd <4 x float> %x, %y + ret <4 x float> %a +} + +; CHECK-LABEL: sub_v4f32 +; NO-SIMD128-NOT: f32x4 +; SIMD128: .param 
v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: f32x4.sub $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) { + %a = fsub <4 x float> %x, %y + ret <4 x float> %a +} + +; CHECK-LABEL: mul_v4f32 +; NO-SIMD128-NOT: f32x4 +; SIMD128: .param v128, v128{{$}} +; SIMD128: .result v128{{$}} +; SIMD128: f32x4.mul $push0=, $0, $1{{$}} +; SIMD128: return $pop0{{$}} +define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) { + %a = fmul <4 x float> %x, %y + ret <4 x float> %a +} +