diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -51,6 +51,7 @@
   const WebAssemblySubtarget *Subtarget;
 
   AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
+  bool shouldScalarizeBinop(SDValue VecOp) const override;
   FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                            const TargetLibraryInfo *LibInfo) const override;
   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -335,6 +335,26 @@
   return AtomicExpansionKind::CmpXChg;
 }
 
+bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
+  // Implementation copied from X86TargetLowering.
+  unsigned Opc = VecOp.getOpcode();
+
+  // Assume target opcodes can't be scalarized.
+  // TODO - do we have any exceptions?
+  if (Opc >= ISD::BUILTIN_OP_END)
+    return false;
+
+  // If the vector op is not supported, try to convert to scalar.
+  EVT VecVT = VecOp.getValueType();
+  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
+    return true;
+
+  // If the vector op is supported, but the scalar op is not, the transform may
+  // not be worthwhile.
+  EVT ScalarVT = VecVT.getScalarType();
+  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
+}
+
 FastISel *WebAssemblyTargetLowering::createFastISel(
     FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
   return WebAssembly::createFastISel(FuncInfo, LibInfo);
diff --git a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
--- a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
+++ b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll
@@ -3,8 +3,6 @@
 
 ;; Check that masked shift counts are optimized out.
 
-;; TODO: optimize the *_late functions.
-
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
@@ -336,10 +334,6 @@
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    v128.const 31, 31, 31, 31
-; CHECK-NEXT:    v128.and
-; CHECK-NEXT:    i32x4.extract_lane 0
 ; CHECK-NEXT:    i32x4.shl
 ; CHECK-NEXT:    # fallthrough-return
   %t = insertelement <4 x i32> undef, i32 %x, i32 0
@@ -372,10 +366,6 @@
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    v128.const 31, 31, 31, 31
-; CHECK-NEXT:    v128.and
-; CHECK-NEXT:    i32x4.extract_lane 0
 ; CHECK-NEXT:    i32x4.shr_s
 ; CHECK-NEXT:    # fallthrough-return
   %t = insertelement <4 x i32> undef, i32 %x, i32 0
@@ -408,10 +398,6 @@
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    v128.const 31, 31, 31, 31
-; CHECK-NEXT:    v128.and
-; CHECK-NEXT:    i32x4.extract_lane 0
 ; CHECK-NEXT:    i32x4.shr_u
 ; CHECK-NEXT:    # fallthrough-return
   %t = insertelement <4 x i32> undef, i32 %x, i32 0
@@ -444,10 +430,6 @@
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    v128.const 63, 63
-; CHECK-NEXT:    v128.and
-; CHECK-NEXT:    i64x2.extract_lane 0
 ; CHECK-NEXT:    i32.wrap_i64
 ; CHECK-NEXT:    i64x2.shl
 ; CHECK-NEXT:    # fallthrough-return
@@ -480,10 +462,6 @@
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    v128.const 63, 63
-; CHECK-NEXT:    v128.and
-; CHECK-NEXT:    i64x2.extract_lane 0
 ; CHECK-NEXT:    i32.wrap_i64
 ; CHECK-NEXT:    i64x2.shr_s
 ; CHECK-NEXT:    # fallthrough-return
@@ -516,10 +494,6 @@
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    v128.const 63, 63
-; CHECK-NEXT:    v128.and
-; CHECK-NEXT:    i64x2.extract_lane 0
 ; CHECK-NEXT:    i32.wrap_i64
 ; CHECK-NEXT:    i64x2.shr_u
 ; CHECK-NEXT:    # fallthrough-return
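
;; For reference only (not part of the patch): a minimal sketch of the IR this
;; hook affects, distilled from the masked-shifts.ll checks above; the function
;; name below is illustrative, not an existing test. With shouldScalarizeBinop
;; allowing the scalar-legal `and` to be scalarized, DAGCombiner can rewrite the
;; extract_vector_elt of the splatted, masked shift amount into a scalar `and`,
;; which the existing masked shift-count combine then folds away, leaving the
;; plain i32x4.shl seen in the updated CHECK lines.
define <4 x i32> @shl_v4i32_late_sketch(<4 x i32> %v, i32 %x) {
  %t = insertelement <4 x i32> undef, i32 %x, i32 0
  %s = shufflevector <4 x i32> %t, <4 x i32> undef, <4 x i32> zeroinitializer
  %m = and <4 x i32> %s, <i32 31, i32 31, i32 31, i32 31>
  %a = shl <4 x i32> %v, %m
  ret <4 x i32> %a
}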