diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -108,6 +108,11 @@
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
+  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
+                                     const APInt &DemandedElts,
+                                     const SelectionDAG &DAG,
+                                     unsigned Depth) const override;
+
   SDValue LowerCall(CallLoweringInfo &CLI,
                     SmallVectorImpl<SDValue> &InVals) const override;
   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -33,6 +33,7 @@
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
@@ -823,6 +824,30 @@
   }
 }
 
+void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
+    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+    const SelectionDAG &DAG, unsigned Depth) const {
+  switch (Op.getOpcode()) {
+  default:
+    break;
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+    switch (IntNo) {
+    default:
+      break;
+    case Intrinsic::wasm_bitmask: {
+      unsigned BitWidth = Known.getBitWidth();
+      EVT VT = Op.getOperand(1).getSimpleValueType();
+      unsigned PossibleBits = VT.getVectorNumElements();
+      APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
+      Known.Zero |= ZeroMask;
+      break;
+    }
+    }
+  }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // WebAssembly Lowering private implementation.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/WebAssembly/simd-bitmask-mask.ll b/llvm/test/CodeGen/WebAssembly/simd-bitmask-mask.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-bitmask-mask.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mattr=+simd128 | FileCheck %s
+
+; Test that masks on the output of bitmask are optimized out.
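+;
+; Why the fold is legal (explanatory note, not part of the checked output):
+; each wasm bitmask instruction packs one bit per input lane into an i32, so
+; a <N x iM> input can set at most the low N bits of the result. The
+; computeKnownBitsForTargetNode hook added above reports the high (32 - N)
+; bits as known zero, which lets the DAG combiner drop an `and` whose mask
+; already covers all N live bits; a narrower mask still clears live bits and
+; must be kept.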
+ +target triple = "wasm32-unknown-unknown" + +declare i32 @llvm.wasm.bitmask.v16i8(<16 x i8>) +declare i32 @llvm.wasm.bitmask.v8i16(<8 x i16>) +declare i32 @llvm.wasm.bitmask.v4i32(<4 x i32>) +declare i32 @llvm.wasm.bitmask.v2i64(<2 x i64>) + +define i32 @bitmask_v16i8_mask(<16 x i8> %x) { +; CHECK-LABEL: bitmask_v16i8_mask: +; CHECK: .functype bitmask_v16i8_mask (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i8x16.bitmask +; CHECK-NEXT: i32.const 32767 +; CHECK-NEXT: i32.and +; CHECK-NEXT: # fallthrough-return + %m = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> %x) + %v = and i32 %m, 32767 ;; 2^15 - 1 + ret i32 %v +} + +define i32 @bitmask_v16i8_no_mask(<16 x i8> %x) { +; CHECK-LABEL: bitmask_v16i8_no_mask: +; CHECK: .functype bitmask_v16i8_no_mask (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i8x16.bitmask +; CHECK-NEXT: # fallthrough-return + %m = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> %x) + %v = and i32 %m, 65535 ;; 2^16 - 1 + ret i32 %v +} + +define i32 @bitmask_v8i16_mask(<8 x i16> %x) { +; CHECK-LABEL: bitmask_v8i16_mask: +; CHECK: .functype bitmask_v8i16_mask (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.bitmask +; CHECK-NEXT: i32.const 127 +; CHECK-NEXT: i32.and +; CHECK-NEXT: # fallthrough-return + %m = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> %x) + %v = and i32 %m, 127 ;; 2^7 - 1 + ret i32 %v +} + +define i32 @bitmask_v8i16_no_mask(<8 x i16> %x) { +; CHECK-LABEL: bitmask_v8i16_no_mask: +; CHECK: .functype bitmask_v8i16_no_mask (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.bitmask +; CHECK-NEXT: # fallthrough-return + %m = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> %x) + %v = and i32 %m, 255 ;; 2^8 - 1 + ret i32 %v +} + +define i32 @bitmask_v4i32_mask(<4 x i32> %x) { +; CHECK-LABEL: bitmask_v4i32_mask: +; CHECK: .functype bitmask_v4i32_mask (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.bitmask +; CHECK-NEXT: i32.const 7 +; CHECK-NEXT: i32.and +; CHECK-NEXT: # fallthrough-return + %m = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> %x) + %v = and i32 %m, 7 ;; 2^3 - 1 + ret i32 %v +} + +define i32 @bitmask_v4i32_no_mask(<4 x i32> %x) { +; CHECK-LABEL: bitmask_v4i32_no_mask: +; CHECK: .functype bitmask_v4i32_no_mask (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.bitmask +; CHECK-NEXT: # fallthrough-return + %m = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> %x) + %v = and i32 %m, 15 ;; 2^4 - 1 + ret i32 %v +} + +define i32 @bitmask_v2i64_mask(<2 x i64> %x) { +; CHECK-LABEL: bitmask_v2i64_mask: +; CHECK: .functype bitmask_v2i64_mask (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.bitmask +; CHECK-NEXT: i32.const 1 +; CHECK-NEXT: i32.and +; CHECK-NEXT: # fallthrough-return + %m = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> %x) + %v = and i32 %m, 1 ;; 2^1 - 1 + ret i32 %v +} + +define i32 @bitmask_v2i64_no_mask(<2 x i64> %x) { +; CHECK-LABEL: bitmask_v2i64_no_mask: +; CHECK: .functype bitmask_v2i64_no_mask (v128) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.bitmask +; CHECK-NEXT: # fallthrough-return + %m = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> %x) + %v = and i32 %m, 3 ;; 2^2 - 1 + ret i32 %v +}
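+; Quick reference, derived from the cases below (illustrative only, not
+; asserted by FileCheck):
+;   i8x16.bitmask -> 16 live bits: and 65535 (0xFFFF) folds, and 32767 stays
+;   i16x8.bitmask ->  8 live bits: and 255   (0xFF)   folds, and 127   stays
+;   i32x4.bitmask ->  4 live bits: and 15    (0xF)    folds, and 7     stays
+;   i64x2.bitmask ->  2 live bits: and 3     (0x3)    folds, and 1     stays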