diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
@@ -1565,6 +1566,7 @@
     };
   } else if (NumConstantLanes >= NumSplatLanes &&
              Subtarget->hasUnimplementedSIMD128()) {
+    // If we support v128.const, emit it directly
     SmallVector<SDValue, 16> ConstLanes;
     for (const SDValue &Lane : Op->op_values()) {
       if (IsConstant(Lane)) {
@@ -1576,11 +1578,67 @@
       }
     }
     Result = DAG.getBuildVector(VecT, DL, ConstLanes);
-    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
+    IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
       return IsConstant(Lane);
     };
-  }
-  if (!Result) {
+  } else if (NumConstantLanes >= NumSplatLanes && VecT.isInteger()) {
+    // Otherwise, if this is an integer vector, pack the lane values together so
+    // we can construct the 128-bit constant from a pair of i64s using a splat
+    // followed by at most one i64x2.replace_lane. Also keep track of the lanes
+    // that actually matter so we can avoid the replace_lane in more cases.
+    std::array<uint64_t, 2> I64s({0, 0});
+    std::array<uint64_t, 2> ConstLaneMasks({0, 0});
+    uint8_t *I64Bytes = reinterpret_cast<uint8_t *>(I64s.data());
+    uint8_t *MaskBytes = reinterpret_cast<uint8_t *>(ConstLaneMasks.data());
+    unsigned I = 0;
+    size_t ByteStep = VecT.getScalarSizeInBits() / 8;
+    for (const SDValue &Lane : Op->op_values()) {
+      if (IsConstant(Lane)) {
+        using llvm::support::little;
+        using llvm::support::endian::byte_swap;
+        // The endianness of the compiler matters here. We want to enforce
+        // little endianness so that the bytes of a smaller integer type will
+        // occur first in the uint64_t.
+        auto *Const = cast<ConstantSDNode>(Lane.getNode());
+        uint64_t Val = byte_swap(Const->getLimitedValue(), little);
+        uint8_t *ValPtr = reinterpret_cast<uint8_t *>(&Val);
+        std::copy(ValPtr, ValPtr + ByteStep, I64Bytes + I * ByteStep);
+        uint64_t Mask = uint64_t(-1LL);
+        uint8_t *MaskPtr = reinterpret_cast<uint8_t *>(&Mask);
+        std::copy(MaskPtr, MaskPtr + ByteStep, MaskBytes + I * ByteStep);
+      }
+      ++I;
+    }
+    // Check whether all constant lanes in the second half of the vector are
+    // equivalent in the first half or vice versa to determine whether splatting
+    // either side will be sufficient to materialize the constant. As a special
+    // case, if the first and second halves have no constant lanes in common, we
+    // can just combine them.
+    bool FirstHalfSufficient = (I64s[0] & ConstLaneMasks[1]) == I64s[1];
+    bool SecondHalfSufficient = (I64s[1] & ConstLaneMasks[0]) == I64s[0];
+    bool CombinedSufficient = (ConstLaneMasks[0] & ConstLaneMasks[1]) == 0;
+
+    uint64_t Splatted;
+    if (SecondHalfSufficient) {
+      Splatted = I64s[1];
+    } else if (CombinedSufficient) {
+      Splatted = I64s[0] | I64s[1];
+    } else {
+      Splatted = I64s[0];
+    }
+
+    Result = DAG.getSplatBuildVector(MVT::v2i64, DL,
+                                     DAG.getConstant(Splatted, DL, MVT::i64));
+    if (!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient) {
+      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Result,
+                           DAG.getConstant(I64s[1], DL, MVT::i64),
+                           DAG.getConstant(1, DL, MVT::i32));
+    }
+    Result = DAG.getBitcast(VecT, Result);
+    IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
+      return IsConstant(Lane);
+    };
+  } else {
     // Use a splat, but possibly a load_splat
     LoadSDNode *SplattedLoad;
     if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
@@ -1593,11 +1651,14 @@
     } else {
       Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
     }
-    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
+    IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
       return Lane == SplatValue;
     };
   }
 
+  assert(Result);
+  assert(IsLaneConstructed);
+
   // Add replace_lane instructions for any unhandled values
   for (size_t I = 0; I < Lanes; ++I) {
     const SDValue &Lane = Op->getOperand(I);
diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
@@ -8,12 +8,73 @@
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
+; CHECK-LABEL: emulated_const_trivial_splat:
+; CHECK-NEXT:  .functype emulated_const_trivial_splat () -> (v128)
+; SIMD-VM-NEXT: i64.const $push0=, 8589934593
+; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
+; SIMD-VM-NEXT: return $pop1
+; UNIMP: v128.const
+define <4 x i32> @emulated_const_trivial_splat() {
+  ret <4 x i32> <i32 1, i32 2, i32 1, i32 2>
+}
+
+; CHECK-LABEL: emulated_const_first_sufficient:
+; CHECK-NEXT:  .functype emulated_const_first_sufficient () -> (v128)
+; SIMD-VM-NEXT: i64.const $push0=, 8589934593
+; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
+; SIMD-VM-NEXT: return $pop1
+; UNIMP: v128.const
+define <4 x i32> @emulated_const_first_sufficient() {
+  ret <4 x i32> <i32 1, i32 2, i32 undef, i32 2>
+}
+
+; CHECK-LABEL: emulated_const_second_sufficient:
+; CHECK-NEXT:  .functype emulated_const_second_sufficient () -> (v128)
+; SIMD-VM-NEXT: i64.const $push0=, 8589934593
+; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
+; SIMD-VM-NEXT: return $pop1
+; UNIMP: v128.const
+define <4 x i32> @emulated_const_second_sufficient() {
+  ret <4 x i32> <i32 1, i32 undef, i32 1, i32 2>
+}
+
+; CHECK-LABEL: emulated_const_combined_sufficient:
+; CHECK-NEXT:  .functype emulated_const_combined_sufficient () -> (v128)
+; SIMD-VM-NEXT: i64.const $push0=, 8589934593
+; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
+; SIMD-VM-NEXT: return $pop1
+; UNIMP: v128.const
+define <4 x i32> @emulated_const_combined_sufficient() {
+  ret <4 x i32> <i32 1, i32 undef, i32 undef, i32 2>
+}
+
+; CHECK-LABEL: emulated_const_either_sufficient:
+; CHECK-NEXT:  .functype emulated_const_either_sufficient () -> (v128)
+; SIMD-VM-NEXT: i64.const $push0=, 1
+; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
+; SIMD-VM-NEXT: return $pop1
+; UNIMP: v128.const
+define <4 x i32> @emulated_const_either_sufficient() {
+  ret <4 x i32> <i32 1, i32 undef, i32 1, i32 undef>
+}
+
+; CHECK-LABEL: emulated_const_neither_sufficient:
+; CHECK-NEXT:  .functype emulated_const_neither_sufficient () -> (v128)
+; SIMD-VM-NEXT: i64.const $push0=, 8589934593
+; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
+; SIMD-VM-NEXT: i64.const $push2=, 17179869184
+; SIMD-VM-NEXT: i64x2.replace_lane $push3=, $pop1, 1, $pop2
+; SIMD-VM-NEXT: return $pop3
+define <4 x i32> @emulated_const_neither_sufficient() {
+  ret <4 x i32> <i32 1, i32 2, i32 undef, i32 4>
+}
+
 ; CHECK-LABEL: same_const_one_replaced_i16x8:
 ; CHECK-NEXT:  .functype same_const_one_replaced_i16x8 (i32) -> (v128)
 ; UNIMP-NEXT:  v128.const $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
 ; UNIMP-NEXT:  i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
 ; UNIMP-NEXT:  return $pop[[L1]]
-; SIMD-VM: i16x8.splat
+; SIMD-VM: i64x2.splat
 define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
   %v = insertelement <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
     i16 %x, i32 5
@@ -27,7 +88,7 @@
 ; UNIMP-NEXT:  v128.const $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
 ; UNIMP-NEXT:  i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
 ; UNIMP-NEXT:  return $pop[[L1]]
-; SIMD-VM: i16x8.splat
+; SIMD-VM: i64x2.splat
 define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
   %v = insertelement <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
     i16 %x, i32 5
@@ -68,7 +129,7 @@
 ; CHECK-NEXT:  .functype splat_common_const_i32x4 () -> (v128)
 ; UNIMP-NEXT:  v128.const $push[[L0:[0-9]+]]=, 0, 3, 3, 1
 ; UNIMP-NEXT:  return $pop[[L0]]
-; SIMD-VM: i32x4.splat
+; SIMD-VM: i64x2.splat
 define <4 x i32> @splat_common_const_i32x4() {
   ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
 }
@@ -206,7 +267,7 @@
 ; UNIMP: i8x16.replace_lane
 ; UNIMP: i8x16.replace_lane
 ; UNIMP: return
-; SIMD-VM: i8x16.splat
+; SIMD-VM: i64x2.splat
 define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
 ; swizzle 0
   %m0 = extractelement <16 x i8> %mask, i32 0
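
Note (not part of the patch): as a sanity check on the lane-packing step above, here is a minimal standalone sketch of the I64s/ConstLaneMasks construction, outside of SelectionDAG. It assumes a little-endian host, so the patch's byte_swap(Val, little) is the identity and a plain memcpy suffices; undef or non-constant lanes are modeled with std::optional. The names Packed and packLanes are invented for the sketch.

#include <array>
#include <cstdint>
#include <cstring>
#include <optional>
#include <vector>

// Mirror of the I64s/ConstLaneMasks loop: pack the constant lanes of a
// 128-bit integer vector into two uint64_t values and record which bytes
// came from constant lanes. Assumes a little-endian host, where the
// patch's byte_swap(Val, little) is a no-op.
struct Packed {
  std::array<uint64_t, 2> I64s{{0, 0}};
  std::array<uint64_t, 2> ConstLaneMasks{{0, 0}};
};

Packed packLanes(const std::vector<std::optional<uint64_t>> &Lanes,
                 size_t ScalarBits) {
  Packed P;
  uint8_t *I64Bytes = reinterpret_cast<uint8_t *>(P.I64s.data());
  uint8_t *MaskBytes = reinterpret_cast<uint8_t *>(P.ConstLaneMasks.data());
  size_t ByteStep = ScalarBits / 8;
  for (size_t I = 0; I < Lanes.size(); ++I) {
    if (!Lanes[I])
      continue; // undef/non-constant lane: leave its bytes zero and unmasked
    uint64_t Val = *Lanes[I];
    std::memcpy(I64Bytes + I * ByteStep, &Val, ByteStep);
    uint64_t Mask = ~uint64_t(0);
    std::memcpy(MaskBytes + I * ByteStep, &Mask, ByteStep);
  }
  return P;
}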
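Continuing the sketch, the three sufficiency predicates can be exercised against the @emulated_const_neither_sufficient test case; the assertions below reproduce the i64 constants the test expects (8589934593 for the splat, 17179869184 for the value replaced into lane 1).

#include <cassert>

// Decide how to materialize the constant, as in the patch: prefer a plain
// i64x2.splat, and fall back to one extra i64x2.replace_lane on lane 1.
int main() {
  // Lanes of @emulated_const_neither_sufficient: <i32 1, i32 2, i32 undef, i32 4>
  Packed P = packLanes({1, 2, std::nullopt, 4}, 32);

  bool FirstHalfSufficient = (P.I64s[0] & P.ConstLaneMasks[1]) == P.I64s[1];
  bool SecondHalfSufficient = (P.I64s[1] & P.ConstLaneMasks[0]) == P.I64s[0];
  bool CombinedSufficient = (P.ConstLaneMasks[0] & P.ConstLaneMasks[1]) == 0;

  uint64_t Splatted;
  if (SecondHalfSufficient)
    Splatted = P.I64s[1];
  else if (CombinedSufficient)
    Splatted = P.I64s[0] | P.I64s[1];
  else
    Splatted = P.I64s[0];

  // Neither half covers the other and the masks overlap, so we splat the
  // first half and patch the second in with i64x2.replace_lane.
  assert(!FirstHalfSufficient && !SecondHalfSufficient && !CombinedSufficient);
  assert(Splatted == 8589934593ULL);   // 0x0000000200000001
  assert(P.I64s[1] == 17179869184ULL); // 0x0000000400000000
}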