diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1314,39 +1314,42 @@ SelectionDAG &DAG) const { SDLoc DL(Op); // If sign extension operations are disabled, allow sext_inreg only if operand - // is a vector extract. SIMD does not depend on sign extension operations, but - // allowing sext_inreg in this context lets us have simple patterns to select - // extract_lane_s instructions. Expanding sext_inreg everywhere would be - // simpler in this file, but would necessitate large and brittle patterns to - // undo the expansion and select extract_lane_s instructions. + // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign + // extension operations, but allowing sext_inreg in this context lets us have + // simple patterns to select extract_lane_s instructions. Expanding sext_inreg + // everywhere would be simpler in this file, but would necessitate large and + // brittle patterns to undo the expansion and select extract_lane_s + // instructions. assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128()); - if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT) { - const SDValue &Extract = Op.getOperand(0); - MVT VecT = Extract.getOperand(0).getSimpleValueType(); - MVT ExtractedLaneT = static_cast(Op.getOperand(1).getNode()) - ->getVT() - .getSimpleVT(); - MVT ExtractedVecT = - MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits()); - if (ExtractedVecT == VecT) - return Op; - // Bitcast vector to appropriate type to ensure ISel pattern coverage - const SDValue &Index = Extract.getOperand(1); - unsigned IndexVal = - static_cast(Index.getNode())->getZExtValue(); - unsigned Scale = - ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements(); - assert(Scale > 1); - SDValue NewIndex = - DAG.getConstant(IndexVal * Scale, DL, Index.getValueType()); - SDValue NewExtract = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(), - DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), - NewExtract, Op.getOperand(1)); - } - // Otherwise expand - return SDValue(); + if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + + const SDValue &Extract = Op.getOperand(0); + MVT VecT = Extract.getOperand(0).getSimpleValueType(); + if (VecT.getVectorElementType().getSizeInBits() > 32) + return SDValue(); + MVT ExtractedLaneT = static_cast(Op.getOperand(1).getNode()) + ->getVT() + .getSimpleVT(); + MVT ExtractedVecT = + MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits()); + if (ExtractedVecT == VecT) + return Op; + + // Bitcast vector to appropriate type to ensure ISel pattern coverage + const SDValue &Index = Extract.getOperand(1); + unsigned IndexVal = + static_cast(Index.getNode())->getZExtValue(); + unsigned Scale = + ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements(); + assert(Scale > 1); + SDValue NewIndex = + DAG.getConstant(IndexVal * Scale, DL, Index.getValueType()); + SDValue NewExtract = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(), + DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract, + Op.getOperand(1)); } SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -330,81 +330,49 @@ //===----------------------------------------------------------------------===// // Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u -multiclass ExtractLane simdop, - string suffix = "", SDNode extract = vector_extract> { +multiclass ExtractLane simdop, string suffix = ""> { defm EXTRACT_LANE_#vec_t#suffix : SIMD_I<(outs reg_t:$dst), (ins V128:$vec, vec_i8imm_op:$idx), - (outs), (ins vec_i8imm_op:$idx), - [(set reg_t:$dst, (extract (vec_t V128:$vec), (i32 imm_t:$idx)))], + (outs), (ins vec_i8imm_op:$idx), [], vec#".extract_lane"#suffix#"\t$dst, $vec, $idx", vec#".extract_lane"#suffix#"\t$idx", simdop>; } -multiclass ExtractPat { - def _s : PatFrag<(ops node:$vec, node:$idx), - (i32 (sext_inreg - (i32 (vector_extract - node:$vec, - node:$idx - )), - lane_t - ))>; - def _u : PatFrag<(ops node:$vec, node:$idx), - (i32 (and - (i32 (vector_extract - node:$vec, - node:$idx - )), - (i32 mask) - ))>; -} - -defm extract_i8x16 : ExtractPat; -defm extract_i16x8 : ExtractPat; - -multiclass ExtractLaneExtended baseInst> { - defm "" : ExtractLane("extract_i8x16"#sign)>; - defm "" : ExtractLane("extract_i16x8"#sign)>; -} - -defm "" : ExtractLaneExtended<"_s", 5>; -defm "" : ExtractLaneExtended<"_u", 6>; -defm "" : ExtractLane; -defm "" : ExtractLane; -defm "" : ExtractLane; -defm "" : ExtractLane; - -// It would be more conventional to use unsigned extracts, but v8 -// doesn't implement them yet -def : Pat<(i32 (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx))), - (EXTRACT_LANE_v16i8_s V128:$vec, (i32 LaneIdx16:$idx))>; -def : Pat<(i32 (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx))), - (EXTRACT_LANE_v8i16_s V128:$vec, (i32 LaneIdx8:$idx))>; - -// Lower undef lane indices to zero -def : Pat<(and (i32 (vector_extract (v16i8 V128:$vec), undef)), (i32 0xff)), - (EXTRACT_LANE_v16i8_u V128:$vec, 0)>; -def : Pat<(and (i32 (vector_extract (v8i16 V128:$vec), undef)), (i32 0xffff)), - (EXTRACT_LANE_v8i16_u V128:$vec, 0)>; -def : Pat<(i32 (vector_extract (v16i8 V128:$vec), undef)), - (EXTRACT_LANE_v16i8_u V128:$vec, 0)>; -def : Pat<(i32 (vector_extract (v8i16 V128:$vec), undef)), - (EXTRACT_LANE_v8i16_u V128:$vec, 0)>; -def : Pat<(sext_inreg (i32 (vector_extract (v16i8 V128:$vec), undef)), i8), - (EXTRACT_LANE_v16i8_s V128:$vec, 0)>; -def : Pat<(sext_inreg (i32 (vector_extract (v8i16 V128:$vec), undef)), i16), - (EXTRACT_LANE_v8i16_s V128:$vec, 0)>; -def : Pat<(vector_extract (v4i32 V128:$vec), undef), - (EXTRACT_LANE_v4i32 V128:$vec, 0)>; -def : Pat<(vector_extract (v2i64 V128:$vec), undef), - (EXTRACT_LANE_v2i64 V128:$vec, 0)>; -def : Pat<(vector_extract (v4f32 V128:$vec), undef), - (EXTRACT_LANE_v4f32 V128:$vec, 0)>; -def : Pat<(vector_extract (v2f64 V128:$vec), undef), - (EXTRACT_LANE_v2f64 V128:$vec, 0)>; +defm "" : ExtractLane; +defm "" : ExtractLane; +defm "" : ExtractLane; +defm "" : ExtractLane; +defm "" : ExtractLane; +defm "" : ExtractLane; +defm "" : ExtractLane; +defm "" : ExtractLane; + +def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), + (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>; +def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), + (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>; +def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)), + (EXTRACT_LANE_v4i32 V128:$vec, imm:$idx)>; +def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)), + (EXTRACT_LANE_v4f32 V128:$vec, imm:$idx)>; +def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)), + (EXTRACT_LANE_v2i64 V128:$vec, imm:$idx)>; +def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)), + (EXTRACT_LANE_v2f64 V128:$vec, imm:$idx)>; + +def : Pat< + (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8), + (EXTRACT_LANE_v16i8_s V128:$vec, imm:$idx)>; +def : Pat< + (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)), + (EXTRACT_LANE_v16i8_u V128:$vec, imm:$idx)>; +def : Pat< + (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16), + (EXTRACT_LANE_v8i16_s V128:$vec, imm:$idx)>; +def : Pat< + (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)), + (EXTRACT_LANE_v8i16_u V128:$vec, imm:$idx)>; // Replace lane value: replace_lane multiclass ReplaceLane (v128){{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}} +; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}} +; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}} ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}} ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}} ; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]] ; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]] ; Skip 14 lanes -; SIMD128: i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[L5:[0-9]+]]=, $1, 15{{$}} +; SIMD128: i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}} +; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}} ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}} ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}} ; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}} @@ -197,14 +197,14 @@ ; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}} ; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}} +; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}} ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}} ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}} ; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]] ; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]] ; Skip 14 lanes ; SIMD128: i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[L5:[0-9]+]]=, $1, 15{{$}} +; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}} ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}} ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}} ; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}} @@ -233,14 +233,14 @@ ; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}} ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}} +; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}} ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}} ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}} ; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]] ; SIMD128-NEXT: i8x16.splat $push[[M3:[0-9]+]]=, $pop[[M2]] ; Skip 14 lanes ; SIMD128: i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[L5:[0-9]+]]=, $1, 15{{$}} +; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}} ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 7{{$}} ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}} ; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}} @@ -470,15 +470,15 @@ ; CHECK-LABEL: shl_vec_v8i16: ; NO-SIMD128-NOT: i16x8 ; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}} +; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}} +; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}} ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}} ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}} ; SIMD128-NEXT: i32.shl $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}} ; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}} ; Skip 6 lanes -; SIMD128: i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[L5:[0-9]+]]=, $1, 7{{$}} +; SIMD128: i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}} +; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}} ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}} ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}} ; SIMD128-NEXT: i32.shl $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}} @@ -506,14 +506,14 @@ ; NO-SIMD128-NOT: i16x8 ; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}} ; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}} +; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}} ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}} ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}} ; SIMD128-NEXT: i32.shr_s $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}} ; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}} ; Skip 6 lanes ; SIMD128: i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[L5:[0-9]+]]=, $1, 7{{$}} +; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}} ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}} ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}} ; SIMD128-NEXT: i32.shr_s $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}} @@ -541,14 +541,14 @@ ; NO-SIMD128-NOT: i16x8 ; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}} ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[L1:[0-9]+]]=, $1, 0{{$}} +; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}} ; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}} ; SIMD128-NEXT: i32.and $push[[M1:[0-9]+]]=, $pop[[L1]], $pop[[M0]]{{$}} ; SIMD128-NEXT: i32.shr_u $push[[M2:[0-9]+]]=, $pop[[L0]], $pop[[M1]]{{$}} ; SIMD128-NEXT: i16x8.splat $push[[M3:[0-9]+]]=, $pop[[M2]]{{$}} ; Skip 6 lanes ; SIMD128: i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[L5:[0-9]+]]=, $1, 7{{$}} +; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}} ; SIMD128-NEXT: i32.const $push[[M4:[0-9]+]]=, 15{{$}} ; SIMD128-NEXT: i32.and $push[[M5:[0-9]+]]=, $pop[[L5]], $pop[[M4]]{{$}} ; SIMD128-NEXT: i32.shr_u $push[[M6:[0-9]+]]=, $pop[[L4]], $pop[[M5]]{{$}} diff --git a/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll b/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll --- a/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes CHECK,SIMD128 -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck %s --check-prefixes CHECK,NO-SIMD128 ; Test that vector sign extensions lower to shifts @@ -7,55 +6,133 @@ target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -; CHECK-LABEL: sext_inreg_v16i8: +; CHECK-LABEL: sext_v16i8: ; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype sext_inreg_v16i8 (v128) -> (v128){{$}} +; SIMD128-NEXT: .functype sext_v16i8 (v128) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 7{{$}} ; SIMD128-NEXT: i8x16.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}} ; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 7{{$}} ; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} -define <16 x i8> @sext_inreg_v16i8(<16 x i1> %x) { +define <16 x i8> @sext_v16i8(<16 x i1> %x) { %res = sext <16 x i1> %x to <16 x i8> ret <16 x i8> %res } -; CHECK-LABEL: sext_inreg_v8i16: +; CHECK-LABEL: sext_v8i16: ; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype sext_inreg_v8i16 (v128) -> (v128){{$}} +; SIMD128-NEXT: .functype sext_v8i16 (v128) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 15{{$}} ; SIMD128-NEXT: i16x8.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}} ; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 15{{$}} ; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} -define <8 x i16> @sext_inreg_v8i16(<8 x i1> %x) { +define <8 x i16> @sext_v8i16(<8 x i1> %x) { %res = sext <8 x i1> %x to <8 x i16> ret <8 x i16> %res } -; CHECK-LABEL: sext_inreg_v4i32: +; CHECK-LABEL: sext_v4i32: ; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype sext_inreg_v4i32 (v128) -> (v128){{$}} +; SIMD128-NEXT: .functype sext_v4i32 (v128) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 31{{$}} ; SIMD128-NEXT: i32x4.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}} ; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 31{{$}} ; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} -define <4 x i32> @sext_inreg_v4i32(<4 x i1> %x) { +define <4 x i32> @sext_v4i32(<4 x i1> %x) { %res = sext <4 x i1> %x to <4 x i32> ret <4 x i32> %res } -; CHECK-LABEL: sext_inreg_v2i64: +; CHECK-LABEL: sext_v2i64: ; NO-SIMD128-NOT: i64x2 -; SDIM128-VM-NOT: i64x2 -; SIMD128-NEXT: .functype sext_inreg_v2i64 (v128) -> (v128){{$}} +; SIMD128-NEXT: .functype sext_v2i64 (v128) -> (v128){{$}} ; SIMD128-NEXT: i32.const $push[[T0:[0-9]+]]=, 63{{$}} ; SIMD128-NEXT: i64x2.shl $push[[T1:[0-9]+]]=, $0, $pop[[T0]]{{$}} ; SIMD128-NEXT: i32.const $push[[T2:[0-9]+]]=, 63{{$}} ; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $pop[[T1]], $pop[[T2]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} -define <2 x i64> @sext_inreg_v2i64(<2 x i1> %x) { +define <2 x i64> @sext_v2i64(<2 x i1> %x) { %res = sext <2 x i1> %x to <2 x i64> ret <2 x i64> %res } + +; CHECK-LABEL: sext_inreg_i8_to_i16: +; SIMD128-NEXT: .functype sext_inreg_i8_to_i16 (v128) -> (i32){{$}} +; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define i16 @sext_inreg_i8_to_i16(<8 x i16> %x) { + %lane = extractelement <8 x i16> %x, i32 1 + %a = shl i16 %lane, 8 + %res = ashr i16 %a, 8 + ret i16 %res +} + +; CHECK-LABEL: sext_inreg_i8_to_i32: +; SIMD128-NEXT: .functype sext_inreg_i8_to_i32 (v128) -> (i32){{$}} +; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 4{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define i32 @sext_inreg_i8_to_i32(<4 x i32> %x) { + %lane = extractelement <4 x i32> %x, i32 1 + %a = shl i32 %lane, 24 + %res = ashr i32 %a, 24 + ret i32 %res +} + +; CHECK-LABEL: sext_inreg_i16_to_i32: +; SIMD128-NEXT: .functype sext_inreg_i16_to_i32 (v128) -> (i32){{$}} +; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define i32 @sext_inreg_i16_to_i32(<4 x i32> %x) { + %lane = extractelement <4 x i32> %x, i32 1 + %a = shl i32 %lane, 16 + %res = ashr i32 %a, 16 + ret i32 %res +} + +; CHECK-LABEL: sext_inreg_i8_to_i64: +; SIMD128-NEXT: .functype sext_inreg_i8_to_i64 (v128) -> (i64){{$}} +; SIMD128-NEXT: i64x2.extract_lane $push[[T0:[0-9]+]]=, $0, 1{{$}} +; SIMD128-NEXT: i64.const $push[[T1:[0-9]+]]=, 56{{$}} +; SIMD128-NEXT: i64.shl $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} +; SIMD128-NEXT: i64.const $push[[T3:[0-9]+]]=, 56{{$}} +; SIMD128-NEXT: i64.shr_s $push[[R:[0-9]+]]=, $pop[[T2]], $pop[[T3]]{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define i64 @sext_inreg_i8_to_i64(<2 x i64> %x) { + %lane = extractelement <2 x i64> %x, i32 1 + %a = shl i64 %lane, 56 + %res = ashr i64 %a, 56 + ret i64 %res +} + +; CHECK-LABEL: sext_inreg_i16_to_i64: +; SIMD128-NEXT: .functype sext_inreg_i16_to_i64 (v128) -> (i64){{$}} +; SIMD128-NEXT: i64x2.extract_lane $push[[T0:[0-9]+]]=, $0, 1{{$}} +; SIMD128-NEXT: i64.const $push[[T1:[0-9]+]]=, 48{{$}} +; SIMD128-NEXT: i64.shl $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} +; SIMD128-NEXT: i64.const $push[[T3:[0-9]+]]=, 48{{$}} +; SIMD128-NEXT: i64.shr_s $push[[R:[0-9]+]]=, $pop[[T2]], $pop[[T3]]{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define i64 @sext_inreg_i16_to_i64(<2 x i64> %x) { + %lane = extractelement <2 x i64> %x, i32 1 + %a = shl i64 %lane, 48 + %res = ashr i64 %a, 48 + ret i64 %res +} + +; CHECK-LABEL: sext_inreg_i32_to_i64: +; NO-SIMD128-NOT: i64x2 +; SIMD128-NEXT: .functype sext_inreg_i32_to_i64 (v128) -> (i64){{$}} +; SIMD128-NEXT: i64x2.extract_lane $push[[T0:[0-9]+]]=, $0, 1{{$}} +; SIMD128-NEXT: i64.const $push[[T1:[0-9]+]]=, 32{{$}} +; SIMD128-NEXT: i64.shl $push[[T2:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} +; SIMD128-NEXT: i64.const $push[[T3:[0-9]+]]=, 32{{$}} +; SIMD128-NEXT: i64.shr_s $push[[R:[0-9]+]]=, $pop[[T2]], $pop[[T3]]{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +define i64 @sext_inreg_i32_to_i64(<2 x i64> %x) { + %lane = extractelement <2 x i64> %x, i32 1 + %a = shl i64 %lane, 32 + %res = ashr i64 %a, 32 + ret i64 %res +} diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll --- a/llvm/test/CodeGen/WebAssembly/simd.ll +++ b/llvm/test/CodeGen/WebAssembly/simd.ll @@ -127,7 +127,7 @@ ; CHECK-LABEL: extract_v16i8: ; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype extract_v16i8 (v128) -> (i32){{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 13{{$}} +; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 13{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define i8 @extract_v16i8(<16 x i8> %v) { %elem = extractelement <16 x i8> %v, i8 13 @@ -155,7 +155,7 @@ ; CHECK-LABEL: extract_undef_v16i8: ; NO-SIMD128-NOT: i8x16 ; SIMD128-NEXT: .functype extract_undef_v16i8 (v128) -> (i32){{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[R:[0-9]+]]=, $0, 0{{$}} +; SIMD128-NEXT: i8x16.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define i8 @extract_undef_v16i8(<16 x i8> %v) { %elem = extractelement <16 x i8> %v, i8 undef @@ -393,7 +393,7 @@ ; CHECK-LABEL: extract_v8i16: ; NO-SIMD128-NOT: i16x8 ; SIMD128-NEXT: .functype extract_v8i16 (v128) -> (i32){{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 5{{$}} +; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 5{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define i16 @extract_v8i16(<8 x i16> %v) { %elem = extractelement <8 x i16> %v, i16 5 @@ -423,7 +423,7 @@ ; CHECK-LABEL: extract_undef_v8i16: ; NO-SIMD128-NOT: i16x8 ; SIMD128-NEXT: .functype extract_undef_v8i16 (v128) -> (i32){{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[R:[0-9]+]]=, $0, 0{{$}} +; SIMD128-NEXT: i16x8.extract_lane_u $push[[R:[0-9]+]]=, $0, 0{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define i16 @extract_undef_v8i16(<8 x i16> %v) { %elem = extractelement <8 x i16> %v, i16 undef