Index: lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1155,6 +1155,31 @@
   return SDValue();
 }
 
+static SDValue UnrollVectorShift(SDValue Op, SelectionDAG &DAG) {
+  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
+  // 32-bit and 64-bit unrolled shifts will have proper semantics
+  if (LaneT.bitsGE(MVT::i32))
+    return DAG.UnrollVectorOp(Op.getNode());
+  // Otherwise mask the shift value to get proper semantics from 32-bit shift
+  SDLoc DL(Op);
+  SDValue ShiftVal = Op.getOperand(1);
+  uint64_t MaskVal = LaneT.getSizeInBits() - 1;
+  SDValue MaskedShiftVal = DAG.getNode(
+      ISD::AND, // mask opcode
+      DL, ShiftVal.getValueType(), // masked value type
+      ShiftVal, // original shift value operand
+      DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand
+  );
+
+  return DAG.UnrollVectorOp(
+      DAG.getNode(Op.getOpcode(), // original shift opcode
+                  DL, Op.getValueType(), // original return type
+                  Op.getOperand(0), // original vector operand,
+                  MaskedShiftVal // new masked shift value operand
+                  )
+          .getNode());
+}
+
 SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                               SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -1162,12 +1187,17 @@
   // Only manually lower vector shifts
   assert(Op.getSimpleValueType().isVector());
 
+  // Expand all vector shifts until V8 fixes its implementation
+  // TODO: remove this once V8 is fixed
+  if (!Subtarget->hasUnimplementedSIMD128())
+    return UnrollVectorShift(Op, DAG);
+
   // Unroll non-splat vector shifts
   BuildVectorSDNode *ShiftVec;
   SDValue SplatVal;
   if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
       !(SplatVal = ShiftVec->getSplatValue()))
-    return DAG.UnrollVectorOp(Op.getNode());
+    return UnrollVectorShift(Op, DAG);
 
   // All splats except i64x2 const splats are handled by patterns
   ConstantSDNode *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
Index: test/CodeGen/WebAssembly/simd-arith.ll
===================================================================
--- test/CodeGen/WebAssembly/simd-arith.ll
+++ test/CodeGen/WebAssembly/simd-arith.ll
@@ -90,7 +90,11 @@
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
+; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
+; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
+; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 ; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 ; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 ; Skip 14 lanes
@@ -122,7 +126,11 @@
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
+; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
+; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
+; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 ; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 ; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 ; Skip 14 lanes
@@ -154,7 +162,11 @@
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}}
+; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
+; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
+; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
+; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 ; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 ; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 ; Skip 14 lanes
@@ -304,7 +316,11 @@
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
+; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
+; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
+; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 ; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 ; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 ; Skip 6 lanes
@@ -335,7 +351,11 @@
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
+; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
+; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
+; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 ; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 ; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 ; Skip 6 lanes
@@ -366,7 +386,11 @@
 ; NO-SIMD128-NOT: i16x8
 ; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}}
-; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 0{{$}}
+; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}}
+; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}}
+; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}}
+; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}}
+; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}}
 ; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
 ; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
 ; Skip 6 lanes