Index: lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -151,14 +151,16 @@ for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) setOperationAction(Op, MVT::v2i64, Custom); - // There is no select instruction for vectors - if (Subtarget->hasSIMD128()) { - for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) - setOperationAction(ISD::VSELECT, T, Expand); - if (EnableUnimplementedWasmSIMDInstrs) - for (auto T : {MVT::v2i64, MVT::v2f64}) - setOperationAction(ISD::VSELECT, T, Expand); - } + // There are no select instructions for vectors, but ISD::SELECT is handled + // with patterns. + if (Subtarget->hasSIMD128()) + for (auto Op : {ISD::VSELECT, ISD::SELECT_CC}) { + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) + setOperationAction(Op, T, Expand); + if (EnableUnimplementedWasmSIMDInstrs) + for (auto T : {MVT::v2i64, MVT::v2f64}) + setOperationAction(Op, T, Expand); + } // As a special case, these operators use the type to mean the type to // sign-extend from. Index: lib/Target/WebAssembly/WebAssemblyInstrSIMD.td =================================================================== --- lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -558,6 +558,10 @@ defm "" : SIMDNot; defm "" : SIMDNot; +//===----------------------------------------------------------------------===// +// Select operations +//===----------------------------------------------------------------------===// + // Bitwise select: v128.bitselect multiclass Bitselect { defm BITSELECT_#vec_t : @@ -580,6 +584,67 @@ (!cast("BITSELECT_"#vec_t) V128:$v1, V128:$v2, V128:$c)>; +multiclass SIMDSelectPat { + // Straightforward lowering of select to bitselect + def : Pat<(vec_t (select (i32 I32:$c), (vec_t V128:$v1), (vec_t V128:$v2))), + (vec_t (!cast("BITSELECT_"#vec_t) V128:$v1, V128:$v2, + (mask_t (!cast("SPLAT_"#mask_t) + (lane_t (!cast("SELECT_"#reg_t) + (lane_t -1), (lane_t 0), (i32 I32:$c) + )) + )) + ))>; + // Select conforms to getBooleanContents, but the WebAssembly instruction + // accepts nonzero values as true, so remove the `and 1` truncation produced + // by the legalizer when it converts i1s to i32s. + def : Pat<(vec_t (select + (i32 (and I32:$c, (i32 1))), + (vec_t V128:$v1), + (vec_t V128:$v2) + )), + (vec_t (!cast("BITSELECT_"#vec_t) V128:$v1, V128:$v2, + (mask_t (!cast("SPLAT_"#mask_t) + (lane_t (!cast("SELECT_"#reg_t) + (lane_t -1), (lane_t 0), (i32 I32:$c) + )) + )) + ))>; + // Lower away explicit comparisons with zero as select conditions. + def : Pat<(vec_t (select + (i32 (setne I32:$c, (i32 0))), + (vec_t V128:$v1), + (vec_t V128:$v2) + )), + (vec_t (!cast("BITSELECT_"#vec_t) V128:$v1, V128:$v2, + (mask_t (!cast("SPLAT_"#mask_t) + (lane_t (!cast("SELECT_"#reg_t) + (lane_t -1), (lane_t 0), (i32 I32:$c) + )) + )) + ))>; + // Reverse the branches when the condition is equality with zero. + def : Pat<(vec_t (select + (i32 (seteq I32:$c, (i32 0))), + (vec_t V128:$v1), + (vec_t V128:$v2) + )), + (vec_t (!cast("BITSELECT_"#vec_t) V128:$v2, V128:$v1, + (mask_t (!cast("SPLAT_"#mask_t) + (lane_t (!cast("SELECT_"#reg_t) + (lane_t -1), (lane_t 0), (i32 I32:$c) + )) + )) + ))>; +} + +defm : SIMDSelectPat; +defm : SIMDSelectPat; +defm : SIMDSelectPat; +defm : SIMDSelectPat; +defm : SIMDSelectPat; +defm : SIMDSelectPat; + //===----------------------------------------------------------------------===// // Boolean horizontal reductions //===----------------------------------------------------------------------===// Index: test/CodeGen/WebAssembly/simd-select.ll =================================================================== --- /dev/null +++ test/CodeGen/WebAssembly/simd-select.ll @@ -0,0 +1,427 @@ +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128,+sign-ext | FileCheck %s + +; Test that vector selects of various varieties lower correctly to bitselects. + +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; ============================================================================== +; 16 x i8 +; ============================================================================== +; CHECK-LABEL: vselect_v16i8: +; CHECK-NEXT: .param v128, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 7{{$}} +; CHECK-NEXT: i8x16.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} +; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 7{{$}} +; CHECK-NEXT: i8x16.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <16 x i8> @vselect_v16i8(<16 x i1> %c, <16 x i8> %x, <16 x i8> %y) { + %res = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y + ret <16 x i8> %res +} + +; CHECK-LABEL: select_v16i8: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i8x16.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <16 x i8> @select_v16i8(i1 %c, <16 x i8> %x, <16 x i8> %y) { + %res = select i1 %c, <16 x i8> %x, <16 x i8> %y + ret <16 x i8> %res +} + +; CHECK-LABEL: select_cmp_v16i8: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 0 +; CHECK-NEXT: i32.lt_s $push[[L1:[0-9]+]]=, $0, $pop[[L0]] +; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, -1, 0, $pop[[L1]]{{$}} +; CHECK-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <16 x i8> @select_cmp_v16i8(i32 %i, <16 x i8> %x, <16 x i8> %y) { + %c = icmp slt i32 %i, 0 + %res = select i1 %c, <16 x i8> %x, <16 x i8> %y + ret <16 x i8> %res +} + +; CHECK-LABEL: select_ne_v16i8: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i8x16.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <16 x i8> @select_ne_v16i8(i32 %i, <16 x i8> %x, <16 x i8> %y) { + %c = icmp ne i32 %i, 0 + %res = select i1 %c, <16 x i8> %x, <16 x i8> %y + ret <16 x i8> %res +} + +; CHECK-LABEL: select_eq_v16i8: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i8x16.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <16 x i8> @select_eq_v16i8(i32 %i, <16 x i8> %x, <16 x i8> %y) { + %c = icmp eq i32 %i, 0 + %res = select i1 %c, <16 x i8> %x, <16 x i8> %y + ret <16 x i8> %res +} + +; ============================================================================== +; 8 x i16 +; ============================================================================== +; CHECK-LABEL: vselect_v8i16: +; CHECK-NEXT: .param v128, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 15{{$}} +; CHECK-NEXT: i16x8.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} +; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 15{{$}} +; CHECK-NEXT: i16x8.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <8 x i16> @vselect_v8i16(<8 x i1> %c, <8 x i16> %x, <8 x i16> %y) { + %res = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y + ret <8 x i16> %res +} + +; CHECK-LABEL: select_v8i16: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i16x8.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <8 x i16> @select_v8i16(i1 %c, <8 x i16> %x, <8 x i16> %y) { + %res = select i1 %c, <8 x i16> %x, <8 x i16> %y + ret <8 x i16> %res +} + +; CHECK-LABEL: select_cmp_v8i16: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 0 +; CHECK-NEXT: i32.lt_s $push[[L1:[0-9]+]]=, $0, $pop[[L0]] +; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, -1, 0, $pop[[L1]]{{$}} +; CHECK-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <8 x i16> @select_cmp_v8i16(i32 %i, <8 x i16> %x, <8 x i16> %y) { + %c = icmp slt i32 %i, 0 + %res = select i1 %c, <8 x i16> %x, <8 x i16> %y + ret <8 x i16> %res +} + +; CHECK-LABEL: select_ne_v8i16: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i16x8.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <8 x i16> @select_ne_v8i16(i32 %i, <8 x i16> %x, <8 x i16> %y) { + %c = icmp ne i32 %i, 0 + %res = select i1 %c, <8 x i16> %x, <8 x i16> %y + ret <8 x i16> %res +} + +; CHECK-LABEL: select_eq_v8i16: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i16x8.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <8 x i16> @select_eq_v8i16(i32 %i, <8 x i16> %x, <8 x i16> %y) { + %c = icmp eq i32 %i, 0 + %res = select i1 %c, <8 x i16> %x, <8 x i16> %y + ret <8 x i16> %res +} + +; ============================================================================== +; 4 x i32 +; ============================================================================== +; CHECK-LABEL: vselect_v4i32: +; CHECK-NEXT: .param v128, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 31{{$}} +; CHECK-NEXT: i32x4.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} +; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 31{{$}} +; CHECK-NEXT: i32x4.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <4 x i32> @vselect_v4i32(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y) { + %res = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y + ret <4 x i32> %res +} + + +; CHECK-LABEL: select_v4i32: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <4 x i32> @select_v4i32(i1 %c, <4 x i32> %x, <4 x i32> %y) { + %res = select i1 %c, <4 x i32> %x, <4 x i32> %y + ret <4 x i32> %res +} + +; CHECK-LABEL: select_cmp_v4i32: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 0 +; CHECK-NEXT: i32.lt_s $push[[L1:[0-9]+]]=, $0, $pop[[L0]] +; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, -1, 0, $pop[[L1]]{{$}} +; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <4 x i32> @select_cmp_v4i32(i32 %i, <4 x i32> %x, <4 x i32> %y) { + %c = icmp slt i32 %i, 0 + %res = select i1 %c, <4 x i32> %x, <4 x i32> %y + ret <4 x i32> %res +} + +; CHECK-LABEL: select_ne_v4i32: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <4 x i32> @select_ne_v4i32(i32 %i, <4 x i32> %x, <4 x i32> %y) { + %c = icmp ne i32 %i, 0 + %res = select i1 %c, <4 x i32> %x, <4 x i32> %y + ret <4 x i32> %res +} + +; CHECK-LABEL: select_eq_v4i32: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <4 x i32> @select_eq_v4i32(i32 %i, <4 x i32> %x, <4 x i32> %y) { + %c = icmp eq i32 %i, 0 + %res = select i1 %c, <4 x i32> %x, <4 x i32> %y + ret <4 x i32> %res +} + +; ============================================================================== +; 2 x i64 +; ============================================================================== +; CHECK-LABEL: vselect_v2i64: +; CHECK-NEXT: .param v128, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 63{{$}} +; CHECK-NEXT: i64x2.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} +; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 63{{$}} +; CHECK-NEXT: i64x2.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <2 x i64> @vselect_v2i64(<2 x i1> %c, <2 x i64> %x, <2 x i64> %y) { + %res = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %y + ret <2 x i64> %res +} + +; CHECK-LABEL: select_v2i64: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i64.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <2 x i64> @select_v2i64(i1 %c, <2 x i64> %x, <2 x i64> %y) { + %res = select i1 %c, <2 x i64> %x, <2 x i64> %y + ret <2 x i64> %res +} + +; CHECK-LABEL: select_cmp_v2i64: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 0 +; CHECK-NEXT: i32.lt_s $push[[L1:[0-9]+]]=, $0, $pop[[L0]] +; CHECK-NEXT: i64.select $push[[L2:[0-9]+]]=, -1, 0, $pop[[L1]]{{$}} +; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <2 x i64> @select_cmp_v2i64(i32 %i, <2 x i64> %x, <2 x i64> %y) { + %c = icmp slt i32 %i, 0 + %res = select i1 %c, <2 x i64> %x, <2 x i64> %y + ret <2 x i64> %res +} + +; CHECK-LABEL: select_ne_v2i64: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i64.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <2 x i64> @select_ne_v2i64(i32 %i, <2 x i64> %x, <2 x i64> %y) { + %c = icmp ne i32 %i, 0 + %res = select i1 %c, <2 x i64> %x, <2 x i64> %y + ret <2 x i64> %res +} + +; CHECK-LABEL: select_eq_v2i64: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i64.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <2 x i64> @select_eq_v2i64(i32 %i, <2 x i64> %x, <2 x i64> %y) { + %c = icmp eq i32 %i, 0 + %res = select i1 %c, <2 x i64> %x, <2 x i64> %y + ret <2 x i64> %res +} + +; ============================================================================== +; 4 x float +; ============================================================================== +; CHECK-LABEL: vselect_v4f32: +; CHECK-NEXT: .param v128, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 31{{$}} +; CHECK-NEXT: i32x4.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} +; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 31{{$}} +; CHECK-NEXT: i32x4.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <4 x float> @vselect_v4f32(<4 x i1> %c, <4 x float> %x, <4 x float> %y) { + %res = select <4 x i1> %c, <4 x float> %x, <4 x float> %y + ret <4 x float> %res +} + +; CHECK-LABEL: select_v4f32: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <4 x float> @select_v4f32(i1 %c, <4 x float> %x, <4 x float> %y) { + %res = select i1 %c, <4 x float> %x, <4 x float> %y + ret <4 x float> %res +} + +; CHECK-LABEL: select_cmp_v4f32: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 0 +; CHECK-NEXT: i32.lt_s $push[[L1:[0-9]+]]=, $0, $pop[[L0]] +; CHECK-NEXT: i32.select $push[[L2:[0-9]+]]=, -1, 0, $pop[[L1]]{{$}} +; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <4 x float> @select_cmp_v4f32(i32 %i, <4 x float> %x, <4 x float> %y) { + %c = icmp slt i32 %i, 0 + %res = select i1 %c, <4 x float> %x, <4 x float> %y + ret <4 x float> %res +} + +; CHECK-LABEL: select_ne_v4f32: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <4 x float> @select_ne_v4f32(i32 %i, <4 x float> %x, <4 x float> %y) { + %c = icmp ne i32 %i, 0 + %res = select i1 %c, <4 x float> %x, <4 x float> %y + ret <4 x float> %res +} + +; CHECK-LABEL: select_eq_v4f32: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <4 x float> @select_eq_v4f32(i32 %i, <4 x float> %x, <4 x float> %y) { + %c = icmp eq i32 %i, 0 + %res = select i1 %c, <4 x float> %x, <4 x float> %y + ret <4 x float> %res +} + +; ============================================================================== +; 2 x double +; ============================================================================== +; CHECK-LABEL: vselect_v2f64: +; CHECK-NEXT: .param v128, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 63{{$}} +; CHECK-NEXT: i64x2.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} +; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 63{{$}} +; CHECK-NEXT: i64x2.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <2 x double> @vselect_v2f64(<2 x i1> %c, <2 x double> %x, <2 x double> %y) { + %res = select <2 x i1> %c, <2 x double> %x, <2 x double> %y + ret <2 x double> %res +} + +; CHECK-LABEL: select_v2f64: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i64.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <2 x double> @select_v2f64(i1 %c, <2 x double> %x, <2 x double> %y) { + %res = select i1 %c, <2 x double> %x, <2 x double> %y + ret <2 x double> %res +} + +; CHECK-LABEL: select_cmp_v2f64: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 0 +; CHECK-NEXT: i32.lt_s $push[[L1:[0-9]+]]=, $0, $pop[[L0]] +; CHECK-NEXT: i64.select $push[[L2:[0-9]+]]=, -1, 0, $pop[[L1]]{{$}} +; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <2 x double> @select_cmp_v2f64(i32 %i, <2 x double> %x, <2 x double> %y) { + %c = icmp slt i32 %i, 0 + %res = select i1 %c, <2 x double> %x, <2 x double> %y + ret <2 x double> %res +} + +; CHECK-LABEL: select_ne_v2f64: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i64.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <2 x double> @select_ne_v2f64(i32 %i, <2 x double> %x, <2 x double> %y) { + %c = icmp ne i32 %i, 0 + %res = select i1 %c, <2 x double> %x, <2 x double> %y + ret <2 x double> %res +} + +; CHECK-LABEL: select_eq_v2f64: +; CHECK-NEXT: .param i32, v128, v128{{$}} +; CHECK-NEXT: .result v128{{$}} +; CHECK-NEXT: i64.select $push[[L0:[0-9]+]]=, -1, 0, $0{{$}} +; CHECK-NEXT: i64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}} +; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $2, $1, $pop[[L1]]{{$}} +; CHECK-NEXT: return $pop[[R]]{{$}} +define <2 x double> @select_eq_v2f64(i32 %i, <2 x double> %x, <2 x double> %y) { + %c = icmp eq i32 %i, 0 + %res = select i1 %c, <2 x double> %x, <2 x double> %y + ret <2 x double> %res +} Index: test/CodeGen/WebAssembly/simd-vselect.ll =================================================================== --- test/CodeGen/WebAssembly/simd-vselect.ll +++ /dev/null @@ -1,90 +0,0 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -wasm-enable-unimplemented-simd -mattr=+simd128,+sign-ext | FileCheck %s - -; Test that lanewise vector selects lower correctly to bitselects - -target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" -target triple = "wasm32-unknown-unknown" - -; CHECK-LABEL: vselect_v16i8: -; CHECK-NEXT: .param v128, v128, v128{{$}} -; CHECK-NEXT: .result v128{{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 7{{$}} -; CHECK-NEXT: i8x16.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 7{{$}} -; CHECK-NEXT: i8x16.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -define <16 x i8> @vselect_v16i8(<16 x i1> %c, <16 x i8> %x, <16 x i8> %y) { - %res = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y - ret <16 x i8> %res -} - -; CHECK-LABEL: vselect_v8i16: -; CHECK-NEXT: .param v128, v128, v128{{$}} -; CHECK-NEXT: .result v128{{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 15{{$}} -; CHECK-NEXT: i16x8.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 15{{$}} -; CHECK-NEXT: i16x8.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -define <8 x i16> @vselect_v8i16(<8 x i1> %c, <8 x i16> %x, <8 x i16> %y) { - %res = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y - ret <8 x i16> %res -} - -; CHECK-LABEL: vselect_v4i32: -; CHECK-NEXT: .param v128, v128, v128{{$}} -; CHECK-NEXT: .result v128{{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 31{{$}} -; CHECK-NEXT: i32x4.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 31{{$}} -; CHECK-NEXT: i32x4.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -define <4 x i32> @vselect_v4i32(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y) { - %res = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y - ret <4 x i32> %res -} - -; CHECK-LABEL: vselect_v2i64: -; CHECK-NEXT: .param v128, v128, v128{{$}} -; CHECK-NEXT: .result v128{{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 63{{$}} -; CHECK-NEXT: i64x2.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 63{{$}} -; CHECK-NEXT: i64x2.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -define <2 x i64> @vselect_v2i64(<2 x i1> %c, <2 x i64> %x, <2 x i64> %y) { - %res = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %y - ret <2 x i64> %res -} - -; CHECK-LABEL: vselect_v4f32: -; CHECK-NEXT: .param v128, v128, v128{{$}} -; CHECK-NEXT: .result v128{{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 31{{$}} -; CHECK-NEXT: i32x4.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 31{{$}} -; CHECK-NEXT: i32x4.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -define <4 x float> @vselect_v4f32(<4 x i1> %c, <4 x float> %x, <4 x float> %y) { - %res = select <4 x i1> %c, <4 x float> %x, <4 x float> %y - ret <4 x float> %res -} - -; CHECK-LABEL: vselect_v2f64: -; CHECK-NEXT: .param v128, v128, v128{{$}} -; CHECK-NEXT: .result v128{{$}} -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 63{{$}} -; CHECK-NEXT: i64x2.shl $push[[L1:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 63{{$}} -; CHECK-NEXT: i64x2.shr_s $push[[L3:[0-9]+]]=, $pop[[L1]], $pop[[L2]]{{$}} -; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -define <2 x double> @vselect_v2f64(<2 x i1> %c, <2 x double> %x, <2 x double> %y) { - %res = select <2 x i1> %c, <2 x double> %x, <2 x double> %y - ret <2 x double> %res -}