diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -157,11 +157,9 @@ setOperationAction(ISD::MUL, MVT::v16i8, Expand); // There is no vector conditional select instruction - // TODO: Implement SELECT_V128 - for (auto Op : {ISD::SELECT_CC, ISD::SELECT}) - for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, - MVT::v2f64}) - setOperationAction(Op, T, Expand); + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, + MVT::v2f64}) + setOperationAction(ISD::SELECT_CC, T, Expand); // Expand integer operations supported for scalars but not SIMD for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -584,6 +584,37 @@ V128:$v1, V128:$v2, V128:$c )>; +// MVP select on v128 values +foreach vec_t = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { +defm SELECT_#vec_t : I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond), + (outs), (ins), + [(set V128:$dst, + (select I32:$cond, + (vec_t V128:$lhs), (vec_t V128:$rhs) + ) + )], + "v128.select\t$dst, $lhs, $rhs, $cond", + "v128.select", 0x1b>; + +// ISD::SELECT requires its operand to conform to getBooleanContents, but +// WebAssembly's select interprets any non-zero value as true, so we can fold +// a setne with 0 into a select. +def : Pat<(select + (i32 (setne I32:$cond, 0)), (vec_t V128:$lhs), (vec_t V128:$rhs) + ), + (!cast<Instruction>("SELECT_"#vec_t) + V128:$lhs, V128:$rhs, I32:$cond + )>; + +// And again, this time with seteq instead of setne and the arms reversed. 
+def : Pat<(select + (i32 (seteq I32:$cond, 0)), (vec_t V128:$lhs), (vec_t V128:$rhs) + ), + (!cast<Instruction>("SELECT_"#vec_t) + V128:$rhs, V128:$lhs, I32:$cond + )>; +} // foreach vec_t + //===----------------------------------------------------------------------===// // Integer unary arithmetic //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/WebAssembly/simd-select.ll b/llvm/test/CodeGen/WebAssembly/simd-select.ll --- a/llvm/test/CodeGen/WebAssembly/simd-select.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-select.ll @@ -49,12 +49,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const -1 -; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.select -; CHECK-NEXT: i8x16.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %res = select i1 %c, <16 x i8> %x, <16 x i8> %y ret <16 x i8> %res @@ -67,10 +63,9 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.const 31 -; CHECK-NEXT: i32.shr_s -; CHECK-NEXT: i8x16.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.lt_s +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp slt i32 %i, 0 %res = select i1 %c, <16 x i8> %x, <16 x i8> %y @@ -83,12 +78,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const -1 -; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.select -; CHECK-NEXT: i8x16.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp ne i32 %i, 0 %res = select i1 %c, <16 x i8> %x, <16 x i8> %y @@ -99,14 +90,10 @@ ; CHECK-LABEL: select_eq_v16i8: ; CHECK: .functype select_eq_v16i8 (i32, v128, v128) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: i32.const -1 +; 
CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.select -; CHECK-NEXT: i8x16.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp eq i32 %i, 0 %res = select i1 %c, <16 x i8> %x, <16 x i8> %y @@ -153,12 +140,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const -1 -; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.select -; CHECK-NEXT: i16x8.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %res = select i1 %c, <8 x i16> %x, <8 x i16> %y ret <8 x i16> %res @@ -171,10 +154,9 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.const 31 -; CHECK-NEXT: i32.shr_s -; CHECK-NEXT: i16x8.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.lt_s +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp slt i32 %i, 0 %res = select i1 %c, <8 x i16> %x, <8 x i16> %y @@ -187,12 +169,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const -1 -; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.select -; CHECK-NEXT: i16x8.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp ne i32 %i, 0 %res = select i1 %c, <8 x i16> %x, <8 x i16> %y @@ -203,14 +181,10 @@ ; CHECK-LABEL: select_eq_v8i16: ; CHECK: .functype select_eq_v8i16 (i32, v128, v128) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: i32.const -1 +; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.select -; CHECK-NEXT: i16x8.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp eq i32 %i, 0 %res = select i1 %c, <8 x i16> %x, <8 x i16> %y @@ -257,12 +231,8 @@ ; CHECK-NEXT: # 
%bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const -1 -; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.select -; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %res = select i1 %c, <4 x i32> %x, <4 x i32> %y ret <4 x i32> %res @@ -275,10 +245,9 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.const 31 -; CHECK-NEXT: i32.shr_s -; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.lt_s +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp slt i32 %i, 0 %res = select i1 %c, <4 x i32> %x, <4 x i32> %y @@ -291,12 +260,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const -1 -; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.select -; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp ne i32 %i, 0 %res = select i1 %c, <4 x i32> %x, <4 x i32> %y @@ -307,14 +272,10 @@ ; CHECK-LABEL: select_eq_v4i32: ; CHECK: .functype select_eq_v4i32 (i32, v128, v128) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: i32.const -1 +; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.select -; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp eq i32 %i, 0 %res = select i1 %c, <4 x i32> %x, <4 x i32> %y @@ -376,12 +337,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %res = select i1 %c, 
<2 x i64> %x, <2 x i64> %y ret <2 x i64> %res @@ -393,14 +350,10 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: i32.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp slt i32 %i, 0 %res = select i1 %c, <2 x i64> %x, <2 x i64> %y @@ -413,12 +366,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp ne i32 %i, 0 %res = select i1 %c, <2 x i64> %x, <2 x i64> %y @@ -429,14 +378,10 @@ ; CHECK-LABEL: select_eq_v2i64: ; CHECK: .functype select_eq_v2i64 (i32, v128, v128) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.const -1 +; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp eq i32 %i, 0 %res = select i1 %c, <2 x i64> %x, <2 x i64> %y @@ -483,12 +428,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const -1 -; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.select -; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %res = select i1 %c, <4 x float> %x, <4 x float> %y ret <4 x float> %res @@ -501,10 +442,9 @@ ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.const 31 -; CHECK-NEXT: i32.shr_s -; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: 
v128.bitselect +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32.lt_s +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp slt i32 %i, 0 %res = select i1 %c, <4 x float> %x, <4 x float> %y @@ -517,12 +457,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const -1 -; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.select -; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp ne i32 %i, 0 %res = select i1 %c, <4 x float> %x, <4 x float> %y @@ -533,14 +469,10 @@ ; CHECK-LABEL: select_eq_v4f32: ; CHECK: .functype select_eq_v4f32 (i32, v128, v128) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const 0 -; CHECK-NEXT: i32.const -1 +; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.select -; CHECK-NEXT: i32x4.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp eq i32 %i, 0 %res = select i1 %c, <4 x float> %x, <4 x float> %y @@ -587,12 +519,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %res = select i1 %c, <2 x double> %x, <2 x double> %y ret <2 x double> %res @@ -604,14 +532,10 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: i32.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp slt i32 %i, 0 %res = select i1 %c, <2 x double> %x, <2 x double> %y @@ -624,12 +548,8 @@ ; 
CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp ne i32 %i, 0 %res = select i1 %c, <2 x double> %x, <2 x double> %y @@ -640,14 +560,10 @@ ; CHECK-LABEL: select_eq_v2f64: ; CHECK: .functype select_eq_v2f64 (i32, v128, v128) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.const -1 +; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: v128.select ; CHECK-NEXT: # fallthrough-return %c = icmp eq i32 %i, 0 %res = select i1 %c, <2 x double> %x, <2 x double> %y