diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -113,5 +113,7 @@
 TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i64x2_f64x2, "V2LLiV2d", "nc", "unimplemented-simd128")
 TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i64x2_f64x2, "V2LLiV2d", "nc", "unimplemented-simd128")
 
+TARGET_BUILTIN(__builtin_wasm_shuffle_v8x16, "V16cV16cV16cIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIi", "nc", "simd128")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14173,7 +14173,21 @@
     Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
     return Builder.CreateCall(Callee, {Vec});
   }
-
+  case WebAssembly::BI__builtin_wasm_shuffle_v8x16: {
+    // Args 0-1 are emitted as scalar expressions; args 2-17 must each fold to
+    // an integer constant and are passed on as constant operands.
+    Value *Ops[18];
+    Ops[0] = EmitScalarExpr(E->getArg(0));
+    Ops[1] = EmitScalarExpr(E->getArg(1));
+    for (size_t ArgIdx = 2; ArgIdx < 18; ++ArgIdx) {
+      llvm::APSInt LaneIdx;
+      if (!E->getArg(ArgIdx)->isIntegerConstantExpr(LaneIdx, getContext()))
+        llvm_unreachable("Constant arg isn't actually constant?");
+      Ops[ArgIdx] = llvm::ConstantInt::get(getLLVMContext(), LaneIdx);
+    }
+    Function *ShuffleFn = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
+    return Builder.CreateCall(ShuffleFn, Ops);
+  }
   default:
     return nullptr;
   }
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -435,3 +435,13 @@
   // WEBASSEMBLY: call <2 x i64> @llvm.wasm.trunc.saturate.unsigned.v2i64.v2f64(<2 x double> %f)
   // WEBASSEMBLY-NEXT: ret
 }
+
+i8x16 shuffle(i8x16 x, i8x16 y) {
+  return __builtin_wasm_shuffle_v8x16(x, y, 0, 1, 2, 3, 4, 5, 6, 7,
+                                      8, 9, 10, 11, 12, 13, 14, 15);
+  // WEBASSEMBLY: call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y,
+  // WEBASSEMBLY-SAME: i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+  // WEBASSEMBLY-SAME: i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
+  // WEBASSEMBLY-SAME: i32 15
+  // WEBASSEMBLY-NEXT: ret
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -109,6 +109,13 @@
   Intrinsic<[llvm_i32_ty],
             [llvm_anyvector_ty],
             [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_shuffle :
+  Intrinsic<[llvm_v16i8_ty],
+            [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+             llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+            [IntrNoMem, IntrSpeculatable]>;
 
 //===----------------------------------------------------------------------===//
 // Bulk memory intrinsics
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1235,6 +1235,23 @@
                            Op.getOperand(3)  // thrown value
                        });
   }
+
+  case Intrinsic::wasm_shuffle: {
+    // Build the SHUFFLE node from operands 1-2 and the sixteen mask operands
+    // 3-18 (operand 0 is not forwarded). An undef mask operand is replaced by
+    // the i32 constant 0; every other mask operand is passed through as-is.
+    SDValue Ops[18];
+    Ops[0] = Op.getOperand(1);
+    Ops[1] = Op.getOperand(2);
+    for (size_t OpIdx = 2; OpIdx < 18; ++OpIdx) {
+      const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
+      if (MaskIdx.isUndef())
+        Ops[OpIdx] = DAG.getConstant(0, DL, MVT::i32);
+      else
+        Ops[OpIdx] = MaskIdx;
+    }
+    return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
+  }
   }
 }
 
diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -87,6 +87,36 @@
   ret <16 x i8> %a
 }
 
+; CHECK-LABEL: shuffle_v16i8:
+; NO-SIMD128-NOT: v8x16
+; SIMD128-NEXT: .functype shuffle_v16i8 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1,
+; SIMD128-SAME: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <16 x i8> @llvm.wasm.shuffle(
+  <16 x i8>, <16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+  i32, i32, i32, i32, i32)
+define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) {
+  %res = call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y,
+      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+      i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15)
+  ret <16 x i8> %res
+}
+
+; CHECK-LABEL: shuffle_undef_v16i8:
+; NO-SIMD128-NOT: v8x16
+; SIMD128-NEXT: .functype shuffle_undef_v16i8 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: v8x16.shuffle $push[[R:[0-9]+]]=, $0, $1,
+; SIMD128-SAME: 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) {
+  %res = call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y,
+      i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
+      i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
+      i32 undef, i32 undef, i32 undef, i32 2)
+  ret <16 x i8> %res
+}
+
 ; ==============================================================================
 ; 8 x i16
 ; ==============================================================================