diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17815,7 +17815,8 @@ } case WebAssembly::BI__builtin_wasm_popcnt_i8x16: { Value *Vec = EmitScalarExpr(E->getArg(0)); - Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_popcnt); + Function *Callee = + CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType())); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_any_true_v128: diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -506,7 +506,7 @@ i8x16 popcnt(i8x16 x) { return __builtin_wasm_popcnt_i8x16(x); - // WEBASSEMBLY: call <16 x i8> @llvm.wasm.popcnt(<16 x i8> %x) + // WEBASSEMBLY: call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %x) // WEBASSEMBLY-NEXT: ret } diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c --- a/clang/test/Headers/wasm.c +++ b/clang/test/Headers/wasm.c @@ -1352,7 +1352,7 @@ // CHECK-LABEL: @test_i8x16_popcnt( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.wasm.popcnt(<16 x i8> [[TMP0]]) #[[ATTR6]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP0]]) #[[ATTR6]] // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP2]] // diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -172,11 +172,6 @@ [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>; -// TODO: Replace this intrinsic with normal ISel patterns once popcnt is merged -// to the proposal. 
-def int_wasm_popcnt : - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem, IntrSpeculatable]>; - def int_wasm_extmul_low_signed : Intrinsic<[llvm_anyvector_ty], [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -212,6 +212,9 @@ for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) setOperationAction(Op, T, Legal); + // And we have popcnt for i8x16 + setOperationAction(ISD::CTPOP, MVT::v16i8, Legal); + // Expand float operations supported for scalars but not SIMD for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FRINT}) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -833,7 +833,7 @@ defm NEG : SIMDUnaryInt<ivneg, "neg", 97>; // Population count: popcnt -defm POPCNT : SIMDUnary<I8x16, int_wasm_popcnt, "popcnt", 0x62>; +defm POPCNT : SIMDUnary<I8x16, ctpop, "popcnt", 0x62>; // Any lane true: any_true defm ANYTRUE : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), [], diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -78,9 +78,9 @@ ; CHECK-NEXT: .functype popcnt_v16i8 (v128) -> (v128){{$}} ; CHECK-NEXT: i8x16.popcnt $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -declare <16 x i8> @llvm.wasm.popcnt(<16 x i8>) +declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) define <16 x i8> @popcnt_v16i8(<16 x i8> %x) { - %a = call <16 x i8> @llvm.wasm.popcnt(<16 x i8> %x) + %a = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %x) ret <16 x i8> %a } diff --git 
a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll --- a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll @@ -10,7 +10,7 @@ ; ============================================================================== ; CHECK-LABEL: ctlz_v16i8: -; CHECK: i32.clz +; CHECK: i8x16.popcnt declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) define <16 x i8> @ctlz_v16i8(<16 x i8> %x) { %v = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %x, i1 false) @@ -18,14 +18,14 @@ } ; CHECK-LABEL: ctlz_v16i8_undef: -; CHECK: i32.clz +; CHECK: i8x16.popcnt define <16 x i8> @ctlz_v16i8_undef(<16 x i8> %x) { %v = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %x, i1 true) ret <16 x i8> %v } ; CHECK-LABEL: cttz_v16i8: -; CHECK: i32.ctz +; CHECK: i8x16.popcnt declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1) define <16 x i8> @cttz_v16i8(<16 x i8> %x) { %v = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %x, i1 false) @@ -33,21 +33,12 @@ } ; CHECK-LABEL: cttz_v16i8_undef: -; CHECK: i32.ctz +; CHECK: i8x16.popcnt define <16 x i8> @cttz_v16i8_undef(<16 x i8> %x) { %v = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %x, i1 true) ret <16 x i8> %v } -; CHECK-LABEL: ctpop_v16i8: -; Note: expansion does not use i32.popcnt -; CHECK: v128.and -declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) -define <16 x i8> @ctpop_v16i8(<16 x i8> %x) { - %v = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %x) - ret <16 x i8> %v -} - ; CHECK-LABEL: sdiv_v16i8: ; CHECK: i32.div_s define <16 x i8> @sdiv_v16i8(<16 x i8> %x, <16 x i8> %y) {