diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -781,21 +781,30 @@
   getActionDefinitionsBuilder({G_SBFX, G_UBFX})
       .customFor({{s32, s32}, {s64, s64}});
+
+  bool HasCSSC = ST.hasCSSC();
 
   auto always = [=](const LegalityQuery &Q) { return true; };
   getActionDefinitionsBuilder(G_CTPOP)
       .legalFor({{v8s8, v8s8}, {v16s8, v16s8}})
+      // Scalar s32/s64 popcount is legal only when the subtarget has CSSC.
+      // Rules are tried in order, so the s32/s64 entries of customFor below
+      // are only reached when this rule does not match (i.e. without CSSC).
+      .legalIf([=](const LegalityQuery &Query) {
+        return HasCSSC &&
+               typePairInSet(0, 1, {{s32, s32}, {s64, s64}})(Query);
+      })
       .clampScalar(0, s32, s128)
       .widenScalarToNextPow2(0)
       .minScalarEltSameAsIf(always, 1, 0)
       .maxScalarEltSameAsIf(always, 1, 0)
       .customFor({{s32, s32},
                   {s64, s64},
                   {s128, s128},
                   {v2s64, v2s64},
                   {v2s32, v2s32},
                   {v4s32, v4s32},
                   {v4s16, v4s16},
                   {v8s16, v8s16}});
 
   // TODO: Vector types.
   getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
@@ -1259,10 +1268,25 @@
   Register Dst = MI.getOperand(0).getReg();
   Register Val = MI.getOperand(1).getReg();
   LLT Ty = MRI.getType(Val);
+  unsigned Size = Ty.getSizeInBits();
 
   assert(Ty == MRI.getType(Dst) &&
          "Expected src and dst to have the same type!");
-  unsigned Size = Ty.getSizeInBits();
+
+  // With CSSC, s64 G_CTPOP is legal: split the s128 value into two s64
+  // halves, count each, and zero-extend the s64 sum back to s128.
+  if (ST->hasCSSC() && Ty.isScalar() && Size == 128) {
+    LLT s64 = LLT::scalar(64);
+
+    auto Split = MIRBuilder.buildUnmerge(s64, Val);
+    auto CTPOP1 = MIRBuilder.buildCTPOP(s64, Split->getOperand(0));
+    auto CTPOP2 = MIRBuilder.buildCTPOP(s64, Split->getOperand(1));
+    auto Add = MIRBuilder.buildAdd(s64, CTPOP1, CTPOP2);
+
+    MIRBuilder.buildZExt(Dst, Add);
+    MI.eraseFromParent();
+    return true;
+  }
 
   if (!ST->hasNEON() ||
       MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer %s -o - 2>&1 | FileCheck %s
+# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer -mattr=+cssc %s -o - | FileCheck %s --check-prefix=CHECK-CSSC
 --- |
   define void @s32() noimplicitfloat { unreachable }
   define void @s64() noimplicitfloat { unreachable }
@@ -36,6 +37,13 @@
     ; CHECK-NEXT: %ctpop:_(s32) = G_LSHR [[MUL]], [[C7]](s64)
     ; CHECK-NEXT: $w0 = COPY %ctpop(s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    ; CHECK-CSSC-LABEL: name: s32
+    ; CHECK-CSSC: liveins: $w0
+
; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: %copy:_(s32) = COPY $w0 + ; CHECK-CSSC-NEXT: %ctpop:_(s32) = G_CTPOP %copy(s32) + ; CHECK-CSSC-NEXT: $w0 = COPY %ctpop(s32) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0 %copy:_(s32) = COPY $w0 %ctpop:_(s32) = G_CTPOP %copy(s32) $w0 = COPY %ctpop(s32) @@ -74,6 +82,13 @@ ; CHECK-NEXT: %ctpop:_(s64) = G_LSHR [[MUL]], [[C7]](s64) ; CHECK-NEXT: $x0 = COPY %ctpop(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 + ; CHECK-CSSC-LABEL: name: s64 + ; CHECK-CSSC: liveins: $x0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: %copy:_(s64) = COPY $x0 + ; CHECK-CSSC-NEXT: %ctpop:_(s64) = G_CTPOP %copy(s64) + ; CHECK-CSSC-NEXT: $x0 = COPY %ctpop(s64) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0 %copy:_(s64) = COPY $x0 %ctpop:_(s64) = G_CTPOP %copy(s64) $x0 = COPY %ctpop(s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer -mattr=+cssc %s -o - | FileCheck %s --check-prefix=CHECK-CSSC ... 
--- name: v8s8_legal @@ -9,10 +10,18 @@ liveins: $d0 ; CHECK-LABEL: name: v8s8_legal ; CHECK: liveins: $d0 - ; CHECK: %copy:_(<8 x s8>) = COPY $d0 - ; CHECK: %ctpop:_(<8 x s8>) = G_CTPOP %copy(<8 x s8>) - ; CHECK: $d0 = COPY %ctpop(<8 x s8>) - ; CHECK: RET_ReallyLR implicit $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(<8 x s8>) = COPY $d0 + ; CHECK-NEXT: %ctpop:_(<8 x s8>) = G_CTPOP %copy(<8 x s8>) + ; CHECK-NEXT: $d0 = COPY %ctpop(<8 x s8>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + ; CHECK-CSSC-LABEL: name: v8s8_legal + ; CHECK-CSSC: liveins: $d0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: %copy:_(<8 x s8>) = COPY $d0 + ; CHECK-CSSC-NEXT: %ctpop:_(<8 x s8>) = G_CTPOP %copy(<8 x s8>) + ; CHECK-CSSC-NEXT: $d0 = COPY %ctpop(<8 x s8>) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $d0 %copy:_(<8 x s8>) = COPY $d0 %ctpop:_(<8 x s8>) = G_CTPOP %copy(<8 x s8>) $d0 = COPY %ctpop(<8 x s8>) @@ -27,10 +36,18 @@ liveins: $q0 ; CHECK-LABEL: name: v16s8_legal ; CHECK: liveins: $q0 - ; CHECK: %copy:_(<16 x s8>) = COPY $q0 - ; CHECK: %ctpop:_(<16 x s8>) = G_CTPOP %copy(<16 x s8>) - ; CHECK: $q0 = COPY %ctpop(<16 x s8>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(<16 x s8>) = COPY $q0 + ; CHECK-NEXT: %ctpop:_(<16 x s8>) = G_CTPOP %copy(<16 x s8>) + ; CHECK-NEXT: $q0 = COPY %ctpop(<16 x s8>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + ; CHECK-CSSC-LABEL: name: v16s8_legal + ; CHECK-CSSC: liveins: $q0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: %copy:_(<16 x s8>) = COPY $q0 + ; CHECK-CSSC-NEXT: %ctpop:_(<16 x s8>) = G_CTPOP %copy(<16 x s8>) + ; CHECK-CSSC-NEXT: $q0 = COPY %ctpop(<16 x s8>) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0 %copy:_(<16 x s8>) = COPY $q0 %ctpop:_(<16 x s8>) = G_CTPOP %copy(<16 x s8>) $q0 = COPY %ctpop(<16 x s8>) @@ -45,13 +62,21 @@ liveins: $w0 ; CHECK-LABEL: name: s32_lower ; CHECK: liveins: $w0 - ; CHECK: %copy:_(s32) = COPY $w0 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %copy(s32) - ; CHECK: 
[[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[ZEXT]](s64) - ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) - ; CHECK: %ctpop:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>) - ; CHECK: $w0 = COPY %ctpop(s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(s32) = COPY $w0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %copy(s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[ZEXT]](s64) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) + ; CHECK-NEXT: %ctpop:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>) + ; CHECK-NEXT: $w0 = COPY %ctpop(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; CHECK-CSSC-LABEL: name: s32_lower + ; CHECK-CSSC: liveins: $w0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: %copy:_(s32) = COPY $w0 + ; CHECK-CSSC-NEXT: %ctpop:_(s32) = G_CTPOP %copy(s32) + ; CHECK-CSSC-NEXT: $w0 = COPY %ctpop(s32) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0 %copy:_(s32) = COPY $w0 %ctpop:_(s32) = G_CTPOP %copy(s32) $w0 = COPY %ctpop(s32) @@ -66,13 +91,21 @@ liveins: $x0 ; CHECK-LABEL: name: s64_lower ; CHECK: liveins: $x0 - ; CHECK: %copy:_(s64) = COPY $x0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST %copy(s64) - ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) - ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>) - ; CHECK: %ctpop:_(s64) = G_ZEXT [[INT]](s32) - ; CHECK: $x0 = COPY %ctpop(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(s64) = COPY $x0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST %copy(s64) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>) + ; CHECK-NEXT: %ctpop:_(s64) = G_ZEXT [[INT]](s32) + ; 
CHECK-NEXT: $x0 = COPY %ctpop(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + ; CHECK-CSSC-LABEL: name: s64_lower + ; CHECK-CSSC: liveins: $x0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: %copy:_(s64) = COPY $x0 + ; CHECK-CSSC-NEXT: %ctpop:_(s64) = G_CTPOP %copy(s64) + ; CHECK-CSSC-NEXT: $x0 = COPY %ctpop(s64) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0 %copy:_(s64) = COPY $x0 %ctpop:_(s64) = G_CTPOP %copy(s64) $x0 = COPY %ctpop(s64) @@ -84,23 +117,43 @@ tracksRegLiveness: true body: | bb.0: - liveins: $q0 + liveins: $x0, $x1 + ; CHECK-LABEL: name: s128_lower - ; CHECK: liveins: $q0 - ; CHECK: %copy:_(s128) = COPY $q0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST %copy(s128) - ; CHECK: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>) - ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<16 x s8>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[INT]](s32), [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: %ctpop:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) - ; CHECK: $q0 = COPY %ctpop(s128) - ; CHECK: RET_ReallyLR implicit $q0 - %copy:_(s128) = COPY $q0 - %ctpop:_(s128) = G_CTPOP %copy(s128) - $q0 = COPY %ctpop(s128) - RET_ReallyLR implicit $q0 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[MV]](s128) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<16 x s8>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[INT]](s32), [[C]](s32) + ; CHECK-NEXT: 
[[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: $x0 = COPY [[MV1]](s64) + ; CHECK-NEXT: $x1 = COPY [[C1]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 + ; CHECK-CSSC-LABEL: name: s128_lower + ; CHECK-CSSC: liveins: $x0, $x1 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-CSSC-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(s64) = G_CTPOP [[COPY]](s64) + ; CHECK-CSSC-NEXT: [[CTPOP1:%[0-9]+]]:_(s64) = G_CTPOP [[COPY1]](s64) + ; CHECK-CSSC-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTPOP]], [[CTPOP1]] + ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-CSSC-NEXT: $x0 = COPY [[ADD]](s64) + ; CHECK-CSSC-NEXT: $x1 = COPY [[C]](s64) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $x0, implicit $x1 + %1:_(s64) = COPY $x0 + %2:_(s64) = COPY $x1 + %0:_(s128) = G_MERGE_VALUES %1(s64), %2(s64) + %3:_(s128) = G_CTPOP %0(s128) + %4:_(s64), %5:_(s64) = G_UNMERGE_VALUES %3(s128) + $x0 = COPY %4(s64) + $x1 = COPY %5(s64) + RET_ReallyLR implicit $x0, implicit $x1 ... 
--- @@ -112,16 +165,27 @@ ; CHECK-LABEL: name: widen_s16 ; CHECK: liveins: $w0 - ; CHECK: %copy:_(s32) = COPY $w0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64) - ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) - ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>) - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32) - ; CHECK: $w0 = COPY [[COPY]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32) + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; CHECK-CSSC-LABEL: name: widen_s16 + ; CHECK-CSSC: liveins: $w0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: %copy:_(s32) = COPY $w0 + ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %copy, [[C]] + ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) + ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) + ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0 %copy:_(s32) = COPY $w0 %trunc:_(s16) = G_TRUNC %copy(s32) %ctpop:_(s16) = G_CTPOP %trunc(s16) @@ -139,16 +203,27 @@ ; CHECK-LABEL: name: widen_s8 ; CHECK: liveins: $w0 - ; CHECK: 
%copy:_(s32) = COPY $w0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64) - ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) - ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>) - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32) - ; CHECK: $w0 = COPY [[COPY]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32) + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; CHECK-CSSC-LABEL: name: widen_s8 + ; CHECK-CSSC: liveins: $w0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: %copy:_(s32) = COPY $w0 + ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %copy, [[C]] + ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) + ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) + ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0 %copy:_(s32) = COPY $w0 %trunc:_(s8) = G_TRUNC %copy(s32) %ctpop:_(s8) = G_CTPOP %trunc(s8) @@ -166,16 +241,27 @@ ; CHECK-LABEL: name: widen_s3 ; CHECK: liveins: $w0 - ; CHECK: %copy:_(s32) = COPY $w0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CHECK: 
[[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64) - ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) - ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>) - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32) - ; CHECK: $w0 = COPY [[COPY]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32) + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; CHECK-CSSC-LABEL: name: widen_s3 + ; CHECK-CSSC: liveins: $w0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: %copy:_(s32) = COPY $w0 + ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %copy, [[C]] + ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) + ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) + ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0 %copy:_(s32) = COPY $w0 %trunc:_(s3) = G_TRUNC %copy(s32) %ctpop:_(s3) = G_CTPOP %trunc(s3) @@ -192,16 +278,27 @@ liveins: $w0 ; CHECK-LABEL: name: different_sizes ; CHECK: liveins: $w0 - ; CHECK: %copy:_(s32) = COPY $w0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = 
G_AND [[ANYEXT]], [[C]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64) - ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) - ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>) - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32) - ; CHECK: $w0 = COPY [[COPY]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %copy(s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[AND]](s64) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<8 x s8>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[INT]](s32) + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; CHECK-CSSC-LABEL: name: different_sizes + ; CHECK-CSSC: liveins: $w0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: %copy:_(s32) = COPY $w0 + ; CHECK-CSSC-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-CSSC-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND %copy, [[C]] + ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(s32) = G_CTPOP [[AND]](s32) + ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[CTPOP]](s32) + ; CHECK-CSSC-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $w0 %copy:_(s32) = COPY $w0 %trunc:_(s8) = G_TRUNC %copy(s32) %ctpop:_(s16) = G_CTPOP %trunc(s8) @@ -219,12 +316,22 @@ ; CHECK-LABEL: name: custom_8x16 ; CHECK: liveins: $q0 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>) - ; CHECK: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>) - ; CHECK: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC 
intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>) - ; CHECK: $q0 = COPY [[INT]](<8 x s16>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>) + ; CHECK-NEXT: $q0 = COPY [[INT]](<8 x s16>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + ; CHECK-CSSC-LABEL: name: custom_8x16 + ; CHECK-CSSC: liveins: $q0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>) + ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>) + ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>) + ; CHECK-CSSC-NEXT: $q0 = COPY [[INT]](<8 x s16>) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0 %0:_(<8 x s16>) = COPY $q0 %1:_(<8 x s16>) = G_CTPOP %0(<8 x s16>) $q0 = COPY %1(<8 x s16>) @@ -240,13 +347,24 @@ ; CHECK-LABEL: name: custom_4x32 ; CHECK: liveins: $q0 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>) - ; CHECK: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>) - ; CHECK: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>) - ; CHECK: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>) - ; CHECK: $q0 = COPY [[INT1]](<4 x s32>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>) + ; CHECK-NEXT: 
[[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>) + ; CHECK-NEXT: $q0 = COPY [[INT1]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + ; CHECK-CSSC-LABEL: name: custom_4x32 + ; CHECK-CSSC: liveins: $q0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>) + ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>) + ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>) + ; CHECK-CSSC-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>) + ; CHECK-CSSC-NEXT: $q0 = COPY [[INT1]](<4 x s32>) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0 %0:_(<4 x s32>) = COPY $q0 %1:_(<4 x s32>) = G_CTPOP %0(<4 x s32>) $q0 = COPY %1(<4 x s32>) @@ -262,14 +380,26 @@ ; CHECK-LABEL: name: custom_2x64 ; CHECK: liveins: $q0 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<2 x s64>) - ; CHECK: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>) - ; CHECK: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>) - ; CHECK: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>) - ; CHECK: [[INT2:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT1]](<4 x s32>) - ; CHECK: $q0 = COPY [[INT2]](<2 x s64>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST 
[[COPY]](<2 x s64>) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>) + ; CHECK-NEXT: [[INT2:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT1]](<4 x s32>) + ; CHECK-NEXT: $q0 = COPY [[INT2]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + ; CHECK-CSSC-LABEL: name: custom_2x64 + ; CHECK-CSSC: liveins: $q0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>) + ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<16 x s8>) + ; CHECK-CSSC-NEXT: [[INT1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<8 x s16>) + ; CHECK-CSSC-NEXT: [[INT2:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT1]](<4 x s32>) + ; CHECK-CSSC-NEXT: $q0 = COPY [[INT2]](<2 x s64>) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x s64>) = COPY $q0 %1:_(<2 x s64>) = G_CTPOP %0(<2 x s64>) $q0 = COPY %1(<2 x s64>) @@ -285,12 +415,22 @@ ; CHECK-LABEL: name: custom_4x16 ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<4 x s16>) - ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) - ; CHECK: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>) - ; CHECK: $d0 = COPY [[INT]](<4 x s16>) - ; CHECK: RET_ReallyLR implicit $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK-NEXT: 
[[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>) + ; CHECK-NEXT: $d0 = COPY [[INT]](<4 x s16>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + ; CHECK-CSSC-LABEL: name: custom_4x16 + ; CHECK-CSSC: liveins: $d0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<4 x s16>) + ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) + ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>) + ; CHECK-CSSC-NEXT: $d0 = COPY [[INT]](<4 x s16>) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $d0 %0:_(<4 x s16>) = COPY $d0 %1:_(<4 x s16>) = G_CTPOP %0(<4 x s16>) $d0 = COPY %1(<4 x s16>) @@ -306,13 +446,24 @@ ; CHECK-LABEL: name: custom_2x32 ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>) - ; CHECK: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) - ; CHECK: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>) - ; CHECK: [[INT1:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<4 x s16>) - ; CHECK: $d0 = COPY [[INT1]](<2 x s32>) - ; CHECK: RET_ReallyLR implicit $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC 
intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<4 x s16>) + ; CHECK-NEXT: $d0 = COPY [[INT1]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + ; CHECK-CSSC-LABEL: name: custom_2x32 + ; CHECK-CSSC: liveins: $d0 + ; CHECK-CSSC-NEXT: {{ $}} + ; CHECK-CSSC-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-CSSC-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK-CSSC-NEXT: [[CTPOP:%[0-9]+]]:_(<8 x s8>) = G_CTPOP [[BITCAST]](<8 x s8>) + ; CHECK-CSSC-NEXT: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[CTPOP]](<8 x s8>) + ; CHECK-CSSC-NEXT: [[INT1:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlp), [[INT]](<4 x s16>) + ; CHECK-CSSC-NEXT: $d0 = COPY [[INT1]](<2 x s32>) + ; CHECK-CSSC-NEXT: RET_ReallyLR implicit $d0 %0:_(<2 x s32>) = COPY $d0 %1:_(<2 x s32>) = G_CTPOP %0(<2 x s32>) $d0 = COPY %1(<2 x s32>)