diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1218,9 +1218,6 @@ // uaddlp.4h v0, v0 // v4s16, v2s32 // uaddlp.2s v0, v0 // v2s32 - if (!ST->hasNEON() || - MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) - return false; MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; Register Dst = MI.getOperand(0).getReg(); Register Val = MI.getOperand(1).getReg(); @@ -1230,6 +1227,16 @@ "Expected src and dst to have the same type!"); unsigned Size = Ty.getSizeInBits(); + if (!ST->hasNEON() || + MI.getMF()->getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) { + if (Ty.isScalar() && (Size == 32 || Size == 64)) { + return Helper.lowerBitCount(MI) == + LegalizerHelper::LegalizeResult::Legalized; + } + + return false; + } + // Pre-conditioning: widen Val up to the nearest vector type. // s32,s64,v4s16,v2s32 -> v8i8 // v8s16,v4s32,v2s64 -> v16i8 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir @@ -1,5 +1,5 @@ -# RUN: not --crash llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer %s -o - 2>&1 | FileCheck %s -# CHECK: LLVM ERROR: unable to legalize instruction: %ctpop:_(s32) = G_CTPOP %copy:_(s32) (in function: s32) +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -run-pass=legalizer %s -o - 2>&1 | FileCheck %s --- | define void @s32() noimplicitfloat { unreachable } define void @s64() noimplicitfloat { unreachable } @@ -10,7 +10,73 @@ body: | bb.0: liveins: $w0 + ; CHECK-LABEL: name: s32 + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR %copy, [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1431655765 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB %copy, [[AND]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C2]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 858993459 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C3]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C3]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND1]], [[AND2]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C4]](s64) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR2]], [[ADD]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; CHECK-NEXT: %ctpop:_(s32) = G_LSHR [[MUL]], [[C7]](s64) + ; CHECK-NEXT: $w0 = COPY %ctpop(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy:_(s32) = COPY $w0 %ctpop:_(s32) = G_CTPOP %copy(s32) $w0 = COPY %ctpop(s32) RET_ReallyLR implicit $w0 + +... +--- +name: s64 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + ; CHECK-LABEL: name: s64 + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy:_(s64) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR %copy, [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 6148914691236517205 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB %copy, [[AND]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SUB]], [[C2]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3689348814741910323 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[LSHR1]], [[C3]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C3]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND1]], [[AND2]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C4]](s64) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR2]], [[ADD]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C5]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK-NEXT: %ctpop:_(s64) = G_LSHR [[MUL]], [[C7]](s64) + ; CHECK-NEXT: $x0 = COPY %ctpop(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %copy:_(s64) = COPY $x0 + %ctpop:_(s64) = G_CTPOP %copy(s64) + $x0 = COPY %ctpop(s64) + RET_ReallyLR implicit $x0 + +...