// Patch summary (free-form preamble placed ahead of the first "diff --git" header).
//  * LegalizerHelper.cpp: in the vector branch, NarrowTy1 is now built as
//    LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType()) instead of
//    LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits()).
//  * AArch64LegalizerInfo.cpp: the G_FPTRUNC rule set gains
//    .clampMaxNumElements(0, s32, 2) after its legalFor list.
//  * legalize-fptrunc.mir (new file): the tests in this part of the patch
//    (s16<-s32, s16<-s64, s32<-s64, v4s16<-v4s32, v2s16<-v2s32) have CHECK lines
//    that expect the G_FPTRUNC to come out of the legalizer unchanged.
// NOTE(review): the new-file hunk header declares 139 added lines, but this text
// holds only a handful of physical lines, so the original line breaks appear to
// have been collapsed -- confirm against the original patch before applying.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3285,7 +3285,7 @@ if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) return UnableToLegalize; - NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits()); + NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType()); } else { NumParts = DstTy.getNumElements(); NarrowTy1 = SrcTy.getElementType(); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -384,7 +384,8 @@ // FP conversions getActionDefinitionsBuilder(G_FPTRUNC).legalFor( - {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}); + {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}) + .clampMaxNumElements(0, s32, 2); getActionDefinitionsBuilder(G_FPEXT).legalFor( {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir @@ -0,0 +1,139 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -run-pass=legalizer -O0 -global-isel %s -o - | FileCheck %s +--- +name: fptrunc_s16_s32 +body: | + bb.0: + liveins: $s0 + + ; CHECK-LABEL: name: fptrunc_s16_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[COPY]](s32) + ; CHECK: $h0 = COPY [[FPTRUNC]](s16) + ; CHECK: RET_ReallyLR implicit $h0 + %0:_(s32) = COPY $s0 + %1:_(s16) = G_FPTRUNC %0 + $h0 = 
COPY %1(s16) + RET_ReallyLR implicit $h0 +... +--- +name: fptrunc_s16_s64 +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: fptrunc_s16_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[COPY]](s64) + ; CHECK: $h0 = COPY [[FPTRUNC]](s16) + ; CHECK: RET_ReallyLR implicit $h0 + %0:_(s64) = COPY $d0 + %1:_(s16) = G_FPTRUNC %0 + $h0 = COPY %1(s16) + RET_ReallyLR implicit $h0 +... +--- +name: fptrunc_s32_s64 +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: fptrunc_s32_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s32) = G_FPTRUNC [[COPY]](s64) + ; CHECK: $s0 = COPY [[FPTRUNC]](s32) + ; CHECK: RET_ReallyLR implicit $s0 + %0:_(s64) = COPY $d0 + %1:_(s32) = G_FPTRUNC %0 + $s0 = COPY %1(s32) + RET_ReallyLR implicit $s0 +... +--- +name: fptrunc_v4s16_v4s32 +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: fptrunc_v4s16_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[COPY]](<4 x s32>) + ; CHECK: $d0 = COPY [[FPTRUNC]](<4 x s16>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s16>) = G_FPTRUNC %0 + $d0 = COPY %1(<4 x s16>) + RET_ReallyLR implicit $d0 +... +--- +name: fptrunc_v2s16_v2s32 +body: | + bb.0: + liveins: $d0 + + ; CHECK-LABEL: name: fptrunc_v2s16_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(<2 x s16>) = G_FPTRUNC [[COPY]](<2 x s32>) + ; CHECK: $s0 = COPY [[FPTRUNC]](<2 x s16>) + ; CHECK: RET_ReallyLR implicit $s0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s16>) = G_FPTRUNC %0 + $s0 = COPY %1(<2 x s16>) + RET_ReallyLR implicit $s0 +... 
+--- +name: fptrunc_v4s32_v4s64 +body: | + bb.0: + + ; CHECK-LABEL: name: fptrunc_v4s32_v4s64 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[DEF]](<2 x s64>) + ; CHECK: [[FPTRUNC1:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[DEF]](<2 x s64>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[FPTRUNC]](<2 x s32>), [[FPTRUNC1]](<2 x s32>) + ; CHECK: $q0 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<4 x s64>) = G_IMPLICIT_DEF + %1:_(<4 x s32>) = G_FPTRUNC %0 + $q0 = COPY %1(<4 x s32>) + RET_ReallyLR implicit $q0 +... +--- +name: fptrunc_v8s32_v8s64 +body: | + bb.0: + + liveins: $q0, $q1, $q2, $q3, $x0 + + ; CHECK-LABEL: name: fptrunc_v8s32_v8s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 + ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3 + ; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY]](<2 x s64>) + ; CHECK: [[FPTRUNC1:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY1]](<2 x s64>) + ; CHECK: [[FPTRUNC2:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY2]](<2 x s64>) + ; CHECK: [[FPTRUNC3:%[0-9]+]]:_(<2 x s32>) = G_FPTRUNC [[COPY3]](<2 x s64>) + ; CHECK: [[COPY5:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: G_STORE [[FPTRUNC]](<2 x s32>), [[COPY5]](p0) :: (store 8, align 32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C]](s64) + ; CHECK: G_STORE [[FPTRUNC1]](<2 x s32>), [[PTR_ADD]](p0) :: (store 8 + 8) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C1]](s64) + ; CHECK: G_STORE [[FPTRUNC2]](<2 x s32>), [[PTR_ADD1]](p0) :: (store 8 + 16, align 16) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], 
[[C2]](s64) + ; CHECK: G_STORE [[FPTRUNC3]](<2 x s32>), [[PTR_ADD2]](p0) :: (store 8 + 24) + ; CHECK: RET_ReallyLR + %2:_(<2 x s64>) = COPY $q0 + %3:_(<2 x s64>) = COPY $q1 + %4:_(<2 x s64>) = COPY $q2 + %5:_(<2 x s64>) = COPY $q3 + %0:_(<8 x s64>) = G_CONCAT_VECTORS %2(<2 x s64>), %3(<2 x s64>), %4(<2 x s64>), %5(<2 x s64>) + %1:_(p0) = COPY $x0 + %6:_(<8 x s32>) = G_FPTRUNC %0(<8 x s64>) + %7:_(p0) = COPY $x0 + G_STORE %6(<8 x s32>), %7(p0) :: (store 32) + RET_ReallyLR +... diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll --- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll @@ -3,6 +3,7 @@ ; RUN: -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | \ ; RUN: FileCheck %s --check-prefixes=FALLBACK,CHECK +; FALLBACK-NOT: remark{{.*}}fcvtas_2s define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind { ;CHECK-LABEL: fcvtas_2s: ;CHECK-NOT: ld1 @@ -12,6 +13,7 @@ ret <2 x i32> %tmp3 } +; FALLBACK-NOT: remark{{.*}}fcvtas_4s define <4 x i32> @fcvtas_4s(<4 x float> %A) nounwind { ;CHECK-LABEL: fcvtas_4s: ;CHECK-NOT: ld1 @@ -21,6 +23,7 @@ ret <4 x i32> %tmp3 } +; FALLBACK-NOT: remark{{.*}}fcvtas_2d define <2 x i64> @fcvtas_2d(<2 x double> %A) nounwind { ;CHECK-LABEL: fcvtas_2d: ;CHECK-NOT: ld1