diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -4661,8 +4661,9 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec( MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const { assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); - assert(DstTy.getSizeInBits() <= 128 && "Unexpected build_vec type!"); - if (DstTy.getSizeInBits() < 32) + unsigned DstSize = DstTy.getSizeInBits(); + assert(DstSize <= 128 && "Unexpected build_vec type!"); + if (DstSize < 32) return false; // Check if we're building a constant vector, in which case we want to // generate a constant pool load instead of a vector insert sequence. @@ -4683,6 +4684,24 @@ } Constant *CV = ConstantVector::get(Csts); MachineIRBuilder MIB(I); + if (CV->isNullValue()) { + // Until the importer can support immAllZerosV in pattern leaf nodes, + // select a zero move manually here. + Register DstReg = I.getOperand(0).getReg(); + if (DstSize == 128) { + auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0); + I.eraseFromParent(); + return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); + } else if (DstSize == 64) { + auto Mov = + MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) + .addImm(0); + MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) + .addReg(Mov.getReg(0), 0, AArch64::dsub); + I.eraseFromParent(); + return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI); + } + } auto *CPLoad = emitLoadFromConstantPool(CV, MIB); if (!CPLoad) { LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector"); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir @@ -1,28 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s ---- | - target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" - target triple = "aarch64" - - define <4 x float> @test_f32(float %a, float %b, float %c, float %d) { - ret <4 x float> undef - } - - define <2 x double> @test_f64(double %a, double %b) { - ret <2 x double> undef - } - - define <4 x i32> @test_i32(i32 %a, i32 %b, i32 %c, i32 %d) { - ret <4 x i32> undef - } - - define <2 x i64> @test_i64(i64 %a, i64 %b) { - ret <2 x i64> undef - } - - define void @test_p0(i64 *%a, i64 *%b) { ret void } - -... --- name: test_f32 alignment: 4 @@ -33,7 +10,7 @@ failedISel: false tracksRegLiveness: true body: | - bb.0 (%ir-block.0): + bb.0: liveins: $s0, $s1, $s2, $s3 ; CHECK-LABEL: name: test_f32 @@ -74,7 +51,7 @@ failedISel: false tracksRegLiveness: true body: | - bb.0 (%ir-block.0): + bb.0: liveins: $d0, $d1, $d2, $d3 ; CHECK-LABEL: name: test_f64 @@ -105,7 +82,7 @@ failedISel: false tracksRegLiveness: true body: | - bb.0 (%ir-block.0): + bb.0: liveins: $w0, $w1, $w2, $w3 ; CHECK-LABEL: name: test_i32 @@ -140,7 +117,7 @@ failedISel: false tracksRegLiveness: true body: | - bb.0 (%ir-block.0): + bb.0: liveins: $x0, $x1 ; CHECK-LABEL: name: test_i64 @@ -169,7 +146,7 @@ failedISel: false tracksRegLiveness: true body: | - bb.0 (%ir-block.0): + bb.0: liveins: $x0, $x1 ; CHECK-LABEL: name: test_p0 @@ -188,3 +165,66 @@ RET_ReallyLR implicit $q0 ... +--- +name: test_v4s32_zero +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: test_v4s32_zero + ; CHECK: liveins: $x0 + ; CHECK: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0 + ; CHECK: $q0 = COPY [[MOVIv2d_ns]] + ; CHECK: RET_ReallyLR + %0:gpr(p0) = COPY $x0 + %2:gpr(s32) = G_CONSTANT i32 0 + %3:fpr(s32) = COPY %2(s32) + %4:fpr(s32) = COPY %2(s32) + %5:fpr(s32) = COPY %2(s32) + %6:fpr(s32) = COPY %2(s32) + %1:fpr(<4 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32), %6(s32) + $q0 = COPY %1(<4 x s32>) + RET_ReallyLR +... +--- +name: test_v8s8_zero +legalized: true +regBankSelected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: test_v8s8_zero + ; CHECK: liveins: $x0 + ; CHECK: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub + ; CHECK: $d0 = COPY [[COPY]] + ; CHECK: RET_ReallyLR + %0:gpr(p0) = COPY $x0 + %2:gpr(s8) = G_CONSTANT i8 0 + %3:fpr(s8) = COPY %2(s8) + %4:fpr(s8) = COPY %2(s8) + %5:fpr(s8) = COPY %2(s8) + %6:fpr(s8) = COPY %2(s8) + %7:fpr(s8) = COPY %2(s8) + %8:fpr(s8) = COPY %2(s8) + %9:fpr(s8) = COPY %2(s8) + %10:fpr(s8) = COPY %2(s8) + %1:fpr(<8 x s8>) = G_BUILD_VECTOR %3(s8), %4(s8), %5(s8), %6(s8), %7(s8), %8(s8), %9(s8), %10(s8) + $d0 = COPY %1(<8 x s8>) + RET_ReallyLR +... diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -956,8 +956,8 @@ ; DAG: abs.2s ; DAG-NEXT: ret -; GISEL: neg.2s -; GISEL: cmge.2s +; GISEL-DAG: neg.2s +; GISEL-DAG: cmge.2s ; GISEL: fcsel ; GISEL: fcsel %tmp1neg = sub <2 x i32> zeroinitializer, %a @@ -1007,8 +1007,8 @@ ; DAG: abs.8h ; DAG-NEXT: ret -; GISEL: cmgt.8h -; GISEL: sub.8h +; GISEL-DAG: cmgt.8h +; GISEL-DAG: sub.8h ; GISEL: csel ; GISEL: csel ; GISEL: csel @@ -1038,8 +1038,8 @@ ; DAG: abs.2d ; DAG-NEXT: ret -; GISEL: neg.2d -; GISEL: cmge.2d +; GISEL-DAG: neg.2d +; GISEL-DAG: cmge.2d ; GISEL: fcsel ; GISEL: fcsel %tmp1neg = sub <2 x i64> zeroinitializer, %a diff --git a/llvm/test/CodeGen/AArch64/combine-loads.ll b/llvm/test/CodeGen/AArch64/combine-loads.ll --- a/llvm/test/CodeGen/AArch64/combine-loads.ll +++ b/llvm/test/CodeGen/AArch64/combine-loads.ll @@ -4,8 +4,7 @@ define <2 x i64> @z(i64* nocapture nonnull readonly %p) { ; CHECK-LABEL: z: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: ldr x9, [x0, #8] ; CHECK-NEXT: mov v0.d[0], x8