diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -193,6 +193,7 @@
   bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
   bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
+  bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
 
   unsigned emitConstantPoolEntry(const Constant *CPVal,
                                  MachineFunction &MF) const;
@@ -2166,6 +2167,12 @@
     I.eraseFromParent();
     return true;
   }
+  case TargetOpcode::G_SEXT:
+    // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
+    // over a normal extend.
+    if (selectUSMovFromExtend(I, MRI))
+      return true;
+    return false;
   case TargetOpcode::G_BR:
     return false;
   case TargetOpcode::G_SHL:
@@ -3054,6 +3061,9 @@
   }
 
   case TargetOpcode::G_ANYEXT: {
+    if (selectUSMovFromExtend(I, MRI))
+      return true;
+
     const Register DstReg = I.getOperand(0).getReg();
     const Register SrcReg = I.getOperand(1).getReg();
 
@@ -3100,6 +3110,9 @@
   case TargetOpcode::G_ZEXT:
   case TargetOpcode::G_SEXT_INREG:
   case TargetOpcode::G_SEXT: {
+    if (selectUSMovFromExtend(I, MRI))
+      return true;
+
     unsigned Opcode = I.getOpcode();
     const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
     const Register DefReg = I.getOperand(0).getReg();
@@ -4851,6 +4864,73 @@
   return InsElt;
 }
 
+bool AArch64InstructionSelector::selectUSMovFromExtend(
+    MachineInstr &MI, MachineRegisterInfo &MRI) {
+  if (MI.getOpcode() != TargetOpcode::G_SEXT &&
+      MI.getOpcode() != TargetOpcode::G_ZEXT &&
+      MI.getOpcode() != TargetOpcode::G_ANYEXT)
+    return false;
+  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
+  const Register DefReg = MI.getOperand(0).getReg();
+  const LLT DstTy = MRI.getType(DefReg);
+  unsigned DstSize = DstTy.getSizeInBits();
+
+  // Bail out early on destination sizes the opcode table below does not
+  // cover, so that the llvm_unreachable further down cannot fire.
+  if (DstSize != 32 && DstSize != 64)
+    return false;
+
+  MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
+                                       MI.getOperand(1).getReg(), MRI);
+  int64_t Lane;
+  if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
+    return false;
+  Register Src0 = Extract->getOperand(1).getReg();
+
+  const LLT &VecTy = MRI.getType(Src0);
+
+  if (VecTy.getSizeInBits() != 128) {
+    const MachineInstr *ScalarToVector = emitScalarToVector(
+        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
+    assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
+    Src0 = ScalarToVector->getOperand(0).getReg();
+  }
+
+  unsigned Opcode;
+  if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
+    Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
+  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
+    Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
+  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
+    Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
+  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
+    Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
+  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
+    Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
+  else
+    llvm_unreachable("Unexpected type combo for S/UMov!");
+
+  // We may need to generate one of these, depending on the type and sign of the
+  // input:
+  //  DstReg = SMOV Src0, Lane;
+  //  NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
+  MachineInstr *ExtI = nullptr;
+  if (DstSize == 64 && !IsSigned) {
+    Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+    MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
+    ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
+               .addImm(0)
+               .addUse(NewReg)
+               .addImm(AArch64::sub_32);
+    RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
+  } else
+    ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
+
+  constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
                                                  MachineRegisterInfo &MRI) {
   assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -576,7 +576,8 @@
         const LLT &VecTy = Query.Types[1];
         return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
                VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
-               VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0;
+               VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s32 ||
+               VecTy == v2p0;
       })
       .minScalarOrEltIf(
           [=](const LegalityQuery &Query) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt-with-extend.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt-with-extend.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt-with-extend.mir
@@ -0,0 +1,303 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: si64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: si64
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[SMOVvi32to64_:%[0-9]+]]:gpr64 = SMOVvi32to64 [[COPY]], 1
+    ; CHECK: $x0 = COPY [[SMOVvi32to64_]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %3:gpr(s64) = G_CONSTANT i64 1
+    %2:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<4 x s32>), %3(s64)
+    %5:gpr(s32) = COPY %2(s32)
+    %4:gpr(s64) = G_SEXT %5(s32)
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: si64_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: si64_2
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[SMOVvi32to64_:%[0-9]+]]:gpr64 = SMOVvi32to64 [[INSERT_SUBREG]], 1
+    ; CHECK: $x0 = COPY [[SMOVvi32to64_]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:fpr(<2 x s32>) = COPY $d0
+    %3:gpr(s64) = G_CONSTANT i64 1
+    %2:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %3(s64)
+    %5:gpr(s32) = COPY %2(s32)
+    %4:gpr(s64) = G_SEXT %5(s32)
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: zi64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: zi64
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[UMOVvi32_:%[0-9]+]]:gpr32 = UMOVvi32 [[COPY]], 1
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[UMOVvi32_]], %subreg.sub_32
+    ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %3:gpr(s64) = G_CONSTANT i64 1
+    %2:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<4 x s32>), %3(s64)
+    %5:gpr(s32) = COPY %2(s32)
+    %4:gpr(s64) = G_ZEXT %5(s32)
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: zi64_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: zi64_2
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[UMOVvi32_:%[0-9]+]]:gpr32 = UMOVvi32 [[INSERT_SUBREG]], 1
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[UMOVvi32_]], %subreg.sub_32
+    ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:fpr(<2 x s32>) = COPY $d0
+    %3:gpr(s64) = G_CONSTANT i64 1
+    %2:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %3(s64)
+    %5:gpr(s32) = COPY %2(s32)
+    %4:gpr(s64) = G_ZEXT %5(s32)
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: si32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: si32
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[SMOVvi16to32_:%[0-9]+]]:gpr32 = SMOVvi16to32 [[COPY]], 1
+    ; CHECK: $w0 = COPY [[SMOVvi16to32_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<8 x s16>) = COPY $q0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<8 x s16>), %4(s64)
+    %6:gpr(s16) = COPY %3(s16)
+    %5:gpr(s32) = G_SEXT %6(s16)
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: zi32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: zi32
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[UMOVvi16_:%[0-9]+]]:gpr32 = UMOVvi16 [[COPY]], 1
+    ; CHECK: $w0 = COPY [[UMOVvi16_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<8 x s16>) = COPY $q0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<8 x s16>), %4(s64)
+    %6:gpr(s16) = COPY %3(s16)
+    %5:gpr(s32) = G_ZEXT %6(s16)
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: si32_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: si32_2
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[SMOVvi16to32_:%[0-9]+]]:gpr32 = SMOVvi16to32 [[INSERT_SUBREG]], 1
+    ; CHECK: $w0 = COPY [[SMOVvi16to32_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<4 x s16>) = COPY $d0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %4(s64)
+    %6:gpr(s16) = COPY %3(s16)
+    %5:gpr(s32) = G_SEXT %6(s16)
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: zi32_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: zi32_2
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[UMOVvi16_:%[0-9]+]]:gpr32 = UMOVvi16 [[INSERT_SUBREG]], 1
+    ; CHECK: $w0 = COPY [[UMOVvi16_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<4 x s16>) = COPY $d0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %4(s64)
+    %6:gpr(s16) = COPY %3(s16)
+    %5:gpr(s32) = G_ZEXT %6(s16)
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: si16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: si16
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[SMOVvi8to32_:%[0-9]+]]:gpr32 = SMOVvi8to32 [[COPY]], 1
+    ; CHECK: $w0 = COPY [[SMOVvi8to32_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<16 x s8>) = COPY $q0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<16 x s8>), %4(s64)
+    %7:gpr(s8) = COPY %3(s8)
+    %6:gpr(s32) = G_SEXT %7(s8)
+    $w0 = COPY %6(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: zi16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: zi16
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[UMOVvi8_:%[0-9]+]]:gpr32 = UMOVvi8 [[COPY]], 1
+    ; CHECK: $w0 = COPY [[UMOVvi8_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<16 x s8>) = COPY $q0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<16 x s8>), %4(s64)
+    %7:gpr(s8) = COPY %3(s8)
+    %6:gpr(s32) = G_ZEXT %7(s8)
+    $w0 = COPY %6(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: si16_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: si16_2
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[SMOVvi8to32_:%[0-9]+]]:gpr32 = SMOVvi8to32 [[INSERT_SUBREG]], 1
+    ; CHECK: $w0 = COPY [[SMOVvi8to32_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<8 x s8>) = COPY $d0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<8 x s8>), %4(s64)
+    %7:gpr(s8) = COPY %3(s8)
+    %6:gpr(s32) = G_SEXT %7(s8)
+    $w0 = COPY %6(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: zi16_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: zi16_2
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[UMOVvi8_:%[0-9]+]]:gpr32 = UMOVvi8 [[INSERT_SUBREG]], 1
+    ; CHECK: $w0 = COPY [[UMOVvi8_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<8 x s8>) = COPY $d0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<8 x s8>), %4(s64)
+    %7:gpr(s8) = COPY %3(s8)
+    %6:gpr(s32) = G_ZEXT %7(s8)
+    $w0 = COPY %6(s32)
+    RET_ReallyLR implicit $w0
+
+...
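
Reviewer note (not part of the patch): the asymmetry the tests above exercise is worth spelling out. AArch64 has dedicated widening SMOVvi8to64/SMOVvi16to64/SMOVvi32to64 encodings for the signed cases, but no UMOVvi*to64 forms, because a UMOV that writes a W register already zeroes bits [63:32] of the X register; the selector therefore emits the 32-bit UMOV plus a SUBREG_TO_REG for zext-to-i64 (see the zi64 test). A minimal standalone C++ sketch of the mapping, where the helper pickMov is hypothetical and merely restates the if/else chain in selectUSMovFromExtend (the string names mirror the AArch64::SMOVvi*/UMOVvi* opcodes):

#include <cassert>
#include <string>

// Illustrative restatement of selectUSMovFromExtend's opcode table.
static std::string pickMov(unsigned DstSize, unsigned EltSize, bool IsSigned) {
  if (DstSize == 64 && EltSize == 32)
    return IsSigned ? "SMOVvi32to64" : "UMOVvi32";
  if (DstSize == 64 && EltSize == 16)
    return IsSigned ? "SMOVvi16to64" : "UMOVvi16";
  if (DstSize == 64 && EltSize == 8)
    return IsSigned ? "SMOVvi8to64" : "UMOVvi8";
  if (DstSize == 32 && EltSize == 16)
    return IsSigned ? "SMOVvi16to32" : "UMOVvi16";
  if (DstSize == 32 && EltSize == 8)
    return IsSigned ? "SMOVvi8to32" : "UMOVvi8";
  return "unsupported"; // the selector returns false before reaching these
}

int main() {
  // Signed 64-bit results need the dedicated widening SMOV encodings.
  assert(pickMov(64, 16, /*IsSigned=*/true) == "SMOVvi16to64");
  // Unsigned 64-bit results reuse the 32-bit UMOV; the W-register write
  // zero-extends, so only a SUBREG_TO_REG is layered on top.
  assert(pickMov(64, 32, /*IsSigned=*/false) == "UMOVvi32");
  return 0;
}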
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir
@@ -225,10 +225,8 @@
     ; CHECK-LABEL: name: v16s8
     ; CHECK: liveins: $q0
     ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr8 = COPY [[COPY]].bsub
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY1]], %subreg.bsub
-    ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
-    ; CHECK: $w0 = COPY [[COPY2]]
+    ; CHECK: [[UMOVvi8_:%[0-9]+]]:gpr32 = UMOVvi8 [[COPY]], 0
+    ; CHECK: $w0 = COPY [[UMOVvi8_]]
     ; CHECK: RET_ReallyLR implicit $w0
     %0:fpr(<16 x s8>) = COPY $q0
     %2:gpr(s64) = G_CONSTANT i64 0
@@ -238,6 +236,35 @@
     $w0 = COPY %3(s32)
     RET_ReallyLR implicit $w0
 
+...
+---
+name: v8s8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$d0' }
+body: |
+  bb.1:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: v8s8
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[UMOVvi8_:%[0-9]+]]:gpr32 = UMOVvi8 [[INSERT_SUBREG]], 0
+    ; CHECK: $w0 = COPY [[UMOVvi8_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<8 x s8>) = COPY $d0
+    %2:gpr(s64) = G_CONSTANT i64 0
+    %1:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<8 x s8>), %2(s64)
+    %4:gpr(s8) = COPY %1(s8)
+    %3:gpr(s32) = G_ANYEXT %4(s8)
+    $w0 = COPY %3(s32)
+    RET_ReallyLR implicit $w0
+
 ...
 ---
 name: v2p0
diff --git a/llvm/test/CodeGen/AArch64/extract-sext-zext.ll b/llvm/test/CodeGen/AArch64/extract-sext-zext.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/extract-sext-zext.ll
@@ -0,0 +1,356 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ISEL
+; RUN: llc -mtriple=aarch64-eabi -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GLOBAL
+
+define i64 @extract_v2i64(<2 x i64> %x, i32 %y) {
+; CHECK-ISEL-LABEL: extract_v2i64:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    mov x0, v0.d[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: extract_v2i64:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    mov d0, v0.d[1]
+; CHECK-GLOBAL-NEXT:    fmov x0, d0
+; CHECK-GLOBAL-NEXT:    ret
+  %ext = extractelement <2 x i64> %x, i32 1
+  ret i64 %ext
+}
+
+define i64 @extract_v1i64(<1 x i64> %x, i32 %y) {
+; CHECK-ISEL-LABEL: extract_v1i64:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: extract_v1i64:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    fmov x0, d0
+; CHECK-GLOBAL-NEXT:    ret
+  %ext = extractelement <1 x i64> %x, i32 1
+  ret i64 %ext
+}
+
+define i32 @extract_v4i32(<4 x i32> %x, i32 %y) {
+; CHECK-ISEL-LABEL: extract_v4i32:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    mov w0, v0.s[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: extract_v4i32:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    mov s0, v0.s[1]
+; CHECK-GLOBAL-NEXT:    fmov w0, s0
+; CHECK-GLOBAL-NEXT:    ret
+  %ext = extractelement <4 x i32> %x, i32 1
+  ret i32 %ext
+}
+
+define i32 @extract_v2i32(<2 x i32> %x, i32 %y) {
+; CHECK-ISEL-LABEL: extract_v2i32:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-ISEL-NEXT:    mov w0, v0.s[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: extract_v2i32:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GLOBAL-NEXT:    mov s0, v0.s[1]
+; CHECK-GLOBAL-NEXT:    fmov w0, s0
+; CHECK-GLOBAL-NEXT:    ret
+  %ext = extractelement <2 x i32> %x, i32 1
+  ret i32 %ext
+}
+
+define i16 @extract_v8i16(<8 x i16> %x, i32 %y) {
+; CHECK-LABEL: extract_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.h[1]
+; CHECK-NEXT:    ret
+  %ext = extractelement <8 x i16> %x, i32 1
+  ret i16 %ext
+}
+
+define i16 @extract_v4i16(<4 x i16> %x, i32 %y) {
+; CHECK-LABEL: extract_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w0, v0.h[1]
+; CHECK-NEXT:    ret
+  %ext = extractelement <4 x i16> %x, i32 1
+  ret i16 %ext
+}
+
+define i8 @extract_v16i8(<16 x i8> %x, i32 %y) {
+; CHECK-LABEL: extract_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.b[1]
+; CHECK-NEXT:    ret
+  %ext = extractelement <16 x i8> %x, i32 1
+  ret i8 %ext
+}
+
+define i8 @extract_v8i8(<8 x i8> %x, i32 %y) {
+; CHECK-LABEL: extract_v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w0, v0.b[1]
+; CHECK-NEXT:    ret
+  %ext = extractelement <8 x i8> %x, i32 1
+  ret i8 %ext
+}
+
+
+define i64 @sv2i32i64(<2 x i32> %x) {
+; CHECK-LABEL: sv2i32i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    smov x0, v0.s[1]
+; CHECK-NEXT:    ret
+  %e = extractelement <2 x i32> %x, i64 1
+  %s = sext i32 %e to i64
+  ret i64 %s
+}
+
+define i64 @sv4i32i64(<4 x i32> %x) {
+; CHECK-LABEL: sv4i32i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smov x0, v0.s[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i64 2
+  %s = sext i32 %e to i64
+  ret i64 %s
+}
+
+define i64 @sv4i16i64(<4 x i16> %x) {
+; CHECK-LABEL: sv4i16i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    smov x0, v0.h[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i16> %x, i64 2
+  %s = sext i16 %e to i64
+  ret i64 %s
+}
+
+define i64 @sv8i16i64(<8 x i16> %x) {
+; CHECK-LABEL: sv8i16i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smov x0, v0.h[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i64 2
+  %s = sext i16 %e to i64
+  ret i64 %s
+}
+
+define i64 @sv8i8i64(<8 x i8> %x) {
+; CHECK-LABEL: sv8i8i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    smov x0, v0.b[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i8> %x, i64 2
+  %s = sext i8 %e to i64
+  ret i64 %s
+}
+
+define i64 @sv16i8i64(<16 x i8> %x) {
+; CHECK-LABEL: sv16i8i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smov x0, v0.b[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <16 x i8> %x, i64 2
+  %s = sext i8 %e to i64
+  ret i64 %s
+}
+
+define i32 @sv8i16i32(<8 x i16> %x) {
+; CHECK-LABEL: sv8i16i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smov w0, v0.h[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i64 2
+  %s = sext i16 %e to i32
+  ret i32 %s
+}
+
+define i32 @sv4i16i32(<4 x i16> %x) {
+; CHECK-LABEL: sv4i16i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    smov w0, v0.h[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i16> %x, i64 2
+  %s = sext i16 %e to i32
+  ret i32 %s
+}
+
+define i32 @sv16i8i32(<16 x i8> %x) {
+; CHECK-LABEL: sv16i8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smov w0, v0.b[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <16 x i8> %x, i64 2
+  %s = sext i8 %e to i32
+  ret i32 %s
+}
+
+define i32 @sv8i8i32(<8 x i8> %x) {
+; CHECK-LABEL: sv8i8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    smov w0, v0.b[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i8> %x, i64 2
+  %s = sext i8 %e to i32
+  ret i32 %s
+}
+
+define i16 @sv16i8i16(<16 x i8> %x) {
+; CHECK-LABEL: sv16i8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smov w0, v0.b[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <16 x i8> %x, i64 2
+  %s = sext i8 %e to i16
+  ret i16 %s
+}
+
+define i16 @sv8i8i16(<8 x i8> %x) {
+; CHECK-LABEL: sv8i8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    smov w0, v0.b[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i8> %x, i64 2
+  %s = sext i8 %e to i16
+  ret i16 %s
+}
+
+
+
+define i64 @zv2i32i64(<2 x i32> %x) {
+; CHECK-LABEL: zv2i32i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov w0, v0.s[1]
+; CHECK-NEXT:    ret
+  %e = extractelement <2 x i32> %x, i64 1
+  %s = zext i32 %e to i64
+  ret i64 %s
+}
+
+define i64 @zv4i32i64(<4 x i32> %x) {
+; CHECK-LABEL: zv4i32i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, v0.s[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i64 2
+  %s = zext i32 %e to i64
+  ret i64 %s
+}
+
+define i64 @zv4i16i64(<4 x i16> %x) {
+; CHECK-LABEL: zv4i16i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w0, v0.h[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i16> %x, i64 2
+  %s = zext i16 %e to i64
+  ret i64 %s
+}
+
+define i64 @zv8i16i64(<8 x i16> %x) {
+; CHECK-LABEL: zv8i16i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.h[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i64 2
+  %s = zext i16 %e to i64
+  ret i64 %s
+}
+
+define i64 @zv8i8i64(<8 x i8> %x) {
+; CHECK-LABEL: zv8i8i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w0, v0.b[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i8> %x, i64 2
+  %s = zext i8 %e to i64
+  ret i64 %s
+}
+
+define i64 @zv16i8i64(<16 x i8> %x) {
+; CHECK-LABEL: zv16i8i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.b[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <16 x i8> %x, i64 2
+  %s = zext i8 %e to i64
+  ret i64 %s
+}
+
+define i32 @zv8i16i32(<8 x i16> %x) {
+; CHECK-LABEL: zv8i16i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.h[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i64 2
+  %s = zext i16 %e to i32
+  ret i32 %s
+}
+
+define i32 @zv4i16i32(<4 x i16> %x) {
+; CHECK-LABEL: zv4i16i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w0, v0.h[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i16> %x, i64 2
+  %s = zext i16 %e to i32
+  ret i32 %s
+}
+
+define i32 @zv16i8i32(<16 x i8> %x) {
+; CHECK-LABEL: zv16i8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.b[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <16 x i8> %x, i64 2
+  %s = zext i8 %e to i32
+  ret i32 %s
+}
+
+define i32 @zv8i8i32(<8 x i8> %x) {
+; CHECK-LABEL: zv8i8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w0, v0.b[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i8> %x, i64 2
+  %s = zext i8 %e to i32
+  ret i32 %s
+}
+
+define i16 @zv16i8i16(<16 x i8> %x) {
+; CHECK-LABEL: zv16i8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w0, v0.b[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <16 x i8> %x, i64 2
+  %s = zext i8 %e to i16
+  ret i16 %s
+}
+
+define i16 @zv8i8i16(<8 x i8> %x) {
+; CHECK-LABEL: zv8i8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w0, v0.b[2]
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i8> %x, i64 2
+  %s = zext i8 %e to i16
+  ret i16 %s
+}
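
Reviewer note (not part of the patch): for reference, a source-level reproducer that should exercise the new selection path. The function names are made up, and the instructions named in the comments are the expected output under the patched GlobalISel, e.g. when built with clang --target=aarch64 -O2 -fglobal-isel:

#include <arm_neon.h>
#include <stdint.h>

// sext(extractelement): the lane read and the sign extension should now
// fold into a single instruction, e.g. "smov x0, v0.s[1]".
int64_t lane_sext(int32x4_t v) {
  return (int64_t)vgetq_lane_s32(v, 1);
}

// zext(extractelement): expected to become "umov w0, v0.h[3]"; the write
// to w0 already clears the upper 32 bits of x0, so no extra extend is
// needed.
uint64_t lane_zext(uint16x8_t v) {
  return (uint64_t)vgetq_lane_u16(v, 3);
}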