Index: llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td =================================================================== --- llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -144,6 +144,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4916,6 +4916,13 @@ case TargetOpcode::G_FNEG: case TargetOpcode::G_FABS: case TargetOpcode::G_FSQRT: + case TargetOpcode::G_FCEIL: + case TargetOpcode::G_FFLOOR: + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_FRINT: + case TargetOpcode::G_INTRINSIC_ROUND: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + case TargetOpcode::G_INTRINSIC_TRUNC: case TargetOpcode::G_BSWAP: case TargetOpcode::G_FCANONICALIZE: case TargetOpcode::G_SEXT_INREG: Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -190,8 +190,6 @@ MachineRegisterInfo &MRI); bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI); bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI); - bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const; - bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const; bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI); bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI); bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI); @@ -3494,10 +3492,6 @@ return false; return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - case TargetOpcode::G_INTRINSIC_TRUNC: - return selectIntrinsicTrunc(I, MRI); - case TargetOpcode::G_INTRINSIC_ROUND: - return selectIntrinsicRound(I, MRI); case TargetOpcode::G_BUILD_VECTOR: return selectBuildVector(I, MRI); case TargetOpcode::G_MERGE_VALUES: @@ -3696,116 +3690,6 @@ return true; } -bool AArch64InstructionSelector::selectIntrinsicTrunc( - MachineInstr &I, MachineRegisterInfo &MRI) const { - const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); - - // Select the correct opcode. - unsigned Opc = 0; - if (!SrcTy.isVector()) { - switch (SrcTy.getSizeInBits()) { - default: - case 16: - Opc = AArch64::FRINTZHr; - break; - case 32: - Opc = AArch64::FRINTZSr; - break; - case 64: - Opc = AArch64::FRINTZDr; - break; - } - } else { - unsigned NumElts = SrcTy.getNumElements(); - switch (SrcTy.getElementType().getSizeInBits()) { - default: - break; - case 16: - if (NumElts == 4) - Opc = AArch64::FRINTZv4f16; - else if (NumElts == 8) - Opc = AArch64::FRINTZv8f16; - break; - case 32: - if (NumElts == 2) - Opc = AArch64::FRINTZv2f32; - else if (NumElts == 4) - Opc = AArch64::FRINTZv4f32; - break; - case 64: - if (NumElts == 2) - Opc = AArch64::FRINTZv2f64; - break; - } - } - - if (!Opc) { - // Didn't get an opcode above, bail. - LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n"); - return false; - } - - // Legalization would have set us up perfectly for this; we just need to - // set the opcode and move on. - I.setDesc(TII.get(Opc)); - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); -} - -bool AArch64InstructionSelector::selectIntrinsicRound( - MachineInstr &I, MachineRegisterInfo &MRI) const { - const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); - - // Select the correct opcode. - unsigned Opc = 0; - if (!SrcTy.isVector()) { - switch (SrcTy.getSizeInBits()) { - default: - case 16: - Opc = AArch64::FRINTAHr; - break; - case 32: - Opc = AArch64::FRINTASr; - break; - case 64: - Opc = AArch64::FRINTADr; - break; - } - } else { - unsigned NumElts = SrcTy.getNumElements(); - switch (SrcTy.getElementType().getSizeInBits()) { - default: - break; - case 16: - if (NumElts == 4) - Opc = AArch64::FRINTAv4f16; - else if (NumElts == 8) - Opc = AArch64::FRINTAv8f16; - break; - case 32: - if (NumElts == 2) - Opc = AArch64::FRINTAv2f32; - else if (NumElts == 4) - Opc = AArch64::FRINTAv4f32; - break; - case 64: - if (NumElts == 2) - Opc = AArch64::FRINTAv2f64; - break; - } - } - - if (!Opc) { - // Didn't get an opcode above, bail. - LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n"); - return false; - } - - // Legalization would have set us up perfectly for this; we just need to - // set the opcode and move on. - I.setDesc(TII.get(Opc)); - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); -} - bool AArch64InstructionSelector::selectVectorICmp( MachineInstr &I, MachineRegisterInfo &MRI) { Register DstReg = I.getOperand(0).getReg(); Index: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -243,9 +243,7 @@ getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64}); - getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, - G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, - G_FNEARBYINT, G_INTRINSIC_LRINT}) + getActionDefinitionsBuilder({G_FMA, G_INTRINSIC_LRINT}) // If we don't have full FP16 support, then scalarize the elements of // vectors containing fp16 types. .fewerElementsIf( @@ -950,8 +948,10 @@ // TODO: Vector types. getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0)); - getActionDefinitionsBuilder( - {G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM, G_FMAXIMUM, G_FMINIMUM}) + getActionDefinitionsBuilder({G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM, G_FMAXIMUM, + G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, + G_FNEARBYINT, G_INTRINSIC_TRUNC, + G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN}) .legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64}) .legalIf([=](const LegalityQuery &Query) { const auto &Ty = Query.Types[0]; Index: llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -431,6 +431,7 @@ case TargetOpcode::G_FRINT: case TargetOpcode::G_INTRINSIC_TRUNC: case TargetOpcode::G_INTRINSIC_ROUND: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: case TargetOpcode::G_FMAXNUM: case TargetOpcode::G_FMINNUM: case TargetOpcode::G_FMAXIMUM: Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir @@ -25,33 +25,15 @@ ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT]] - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL]](s32) - ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; CHECK-NEXT: [[FCEIL1:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT1]] - ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL1]](s32) - ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; CHECK-NEXT: [[FCEIL2:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT2]] - ; CHECK-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL2]](s32) - ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; CHECK-NEXT: [[FCEIL3:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT3]] - ; CHECK-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL3]](s32) - ; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) - ; CHECK-NEXT: [[FCEIL4:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT4]] - ; CHECK-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL4]](s32) - ; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) - ; CHECK-NEXT: [[FCEIL5:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT5]] - ; CHECK-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL5]](s32) - ; CHECK-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16) - ; CHECK-NEXT: [[FCEIL6:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT6]] - ; CHECK-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL6]](s32) - ; CHECK-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16) - ; CHECK-NEXT: [[FCEIL7:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT7]] - ; CHECK-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL7]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16) - ; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>) + ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:_(<4 x s32>) = G_FCEIL [[FPEXT]] + ; CHECK-NEXT: [[FCEIL1:%[0-9]+]]:_(<4 x s32>) = G_FCEIL [[FPEXT1]] + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FCEIL]](<4 x s32>) + ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FCEIL1]](<4 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>) + ; CHECK-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<8 x s16>) = COPY $q0 %1:_(<8 x s16>) = G_FCEIL %0 @@ -73,21 +55,10 @@ ; CHECK: liveins: $d0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT]] - ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL]](s32) - ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; CHECK-NEXT: [[FCEIL1:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT1]] - ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL1]](s32) - ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; CHECK-NEXT: [[FCEIL2:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT2]] - ; CHECK-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL2]](s32) - ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; CHECK-NEXT: [[FCEIL3:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT3]] - ; CHECK-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:_(<4 x s32>) = G_FCEIL [[FPEXT]] + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FCEIL]](<4 x s32>) + ; CHECK-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(<4 x s16>) = COPY $d0 %1:_(<4 x s16>) = G_FCEIL %0 Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-frint.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalize-frint.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-frint.mir @@ -149,21 +149,10 @@ ; NOFP16: liveins: $d0 ; NOFP16-NEXT: {{ $}} ; NOFP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 - ; NOFP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; NOFP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; NOFP16-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]] - ; NOFP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32) - ; NOFP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; NOFP16-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT1]] - ; NOFP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT1]](s32) - ; NOFP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; NOFP16-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT2]] - ; NOFP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT2]](s32) - ; NOFP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; NOFP16-NEXT: [[FRINT3:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT3]] - ; NOFP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT3]](s32) - ; NOFP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; NOFP16-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; NOFP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>) + ; NOFP16-NEXT: [[FRINT:%[0-9]+]]:_(<4 x s32>) = G_FRINT [[FPEXT]] + ; NOFP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FRINT]](<4 x s32>) + ; NOFP16-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>) ; NOFP16-NEXT: RET_ReallyLR implicit $d0 ; ; FP16-LABEL: name: test_v4f16.rint @@ -192,33 +181,15 @@ ; NOFP16: liveins: $q0 ; NOFP16-NEXT: {{ $}} ; NOFP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; NOFP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; NOFP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; NOFP16-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]] - ; NOFP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32) - ; NOFP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; NOFP16-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT1]] - ; NOFP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT1]](s32) - ; NOFP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; NOFP16-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT2]] - ; NOFP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT2]](s32) - ; NOFP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; NOFP16-NEXT: [[FRINT3:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT3]] - ; NOFP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT3]](s32) - ; NOFP16-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) - ; NOFP16-NEXT: [[FRINT4:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT4]] - ; NOFP16-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT4]](s32) - ; NOFP16-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) - ; NOFP16-NEXT: [[FRINT5:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT5]] - ; NOFP16-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT5]](s32) - ; NOFP16-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16) - ; NOFP16-NEXT: [[FRINT6:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT6]] - ; NOFP16-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT6]](s32) - ; NOFP16-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16) - ; NOFP16-NEXT: [[FRINT7:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT7]] - ; NOFP16-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT7]](s32) - ; NOFP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16) - ; NOFP16-NEXT: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; NOFP16-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; NOFP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>) + ; NOFP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>) + ; NOFP16-NEXT: [[FRINT:%[0-9]+]]:_(<4 x s32>) = G_FRINT [[FPEXT]] + ; NOFP16-NEXT: [[FRINT1:%[0-9]+]]:_(<4 x s32>) = G_FRINT [[FPEXT1]] + ; NOFP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FRINT]](<4 x s32>) + ; NOFP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FRINT1]](<4 x s32>) + ; NOFP16-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>) + ; NOFP16-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; NOFP16-NEXT: RET_ReallyLR implicit $q0 ; ; FP16-LABEL: name: test_v8f16.rint Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-round.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-round.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-round.mir @@ -109,33 +109,15 @@ ; NO-FP16: liveins: $q0 ; NO-FP16-NEXT: {{ $}} ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_ROUND:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT]] - ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND]](s32) - ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_ROUND1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT1]] - ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND1]](s32) - ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_ROUND2:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT2]] - ; NO-FP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND2]](s32) - ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_ROUND3:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT3]] - ; NO-FP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND3]](s32) - ; NO-FP16-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_ROUND4:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT4]] - ; NO-FP16-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND4]](s32) - ; NO-FP16-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_ROUND5:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT5]] - ; NO-FP16-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND5]](s32) - ; NO-FP16-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_ROUND6:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT6]] - ; NO-FP16-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND6]](s32) - ; NO-FP16-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_ROUND7:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT7]] - ; NO-FP16-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND7]](s32) - ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16) - ; NO-FP16-NEXT: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>) + ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>) + ; NO-FP16-NEXT: [[INTRINSIC_ROUND:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUND [[FPEXT]] + ; NO-FP16-NEXT: [[INTRINSIC_ROUND1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUND [[FPEXT1]] + ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_ROUND]](<4 x s32>) + ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_ROUND1]](<4 x s32>) + ; NO-FP16-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>) + ; NO-FP16-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; NO-FP16-NEXT: RET_ReallyLR implicit $q0 ; ; FP16-LABEL: name: test_v8f16.round @@ -167,21 +149,10 @@ ; NO-FP16: liveins: $d0 ; NO-FP16-NEXT: {{ $}} ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 - ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_ROUND:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT]] - ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND]](s32) - ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_ROUND1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT1]] - ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND1]](s32) - ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_ROUND2:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT2]] - ; NO-FP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND2]](s32) - ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_ROUND3:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT3]] - ; NO-FP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND3]](s32) - ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; NO-FP16-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>) + ; NO-FP16-NEXT: [[INTRINSIC_ROUND:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUND [[FPEXT]] + ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_ROUND]](<4 x s32>) + ; NO-FP16-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>) ; NO-FP16-NEXT: RET_ReallyLR implicit $d0 ; ; FP16-LABEL: name: test_v4f16.round Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-roundeven.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-roundeven.mir @@ -0,0 +1,349 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py + +# RUN:llc %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -run-pass=legalizer -mattr=-fullfp16 -o - | FileCheck %s --check-prefix=NO-FP16 +# RUN:llc %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -run-pass=legalizer -mattr=+fullfp16 -o - | FileCheck %s --check-prefix=FP16 + +... +--- +name: test_f16.roundeven +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $h0 + + ; NO-FP16-LABEL: name: test_f16.roundeven + ; NO-FP16: liveins: $h0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[COPY]](s16) + ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FPEXT]] + ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUNDEVEN]](s32) + ; NO-FP16-NEXT: $h0 = COPY [[FPTRUNC]](s16) + ; NO-FP16-NEXT: RET_ReallyLR implicit $h0 + ; + ; FP16-LABEL: name: test_f16.roundeven + ; FP16: liveins: $h0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s16) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; FP16-NEXT: $h0 = COPY [[INTRINSIC_ROUNDEVEN]](s16) + ; FP16-NEXT: RET_ReallyLR implicit $h0 + %0:_(s16) = COPY $h0 + %1:_(s16) = G_INTRINSIC_ROUNDEVEN %0 + $h0 = COPY %1(s16) + RET_ReallyLR implicit $h0 + +... +--- +name: test_f32.roundeven +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $s0 + + ; NO-FP16-LABEL: name: test_f32.roundeven + ; NO-FP16: liveins: $s0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; NO-FP16-NEXT: $s0 = COPY [[INTRINSIC_ROUNDEVEN]](s32) + ; NO-FP16-NEXT: RET_ReallyLR implicit $s0 + ; + ; FP16-LABEL: name: test_f32.roundeven + ; FP16: liveins: $s0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; FP16-NEXT: $s0 = COPY [[INTRINSIC_ROUNDEVEN]](s32) + ; FP16-NEXT: RET_ReallyLR implicit $s0 + %0:_(s32) = COPY $s0 + %1:_(s32) = G_INTRINSIC_ROUNDEVEN %0 + $s0 = COPY %1(s32) + RET_ReallyLR implicit $s0 + +... +--- +name: test_f64.roundeven +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0 + + ; NO-FP16-LABEL: name: test_f64.roundeven + ; NO-FP16: liveins: $d0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s64) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; NO-FP16-NEXT: $d0 = COPY [[INTRINSIC_ROUNDEVEN]](s64) + ; NO-FP16-NEXT: RET_ReallyLR implicit $d0 + ; + ; FP16-LABEL: name: test_f64.roundeven + ; FP16: liveins: $d0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s64) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; FP16-NEXT: $d0 = COPY [[INTRINSIC_ROUNDEVEN]](s64) + ; FP16-NEXT: RET_ReallyLR implicit $d0 + %0:_(s64) = COPY $d0 + %1:_(s64) = G_INTRINSIC_ROUNDEVEN %0 + $d0 = COPY %1(s64) + RET_ReallyLR implicit $d0 + +... +--- +name: test_v8f16.roundeven +alignment: 4 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $q0 + + ; NO-FP16-LABEL: name: test_v8f16.roundeven + ; NO-FP16: liveins: $q0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>) + ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>) + ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUNDEVEN [[FPEXT]] + ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUNDEVEN [[FPEXT1]] + ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_ROUNDEVEN]](<4 x s32>) + ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_ROUNDEVEN1]](<4 x s32>) + ; NO-FP16-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>) + ; NO-FP16-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $q0 + ; + ; FP16-LABEL: name: test_v8f16.roundeven + ; FP16: liveins: $q0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<8 x s16>) + ; FP16-NEXT: RET_ReallyLR implicit $q0 + %0:_(<8 x s16>) = COPY $q0 + %1:_(<8 x s16>) = G_INTRINSIC_ROUNDEVEN %0 + $q0 = COPY %1(<8 x s16>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v4f16.roundeven +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0 + + ; NO-FP16-LABEL: name: test_v4f16.roundeven + ; NO-FP16: liveins: $d0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>) + ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUNDEVEN [[FPEXT]] + ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_ROUNDEVEN]](<4 x s32>) + ; NO-FP16-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $d0 + ; + ; FP16-LABEL: name: test_v4f16.roundeven + ; FP16: liveins: $d0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; FP16-NEXT: $d0 = COPY [[INTRINSIC_ROUNDEVEN]](<4 x s16>) + ; FP16-NEXT: RET_ReallyLR implicit $d0 + %0:_(<4 x s16>) = COPY $d0 + %1:_(<4 x s16>) = G_INTRINSIC_ROUNDEVEN %0 + $d0 = COPY %1(<4 x s16>) + RET_ReallyLR implicit $d0 + +... +--- +name: test_v2f32.roundeven +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0 + + ; NO-FP16-LABEL: name: test_v2f32.roundeven + ; NO-FP16: liveins: $d0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; NO-FP16-NEXT: $d0 = COPY [[INTRINSIC_ROUNDEVEN]](<2 x s32>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $d0 + ; + ; FP16-LABEL: name: test_v2f32.roundeven + ; FP16: liveins: $d0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; FP16-NEXT: $d0 = COPY [[INTRINSIC_ROUNDEVEN]](<2 x s32>) + ; FP16-NEXT: RET_ReallyLR implicit $d0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = G_INTRINSIC_ROUNDEVEN %0 + $d0 = COPY %1(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: test_v4f32.roundeven +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +machineFunctionInfo: {} +body: | + bb.0: + liveins: $q0 + + ; NO-FP16-LABEL: name: test_v4f32.roundeven + ; NO-FP16: liveins: $q0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; NO-FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<4 x s32>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $q0 + ; + ; FP16-LABEL: name: test_v4f32.roundeven + ; FP16: liveins: $q0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<4 x s32>) + ; FP16-NEXT: RET_ReallyLR implicit $q0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = G_INTRINSIC_ROUNDEVEN %0 + $q0 = COPY %1(<4 x s32>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v2f64.roundeven +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +machineFunctionInfo: {} +body: | + bb.0: + liveins: $q0 + + ; NO-FP16-LABEL: name: test_v2f64.roundeven + ; NO-FP16: liveins: $q0 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; NO-FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<2 x s64>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $q0 + ; + ; FP16-LABEL: name: test_v2f64.roundeven + ; FP16: liveins: $q0 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<2 x s64>) + ; FP16-NEXT: RET_ReallyLR implicit $q0 + %0:_(<2 x s64>) = COPY $q0 + %1:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN %0 + $q0 = COPY %1(<2 x s64>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v4f64.roundeven +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + + ; NO-FP16-LABEL: name: test_v4f64.roundeven + ; NO-FP16: liveins: $q0, $q1 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; NO-FP16-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN [[COPY1]] + ; NO-FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<2 x s64>) + ; NO-FP16-NEXT: $q1 = COPY [[INTRINSIC_ROUNDEVEN1]](<2 x s64>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $q0 + ; + ; FP16-LABEL: name: test_v4f64.roundeven + ; FP16: liveins: $q0, $q1 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; FP16-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN [[COPY]] + ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN [[COPY1]] + ; FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<2 x s64>) + ; FP16-NEXT: $q1 = COPY [[INTRINSIC_ROUNDEVEN1]](<2 x s64>) + ; FP16-NEXT: RET_ReallyLR implicit $q0 + %0:_(<2 x s64>) = COPY $q0 + %1:_(<2 x s64>) = COPY $q1 + %2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1 + %3:_(<4 x s64>) = G_INTRINSIC_ROUNDEVEN %2 + %4:_(<2 x s64>), %5:_(<2 x s64>) = G_UNMERGE_VALUES %3 + $q0 = COPY %4(<2 x s64>) + $q1 = COPY %5(<2 x s64>) + RET_ReallyLR implicit $q0 + +... +--- +name: test_v2f16.roundeven +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $s0, $s1 + + ; NO-FP16-LABEL: name: test_v2f16.roundeven + ; NO-FP16: liveins: $s0, $s1 + ; NO-FP16-NEXT: {{ $}} + ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0 + ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; NO-FP16-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[DEF]](s16), [[DEF]](s16) + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[BUILD_VECTOR]](<4 x s16>) + ; NO-FP16-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>) + ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC_ROUNDEVEN [[UV2]] + ; NO-FP16-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INTRINSIC_ROUNDEVEN]](<2 x s32>) + ; NO-FP16-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; NO-FP16-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[DEF1]](s32), [[DEF1]](s32) + ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[BUILD_VECTOR1]](<4 x s32>) + ; NO-FP16-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FPTRUNC]](<4 x s16>) + ; NO-FP16-NEXT: $s0 = COPY [[UV6]](<2 x s16>) + ; NO-FP16-NEXT: RET_ReallyLR implicit $s0 + ; + ; FP16-LABEL: name: test_v2f16.roundeven + ; FP16: liveins: $s0, $s1 + ; FP16-NEXT: {{ $}} + ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0 + ; FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; FP16-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[DEF]](s16), [[DEF]](s16) + ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC_ROUNDEVEN [[BUILD_VECTOR]] + ; FP16-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INTRINSIC_ROUNDEVEN]](<4 x s16>) + ; FP16-NEXT: $s0 = COPY [[UV2]](<2 x s16>) + ; FP16-NEXT: RET_ReallyLR implicit $s0 + %0:_(<2 x s16>) = COPY $s0 + %1:_(<2 x s16>) = G_INTRINSIC_ROUNDEVEN %0 + $s0 = COPY %1(<2 x s16>) + RET_ReallyLR implicit $s0 Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-trunc.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-trunc.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-trunc.mir @@ -46,21 +46,10 @@ ; NO-FP16: liveins: $d0 ; NO-FP16-NEXT: {{ $}} ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 - ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT1]] - ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) - ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT2]] - ; NO-FP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) - ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT3]] - ; NO-FP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC3]](s32) - ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; NO-FP16-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>) + ; NO-FP16-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_TRUNC [[FPEXT]] + ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_TRUNC]](<4 x s32>) + ; NO-FP16-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>) ; NO-FP16-NEXT: RET_ReallyLR implicit $d0 ; ; FP16-LABEL: name: test_v4f16.intrinsic_trunc @@ -89,33 +78,15 @@ ; NO-FP16: liveins: $q0 ; NO-FP16-NEXT: {{ $}} ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT1]] - ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) - ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT2]] - ; NO-FP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) - ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT3]] - ; NO-FP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC3]](s32) - ; NO-FP16-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_TRUNC4:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT4]] - ; NO-FP16-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC4]](s32) - ; NO-FP16-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_TRUNC5:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT5]] - ; NO-FP16-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC5]](s32) - ; NO-FP16-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_TRUNC6:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT6]] - ; NO-FP16-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC6]](s32) - ; NO-FP16-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16) - ; NO-FP16-NEXT: [[INTRINSIC_TRUNC7:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] - ; NO-FP16-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC7]](s32) - ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16) - ; NO-FP16-NEXT: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>) + ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>) + ; NO-FP16-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_TRUNC [[FPEXT]] + ; NO-FP16-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_TRUNC [[FPEXT1]] + ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_TRUNC]](<4 x s32>) + ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_TRUNC1]](<4 x s32>) + ; NO-FP16-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>) + ; NO-FP16-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; NO-FP16-NEXT: RET_ReallyLR implicit $q0 ; ; FP16-LABEL: name: test_v8f16.intrinsic_trunc Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-nearbyint.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalize-nearbyint.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-nearbyint.mir @@ -24,21 +24,10 @@ ; NO-FP16: liveins: $d0 ; NO-FP16-NEXT: {{ $}} ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 - ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; NO-FP16-NEXT: [[FNEARBYINT:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT]] - ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT]](s32) - ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; NO-FP16-NEXT: [[FNEARBYINT1:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT1]] - ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT1]](s32) - ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; NO-FP16-NEXT: [[FNEARBYINT2:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT2]] - ; NO-FP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT2]](s32) - ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; NO-FP16-NEXT: [[FNEARBYINT3:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT3]] - ; NO-FP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT3]](s32) - ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16) - ; NO-FP16-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>) + ; NO-FP16-NEXT: [[FNEARBYINT:%[0-9]+]]:_(<4 x s32>) = G_FNEARBYINT [[FPEXT]] + ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FNEARBYINT]](<4 x s32>) + ; NO-FP16-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>) ; NO-FP16-NEXT: RET_ReallyLR implicit $d0 %0:_(<4 x s16>) = COPY $d0 %1:_(<4 x s16>) = G_FNEARBYINT %0 @@ -67,33 +56,15 @@ ; NO-FP16: liveins: $q0 ; NO-FP16-NEXT: {{ $}} ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16) - ; NO-FP16-NEXT: [[FNEARBYINT:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT]] - ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT]](s32) - ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16) - ; NO-FP16-NEXT: [[FNEARBYINT1:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT1]] - ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT1]](s32) - ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16) - ; NO-FP16-NEXT: [[FNEARBYINT2:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT2]] - ; NO-FP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT2]](s32) - ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16) - ; NO-FP16-NEXT: [[FNEARBYINT3:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT3]] - ; NO-FP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT3]](s32) - ; NO-FP16-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16) - ; NO-FP16-NEXT: [[FNEARBYINT4:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT4]] - ; NO-FP16-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT4]](s32) - ; NO-FP16-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16) - ; NO-FP16-NEXT: [[FNEARBYINT5:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT5]] - ; NO-FP16-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT5]](s32) - ; NO-FP16-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16) - ; NO-FP16-NEXT: [[FNEARBYINT6:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT6]] - ; NO-FP16-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT6]](s32) - ; NO-FP16-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16) - ; NO-FP16-NEXT: [[FNEARBYINT7:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT7]] - ; NO-FP16-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT7]](s32) - ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16) - ; NO-FP16-NEXT: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>) + ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>) + ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>) + ; NO-FP16-NEXT: [[FNEARBYINT:%[0-9]+]]:_(<4 x s32>) = G_FNEARBYINT [[FPEXT]] + ; NO-FP16-NEXT: [[FNEARBYINT1:%[0-9]+]]:_(<4 x s32>) = G_FNEARBYINT [[FPEXT1]] + ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FNEARBYINT]](<4 x s32>) + ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FNEARBYINT1]](<4 x s32>) + ; NO-FP16-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>) + ; NO-FP16-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>) ; NO-FP16-NEXT: RET_ReallyLR implicit $q0 %0:_(<8 x s16>) = COPY $q0 %1:_(<8 x s16>) = G_FNEARBYINT %0 Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -158,9 +158,9 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_INTRINSIC_ROUNDEVEN (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined -# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined - +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_READCYCLECOUNTER (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined @@ -442,7 +442,6 @@ # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FMA (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FMAD (opcode {{[0-9]+}}): 1 type index, 0 imm indices @@ -614,6 +613,7 @@ # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_FCEIL (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_FCOS (opcode {{[0-9]+}}): 1 type index, 0 imm indices Index: llvm/test/CodeGen/AArch64/fcvt.ll =================================================================== --- llvm/test/CodeGen/AArch64/fcvt.ll +++ llvm/test/CodeGen/AArch64/fcvt.ll @@ -1,59 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16 ; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 -; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 -; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 - -; CHECK-GI: warning: Instruction selection used fallback path for ceil_v3f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v4f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v3f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v8f32 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for ceil_v7f16 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for ceil_v16f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v3f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v4f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v3f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v8f32 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for floor_v7f16 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for floor_v16f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v3f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v4f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v3f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v8f32 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for nearbyint_v7f16 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for nearbyint_v16f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v2f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v3f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v4f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v2f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v3f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v4f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v8f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v7f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v4f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v8f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v16f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v3f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v4f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v3f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v8f32 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for rint_v7f16 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for rint_v16f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v3f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v4f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v3f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v8f32 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for round_v7f16 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for round_v16f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v3f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v4f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v3f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v8f32 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for trunc_v7f16 -; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for trunc_v16f16 +; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 +; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 define double @ceil_f64(double %a) { ; CHECK-LABEL: ceil_f64: @@ -115,19 +64,30 @@ } define <3 x double> @ceil_v3f64(<3 x double> %a) { -; CHECK-LABEL: ceil_v3f64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: frintp v2.2d, v2.2d -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 -; CHECK-NEXT: frintp v0.2d, v0.2d -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ceil_v3f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: frintp v2.2d, v2.2d +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: frintp v0.2d, v0.2d +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ceil_v3f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: frintp d2, d2 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: frintp v0.2d, v0.2d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret entry: %c = call <3 x double> @llvm.ceil.v3f64(<3 x double> %a) ret <3 x double> %c @@ -155,10 +115,25 @@ } define <3 x float> @ceil_v3f32(<3 x float> %a) { -; CHECK-LABEL: ceil_v3f32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: frintp v0.4s, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ceil_v3f32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: frintp v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ceil_v3f32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: frintp v0.4s, v0.4s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: ret entry: %c = call <3 x float> @llvm.ceil.v3f32(<3 x float> %a) ret <3 x float> %c @@ -236,46 +211,71 @@ ; ; CHECK-GI-NOFP16-LABEL: ceil_v7f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintp s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: frintp s5, s1 -; CHECK-GI-NOFP16-NEXT: frintp s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frintp s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: frintp s3, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s3 -; CHECK-GI-NOFP16-NEXT: frintp s3, s5 -; CHECK-GI-NOFP16-NEXT: frintp s0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frintp v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frintp v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: ceil_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frintp v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.ceil.v7f16(<7 x half> %a) @@ -314,26 +314,9 @@ ; ; CHECK-GI-NOFP16-LABEL: ceil_v4f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintp s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h0 -; CHECK-GI-NOFP16-NEXT: frintp s1, s1 -; CHECK-GI-NOFP16-NEXT: frintp s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s3 -; CHECK-GI-NOFP16-NEXT: frintp s3, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frintp v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: ceil_v4f16: @@ -396,45 +379,12 @@ ; ; CHECK-GI-NOFP16-LABEL: ceil_v8f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintp s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: frintp s5, s1 -; CHECK-GI-NOFP16-NEXT: frintp s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frintp s4, s4 -; CHECK-GI-NOFP16-NEXT: frintp s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frintp s2, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: frintp s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: frintp s0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: frintp v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frintp v2.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: ceil_v8f16: @@ -537,84 +487,18 @@ ; ; CHECK-GI-NOFP16-LABEL: ceil_v16f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-NOFP16-NEXT: fcvt s4, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h1 -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] -; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frintp s4, s4 -; CHECK-GI-NOFP16-NEXT: frintp s5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: frintp s16, s2 -; CHECK-GI-NOFP16-NEXT: frintp s17, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: frintp s6, s6 -; CHECK-GI-NOFP16-NEXT: frintp s7, s7 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s17 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h19 -; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4] -; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s6 -; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt h6, s7 -; CHECK-GI-NOFP16-NEXT: frintp s16, s16 -; CHECK-GI-NOFP16-NEXT: frintp s17, s17 -; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s18, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s17 -; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: frintp s5, s5 -; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6] -; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0] -; CHECK-GI-NOFP16-NEXT: frintp s16, s18 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h17 -; CHECK-GI-NOFP16-NEXT: frintp s7, s7 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7] -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s16 -; CHECK-GI-NOFP16-NEXT: frintp s17, s17 -; CHECK-GI-NOFP16-NEXT: fcvt h7, s7 -; CHECK-GI-NOFP16-NEXT: frintp s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: frintp s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h5, s17 -; CHECK-GI-NOFP16-NEXT: fcvt h6, s6 -; CHECK-GI-NOFP16-NEXT: frintp s0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: frintp s1, s1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b -; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: frintp v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: frintp v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: frintp v4.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: frintp v5.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: ceil_v16f16: @@ -687,19 +571,30 @@ } define <3 x double> @floor_v3f64(<3 x double> %a) { -; CHECK-LABEL: floor_v3f64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: frintm v2.2d, v2.2d -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 -; CHECK-NEXT: frintm v0.2d, v0.2d -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: floor_v3f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: frintm v2.2d, v2.2d +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: frintm v0.2d, v0.2d +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: floor_v3f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: frintm d2, d2 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: frintm v0.2d, v0.2d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret entry: %c = call <3 x double> @llvm.floor.v3f64(<3 x double> %a) ret <3 x double> %c @@ -727,10 +622,25 @@ } define <3 x float> @floor_v3f32(<3 x float> %a) { -; CHECK-LABEL: floor_v3f32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: frintm v0.4s, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: floor_v3f32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: frintm v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: floor_v3f32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: frintm v0.4s, v0.4s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: ret entry: %c = call <3 x float> @llvm.floor.v3f32(<3 x float> %a) ret <3 x float> %c @@ -808,46 +718,71 @@ ; ; CHECK-GI-NOFP16-LABEL: floor_v7f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintm s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: frintm s5, s1 -; CHECK-GI-NOFP16-NEXT: frintm s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frintm s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: frintm s3, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s3 -; CHECK-GI-NOFP16-NEXT: frintm s3, s5 -; CHECK-GI-NOFP16-NEXT: frintm s0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frintm v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frintm v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: floor_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frintm v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.floor.v7f16(<7 x half> %a) @@ -886,26 +821,9 @@ ; ; CHECK-GI-NOFP16-LABEL: floor_v4f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintm s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h0 -; CHECK-GI-NOFP16-NEXT: frintm s1, s1 -; CHECK-GI-NOFP16-NEXT: frintm s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s3 -; CHECK-GI-NOFP16-NEXT: frintm s3, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frintm v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: floor_v4f16: @@ -968,45 +886,12 @@ ; ; CHECK-GI-NOFP16-LABEL: floor_v8f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintm s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: frintm s5, s1 -; CHECK-GI-NOFP16-NEXT: frintm s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frintm s4, s4 -; CHECK-GI-NOFP16-NEXT: frintm s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frintm s2, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: frintm s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: frintm s0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: frintm v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frintm v2.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: floor_v8f16: @@ -1109,84 +994,18 @@ ; ; CHECK-GI-NOFP16-LABEL: floor_v16f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-NOFP16-NEXT: fcvt s4, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h1 -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] -; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frintm s4, s4 -; CHECK-GI-NOFP16-NEXT: frintm s5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: frintm s16, s2 -; CHECK-GI-NOFP16-NEXT: frintm s17, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: frintm s6, s6 -; CHECK-GI-NOFP16-NEXT: frintm s7, s7 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s17 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h19 -; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4] -; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s6 -; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt h6, s7 -; CHECK-GI-NOFP16-NEXT: frintm s16, s16 -; CHECK-GI-NOFP16-NEXT: frintm s17, s17 -; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s18, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s17 -; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: frintm s5, s5 -; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6] -; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0] -; CHECK-GI-NOFP16-NEXT: frintm s16, s18 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h17 -; CHECK-GI-NOFP16-NEXT: frintm s7, s7 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7] -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s16 -; CHECK-GI-NOFP16-NEXT: frintm s17, s17 -; CHECK-GI-NOFP16-NEXT: fcvt h7, s7 -; CHECK-GI-NOFP16-NEXT: frintm s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: frintm s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h5, s17 -; CHECK-GI-NOFP16-NEXT: fcvt h6, s6 -; CHECK-GI-NOFP16-NEXT: frintm s0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: frintm s1, s1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b -; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: frintm v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: frintm v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: frintm v4.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: frintm v5.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: floor_v16f16: @@ -1259,19 +1078,30 @@ } define <3 x double> @nearbyint_v3f64(<3 x double> %a) { -; CHECK-LABEL: nearbyint_v3f64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: frinti v2.2d, v2.2d -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 -; CHECK-NEXT: frinti v0.2d, v0.2d -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: nearbyint_v3f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: frinti v2.2d, v2.2d +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: frinti v0.2d, v0.2d +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: nearbyint_v3f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: frinti d2, d2 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: frinti v0.2d, v0.2d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret entry: %c = call <3 x double> @llvm.nearbyint.v3f64(<3 x double> %a) ret <3 x double> %c @@ -1299,10 +1129,25 @@ } define <3 x float> @nearbyint_v3f32(<3 x float> %a) { -; CHECK-LABEL: nearbyint_v3f32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: frinti v0.4s, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: nearbyint_v3f32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: frinti v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: nearbyint_v3f32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: frinti v0.4s, v0.4s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: ret entry: %c = call <3 x float> @llvm.nearbyint.v3f32(<3 x float> %a) ret <3 x float> %c @@ -1380,46 +1225,71 @@ ; ; CHECK-GI-NOFP16-LABEL: nearbyint_v7f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frinti s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: frinti s5, s1 -; CHECK-GI-NOFP16-NEXT: frinti s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frinti s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: frinti s3, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s3 -; CHECK-GI-NOFP16-NEXT: frinti s3, s5 -; CHECK-GI-NOFP16-NEXT: frinti s0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frinti v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frinti v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: nearbyint_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frinti v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.nearbyint.v7f16(<7 x half> %a) @@ -1458,26 +1328,9 @@ ; ; CHECK-GI-NOFP16-LABEL: nearbyint_v4f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frinti s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h0 -; CHECK-GI-NOFP16-NEXT: frinti s1, s1 -; CHECK-GI-NOFP16-NEXT: frinti s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s3 -; CHECK-GI-NOFP16-NEXT: frinti s3, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frinti v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: nearbyint_v4f16: @@ -1540,45 +1393,12 @@ ; ; CHECK-GI-NOFP16-LABEL: nearbyint_v8f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frinti s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: frinti s5, s1 -; CHECK-GI-NOFP16-NEXT: frinti s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frinti s4, s4 -; CHECK-GI-NOFP16-NEXT: frinti s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frinti s2, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: frinti s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: frinti s0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: frinti v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frinti v2.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: nearbyint_v8f16: @@ -1681,84 +1501,18 @@ ; ; CHECK-GI-NOFP16-LABEL: nearbyint_v16f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-NOFP16-NEXT: fcvt s4, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h1 -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] -; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frinti s4, s4 -; CHECK-GI-NOFP16-NEXT: frinti s5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: frinti s16, s2 -; CHECK-GI-NOFP16-NEXT: frinti s17, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: frinti s6, s6 -; CHECK-GI-NOFP16-NEXT: frinti s7, s7 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s17 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h19 -; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4] -; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s6 -; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt h6, s7 -; CHECK-GI-NOFP16-NEXT: frinti s16, s16 -; CHECK-GI-NOFP16-NEXT: frinti s17, s17 -; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s18, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s17 -; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: frinti s5, s5 -; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6] -; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0] -; CHECK-GI-NOFP16-NEXT: frinti s16, s18 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h17 -; CHECK-GI-NOFP16-NEXT: frinti s7, s7 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7] -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s16 -; CHECK-GI-NOFP16-NEXT: frinti s17, s17 -; CHECK-GI-NOFP16-NEXT: fcvt h7, s7 -; CHECK-GI-NOFP16-NEXT: frinti s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: frinti s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h5, s17 -; CHECK-GI-NOFP16-NEXT: fcvt h6, s6 -; CHECK-GI-NOFP16-NEXT: frinti s0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: frinti s1, s1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b -; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: frinti v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: frinti v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: frinti v4.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: frinti v5.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: nearbyint_v16f16: @@ -1831,19 +1585,30 @@ } define <3 x double> @roundeven_v3f64(<3 x double> %a) { -; CHECK-LABEL: roundeven_v3f64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: frintn v2.2d, v2.2d -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 -; CHECK-NEXT: frintn v0.2d, v0.2d -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: roundeven_v3f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: frintn v2.2d, v2.2d +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: frintn v0.2d, v0.2d +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: roundeven_v3f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: frintn d2, d2 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: frintn v0.2d, v0.2d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret entry: %c = call <3 x double> @llvm.roundeven.v3f64(<3 x double> %a) ret <3 x double> %c @@ -1871,10 +1636,25 @@ } define <3 x float> @roundeven_v3f32(<3 x float> %a) { -; CHECK-LABEL: roundeven_v3f32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: frintn v0.4s, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: roundeven_v3f32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: frintn v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: roundeven_v3f32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: frintn v0.4s, v0.4s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: ret entry: %c = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %a) ret <3 x float> %c @@ -1952,50 +1732,71 @@ ; ; CHECK-GI-NOFP16-LABEL: roundeven_v7f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintn s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: frintn s5, s1 -; CHECK-GI-NOFP16-NEXT: frintn s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frintn s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s4 -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frintn s4, s5 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[6] -; CHECK-GI-NOFP16-NEXT: frintn s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: frintn s2, s2 -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0] -; CHECK-GI-NOFP16-NEXT: frintn s0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frintn v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frintn v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: roundeven_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frintn v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.roundeven.v7f16(<7 x half> %a) @@ -2034,26 +1835,9 @@ ; ; CHECK-GI-NOFP16-LABEL: roundeven_v4f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: frintn s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frintn s1, s1 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s2 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h4 -; CHECK-GI-NOFP16-NEXT: frintn s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: frintn s2, s2 -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h1, s2 -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frintn v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: roundeven_v4f16: @@ -2116,45 +1900,12 @@ ; ; CHECK-GI-NOFP16-LABEL: roundeven_v8f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintn s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: frintn s5, s1 -; CHECK-GI-NOFP16-NEXT: frintn s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frintn s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s4 -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frintn s4, s5 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[6] -; CHECK-GI-NOFP16-NEXT: frintn s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: frintn s2, s2 -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0] -; CHECK-GI-NOFP16-NEXT: frintn s0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: frintn v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frintn v2.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: roundeven_v8f16: @@ -2257,84 +2008,18 @@ ; ; CHECK-GI-NOFP16-LABEL: roundeven_v16f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s5, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h1 -; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] -; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: frintn s5, s5 -; CHECK-GI-NOFP16-NEXT: frintn s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: frintn s16, s2 -; CHECK-GI-NOFP16-NEXT: frintn s17, s3 -; CHECK-GI-NOFP16-NEXT: frintn s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s6 -; CHECK-GI-NOFP16-NEXT: frintn s7, s7 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h6, s17 -; CHECK-GI-NOFP16-NEXT: mov h16, v1.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s17, h18 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h7, s7 -; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4] -; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v6.h[0] -; CHECK-GI-NOFP16-NEXT: mov h6, v1.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s16, h16 -; CHECK-GI-NOFP16-NEXT: frintn s17, s17 -; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: frintn s16, s16 -; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v7.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h7, s17 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: frintn s5, s5 -; CHECK-GI-NOFP16-NEXT: frintn s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s16 -; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v7.h[0] -; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[5] -; CHECK-GI-NOFP16-NEXT: frintn s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h6, s6 -; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[6] -; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v6.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s6, h7 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: frintn s6, s6 -; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7] -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: frintn s5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: frintn s0, s0 -; CHECK-GI-NOFP16-NEXT: frintn s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 -; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v6.h[0] -; CHECK-GI-NOFP16-NEXT: frintn s1, s1 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b -; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: frintn v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: frintn v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: frintn v4.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: frintn v5.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: roundeven_v16f16: @@ -2407,19 +2092,30 @@ } define <3 x double> @rint_v3f64(<3 x double> %a) { -; CHECK-LABEL: rint_v3f64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: frintx v2.2d, v2.2d -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 -; CHECK-NEXT: frintx v0.2d, v0.2d -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: rint_v3f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: frintx v2.2d, v2.2d +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: frintx v0.2d, v0.2d +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: rint_v3f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: frintx d2, d2 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: frintx v0.2d, v0.2d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret entry: %c = call <3 x double> @llvm.rint.v3f64(<3 x double> %a) ret <3 x double> %c @@ -2447,10 +2143,25 @@ } define <3 x float> @rint_v3f32(<3 x float> %a) { -; CHECK-LABEL: rint_v3f32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: frintx v0.4s, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: rint_v3f32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: frintx v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: rint_v3f32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: frintx v0.4s, v0.4s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: ret entry: %c = call <3 x float> @llvm.rint.v3f32(<3 x float> %a) ret <3 x float> %c @@ -2528,46 +2239,71 @@ ; ; CHECK-GI-NOFP16-LABEL: rint_v7f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintx s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: frintx s5, s1 -; CHECK-GI-NOFP16-NEXT: frintx s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frintx s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: frintx s3, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s3 -; CHECK-GI-NOFP16-NEXT: frintx s3, s5 -; CHECK-GI-NOFP16-NEXT: frintx s0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frintx v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frintx v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: rint_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frintx v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.rint.v7f16(<7 x half> %a) @@ -2606,26 +2342,9 @@ ; ; CHECK-GI-NOFP16-LABEL: rint_v4f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintx s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h0 -; CHECK-GI-NOFP16-NEXT: frintx s1, s1 -; CHECK-GI-NOFP16-NEXT: frintx s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s3 -; CHECK-GI-NOFP16-NEXT: frintx s3, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frintx v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: rint_v4f16: @@ -2688,45 +2407,12 @@ ; ; CHECK-GI-NOFP16-LABEL: rint_v8f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintx s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: frintx s5, s1 -; CHECK-GI-NOFP16-NEXT: frintx s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frintx s4, s4 -; CHECK-GI-NOFP16-NEXT: frintx s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frintx s2, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: frintx s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: frintx s0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: frintx v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frintx v2.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: rint_v8f16: @@ -2829,84 +2515,18 @@ ; ; CHECK-GI-NOFP16-LABEL: rint_v16f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-NOFP16-NEXT: fcvt s4, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h1 -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] -; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frintx s4, s4 -; CHECK-GI-NOFP16-NEXT: frintx s5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: frintx s16, s2 -; CHECK-GI-NOFP16-NEXT: frintx s17, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: frintx s6, s6 -; CHECK-GI-NOFP16-NEXT: frintx s7, s7 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s17 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h19 -; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4] -; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s6 -; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt h6, s7 -; CHECK-GI-NOFP16-NEXT: frintx s16, s16 -; CHECK-GI-NOFP16-NEXT: frintx s17, s17 -; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s18, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s17 -; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: frintx s5, s5 -; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6] -; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0] -; CHECK-GI-NOFP16-NEXT: frintx s16, s18 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h17 -; CHECK-GI-NOFP16-NEXT: frintx s7, s7 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7] -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s16 -; CHECK-GI-NOFP16-NEXT: frintx s17, s17 -; CHECK-GI-NOFP16-NEXT: fcvt h7, s7 -; CHECK-GI-NOFP16-NEXT: frintx s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: frintx s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h5, s17 -; CHECK-GI-NOFP16-NEXT: fcvt h6, s6 -; CHECK-GI-NOFP16-NEXT: frintx s0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: frintx s1, s1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b -; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: frintx v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: frintx v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: frintx v4.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: frintx v5.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: rint_v16f16: @@ -2979,19 +2599,30 @@ } define <3 x double> @round_v3f64(<3 x double> %a) { -; CHECK-LABEL: round_v3f64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: frinta v2.2d, v2.2d -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 -; CHECK-NEXT: frinta v0.2d, v0.2d -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: round_v3f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: frinta v2.2d, v2.2d +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: frinta v0.2d, v0.2d +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: round_v3f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: frinta d2, d2 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: frinta v0.2d, v0.2d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret entry: %c = call <3 x double> @llvm.round.v3f64(<3 x double> %a) ret <3 x double> %c @@ -3019,10 +2650,25 @@ } define <3 x float> @round_v3f32(<3 x float> %a) { -; CHECK-LABEL: round_v3f32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: frinta v0.4s, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: round_v3f32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: frinta v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: round_v3f32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: frinta v0.4s, v0.4s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: ret entry: %c = call <3 x float> @llvm.round.v3f32(<3 x float> %a) ret <3 x float> %c @@ -3100,46 +2746,71 @@ ; ; CHECK-GI-NOFP16-LABEL: round_v7f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frinta s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: frinta s5, s1 -; CHECK-GI-NOFP16-NEXT: frinta s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frinta s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: frinta s3, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s3 -; CHECK-GI-NOFP16-NEXT: frinta s3, s5 -; CHECK-GI-NOFP16-NEXT: frinta s0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frinta v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frinta v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: round_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frinta v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.round.v7f16(<7 x half> %a) @@ -3178,26 +2849,9 @@ ; ; CHECK-GI-NOFP16-LABEL: round_v4f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frinta s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h0 -; CHECK-GI-NOFP16-NEXT: frinta s1, s1 -; CHECK-GI-NOFP16-NEXT: frinta s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s3 -; CHECK-GI-NOFP16-NEXT: frinta s3, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frinta v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: round_v4f16: @@ -3260,45 +2914,12 @@ ; ; CHECK-GI-NOFP16-LABEL: round_v8f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frinta s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: frinta s5, s1 -; CHECK-GI-NOFP16-NEXT: frinta s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frinta s4, s4 -; CHECK-GI-NOFP16-NEXT: frinta s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frinta s2, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: frinta s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: frinta s0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: frinta v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frinta v2.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: round_v8f16: @@ -3401,84 +3022,18 @@ ; ; CHECK-GI-NOFP16-LABEL: round_v16f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-NOFP16-NEXT: fcvt s4, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h1 -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] -; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frinta s4, s4 -; CHECK-GI-NOFP16-NEXT: frinta s5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: frinta s16, s2 -; CHECK-GI-NOFP16-NEXT: frinta s17, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: frinta s6, s6 -; CHECK-GI-NOFP16-NEXT: frinta s7, s7 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s17 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h19 -; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4] -; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s6 -; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt h6, s7 -; CHECK-GI-NOFP16-NEXT: frinta s16, s16 -; CHECK-GI-NOFP16-NEXT: frinta s17, s17 -; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s18, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s17 -; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: frinta s5, s5 -; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6] -; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0] -; CHECK-GI-NOFP16-NEXT: frinta s16, s18 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h17 -; CHECK-GI-NOFP16-NEXT: frinta s7, s7 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7] -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s16 -; CHECK-GI-NOFP16-NEXT: frinta s17, s17 -; CHECK-GI-NOFP16-NEXT: fcvt h7, s7 -; CHECK-GI-NOFP16-NEXT: frinta s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: frinta s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h5, s17 -; CHECK-GI-NOFP16-NEXT: fcvt h6, s6 -; CHECK-GI-NOFP16-NEXT: frinta s0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: frinta s1, s1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b -; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: frinta v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: frinta v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: frinta v4.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: frinta v5.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: round_v16f16: @@ -3551,19 +3106,30 @@ } define <3 x double> @trunc_v3f64(<3 x double> %a) { -; CHECK-LABEL: trunc_v3f64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: frintz v2.2d, v2.2d -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 -; CHECK-NEXT: frintz v0.2d, v0.2d -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: trunc_v3f64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: frintz v2.2d, v2.2d +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: frintz v0.2d, v0.2d +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: trunc_v3f64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: frintz d2, d2 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: frintz v0.2d, v0.2d +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret entry: %c = call <3 x double> @llvm.trunc.v3f64(<3 x double> %a) ret <3 x double> %c @@ -3591,10 +3157,25 @@ } define <3 x float> @trunc_v3f32(<3 x float> %a) { -; CHECK-LABEL: trunc_v3f32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: frintz v0.4s, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: trunc_v3f32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: frintz v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: trunc_v3f32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: frintz v0.4s, v0.4s +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: ret entry: %c = call <3 x float> @llvm.trunc.v3f32(<3 x float> %a) ret <3 x float> %c @@ -3672,46 +3253,71 @@ ; ; CHECK-GI-NOFP16-LABEL: trunc_v7f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintz s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: frintz s5, s1 -; CHECK-GI-NOFP16-NEXT: frintz s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frintz s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: frintz s3, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s3 -; CHECK-GI-NOFP16-NEXT: frintz s3, s5 -; CHECK-GI-NOFP16-NEXT: frintz s0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5] +; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] +; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frintz v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frintz v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1] +; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2] +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s +; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] +; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2] +; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3] +; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0] +; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1] +; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2] +; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0] +; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: trunc_v7f16: ; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: frintz v0.8h, v0.8h +; CHECK-GI-FP16-NEXT: mov h1, v0.h[1] +; CHECK-GI-FP16-NEXT: mov h2, v0.h[2] +; CHECK-GI-FP16-NEXT: mov h3, v0.h[3] +; CHECK-GI-FP16-NEXT: mov h4, v0.h[4] +; CHECK-GI-FP16-NEXT: mov h5, v0.h[5] +; CHECK-GI-FP16-NEXT: mov h6, v0.h[6] +; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0] +; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0] ; CHECK-GI-FP16-NEXT: ret entry: %c = call <7 x half> @llvm.trunc.v7f16(<7 x half> %a) @@ -3750,26 +3356,9 @@ ; ; CHECK-GI-NOFP16-LABEL: trunc_v4f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintz s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h0 -; CHECK-GI-NOFP16-NEXT: frintz s1, s1 -; CHECK-GI-NOFP16-NEXT: frintz s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s3 -; CHECK-GI-NOFP16-NEXT: frintz s3, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0] -; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: frintz v0.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: trunc_v4f16: @@ -3832,45 +3421,12 @@ ; ; CHECK-GI-NOFP16-LABEL: trunc_v8f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: frintz s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: frintz s5, s1 -; CHECK-GI-NOFP16-NEXT: frintz s2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s3 -; CHECK-GI-NOFP16-NEXT: frintz s4, s4 -; CHECK-GI-NOFP16-NEXT: frintz s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5] -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frintz s2, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: frintz s3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s2 -; CHECK-GI-NOFP16-NEXT: frintz s0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h3, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0] -; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: frintz v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: frintz v2.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: trunc_v8f16: @@ -3973,84 +3529,18 @@ ; ; CHECK-GI-NOFP16-LABEL: trunc_v16f16: ; CHECK-GI-NOFP16: // %bb.0: // %entry -; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] -; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1] -; CHECK-GI-NOFP16-NEXT: fcvt s4, h0 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h1 -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2] -; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2] -; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3] -; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3] -; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 -; CHECK-GI-NOFP16-NEXT: fcvt s3, h3 -; CHECK-GI-NOFP16-NEXT: frintz s4, s4 -; CHECK-GI-NOFP16-NEXT: frintz s5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: frintz s16, s2 -; CHECK-GI-NOFP16-NEXT: frintz s17, s3 -; CHECK-GI-NOFP16-NEXT: fcvt h2, s4 -; CHECK-GI-NOFP16-NEXT: fcvt h3, s5 -; CHECK-GI-NOFP16-NEXT: frintz s6, s6 -; CHECK-GI-NOFP16-NEXT: frintz s7, s7 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s17 -; CHECK-GI-NOFP16-NEXT: fcvt s16, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h19 -; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4] -; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s6 -; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4] -; CHECK-GI-NOFP16-NEXT: fcvt h6, s7 -; CHECK-GI-NOFP16-NEXT: frintz s16, s16 -; CHECK-GI-NOFP16-NEXT: frintz s17, s17 -; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5] -; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt s18, h18 -; CHECK-GI-NOFP16-NEXT: fcvt s5, h5 -; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h4, s16 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s17 -; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5] -; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6] -; CHECK-GI-NOFP16-NEXT: fcvt s7, h7 -; CHECK-GI-NOFP16-NEXT: frintz s5, s5 -; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6] -; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0] -; CHECK-GI-NOFP16-NEXT: frintz s16, s18 -; CHECK-GI-NOFP16-NEXT: fcvt s17, h17 -; CHECK-GI-NOFP16-NEXT: frintz s7, s7 -; CHECK-GI-NOFP16-NEXT: fcvt s6, h6 -; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7] -; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7] -; CHECK-GI-NOFP16-NEXT: fcvt s4, h4 -; CHECK-GI-NOFP16-NEXT: fcvt h5, s5 -; CHECK-GI-NOFP16-NEXT: fcvt h16, s16 -; CHECK-GI-NOFP16-NEXT: frintz s17, s17 -; CHECK-GI-NOFP16-NEXT: fcvt h7, s7 -; CHECK-GI-NOFP16-NEXT: frintz s6, s6 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: frintz s4, s4 -; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h5, s17 -; CHECK-GI-NOFP16-NEXT: fcvt h6, s6 -; CHECK-GI-NOFP16-NEXT: frintz s0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h4, s4 -; CHECK-GI-NOFP16-NEXT: frintz s1, s1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0] -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 -; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0] -; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0] -; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0] -; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b -; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: frintz v2.4s, v2.4s +; CHECK-GI-NOFP16-NEXT: frintz v3.4s, v3.4s +; CHECK-GI-NOFP16-NEXT: frintz v4.4s, v0.4s +; CHECK-GI-NOFP16-NEXT: frintz v5.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s +; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s +; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: trunc_v16f16: @@ -4161,7 +3651,3 @@ declare half @llvm.round.f16(half) declare half @llvm.roundeven.f16(half) declare half @llvm.trunc.f16(half) - -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK-GI: {{.*}} -; CHECK-SD: {{.*}}