diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -35,6 +35,17 @@ // 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx // ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx // +// 6. %intermediate:gpr32 = COPY %src:fpr128 +// %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32 +// ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0 +// +// In cases where a source FPR is copied to a GPR in order to be copied +// to a destination FPR, we can directly copy the values between the FPRs, +// eliminating the use of the Integer unit. When we match a pattern of +// INSvi[X]gpr that is preceded by a chain of COPY instructions from a FPR +// source, we use the INSvi[X]lane to replace the COPY & INSvi[X]gpr +// instructions. +// //===----------------------------------------------------------------------===// #include "AArch64ExpandImm.h" @@ -99,6 +110,7 @@ bool visitAND(unsigned Opc, MachineInstr &MI); bool visitORR(MachineInstr &MI); bool visitINSERT(MachineInstr &MI); + bool visitINSviGPR(MachineInstr &MI, unsigned Opc); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -535,6 +547,50 @@ return true; } +bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) { + // Check if this INSvi[X]gpr comes from COPY of a source FPR128 + // + // From + // %intermediate1:gpr64 = COPY %src:fpr128 + // %intermediate2:gpr32 = COPY %intermediate1:gpr64 + // %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32 + // To + // %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128, + // src_index + // where src_index = 0, X = [8|16|32|64] + + MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg()); + + // For a chain of COPY instructions, find the initial source register + // and check if it's an FPR128 + while (true) { + if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY) + return false; + + if (!SrcMI->getOperand(1).getReg().isVirtual()) + return false; + + if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) == + &AArch64::FPR128RegClass) { + break; + } + SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg()); + } + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = SrcMI->getOperand(1).getReg(); + MachineInstr *INSvilaneMI = + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) + .addUse(SrcReg, getRegState(SrcMI->getOperand(1))) + .addImm(0); + + LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *INSvilaneMI << "\n"); + MI.eraseFromParent(); + return true; +} + bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -598,6 +654,18 @@ {AArch64::ADDXri, AArch64::ADDSXri}, MI); break; + case AArch64::INSvi64gpr: + Changed = visitINSviGPR(MI, AArch64::INSvi64lane); + break; + case AArch64::INSvi32gpr: + Changed = visitINSviGPR(MI, AArch64::INSvi32lane); + break; + case AArch64::INSvi16gpr: + Changed = visitINSviGPR(MI, AArch64::INSvi16lane); + break; + case AArch64::INSvi8gpr: + Changed = visitINSviGPR(MI, AArch64::INSvi8lane); + break; } } } diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll --- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll @@ -13,10 +13,9 @@ ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: uaddlv.8h s0, v0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov.s v1[0], w8 -; CHECK-NEXT: ucvtf.2s v0, v1 -; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: mov.s v1[0], v0[0] +; CHECK-NEXT: ucvtf.2s v1, v1 +; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: ret entry: @@ -32,8 +31,7 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: uaddlv.8h s1, v0 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov.s v0[0], w8 +; CHECK-NEXT: mov.s v0[0], v1[0] ; CHECK-NEXT: ucvtf.4s v0, v0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret @@ -50,12 +48,11 @@ ; CHECK-LABEL: insert_vec_v16i32_uaddlv_from_v8i16: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v2, #0000000000000000 -; CHECK-NEXT: uaddlv.8h s1, v0 +; CHECK-NEXT: movi.2d v1, #0000000000000000 +; CHECK-NEXT: uaddlv.8h s2, v0 ; CHECK-NEXT: stp q0, q0, [x0, #32] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov.s v2[0], w8 -; CHECK-NEXT: ucvtf.4s v1, v2 +; CHECK-NEXT: mov.s v1[0], v2[0] +; CHECK-NEXT: ucvtf.4s v1, v1 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret @@ -71,16 +68,15 @@ ; CHECK-LABEL: insert_vec_v23i32_uaddlv_from_v8i16: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v2, #0000000000000000 -; CHECK-NEXT: uaddlv.8h s1, v0 +; CHECK-NEXT: add x8, x0, #88 +; CHECK-NEXT: movi.2d v1, #0000000000000000 +; CHECK-NEXT: uaddlv.8h s2, v0 ; CHECK-NEXT: stp q0, q0, [x0, #16] ; CHECK-NEXT: stp q0, q0, [x0, #48] -; CHECK-NEXT: str d0, [x0, #80] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov.s v2[0], w8 -; CHECK-NEXT: add x8, x0, #88 ; CHECK-NEXT: st1.s { v0 }[2], [x8] -; CHECK-NEXT: ucvtf.4s v1, v2 +; CHECK-NEXT: mov.s v1[0], v2[0] +; CHECK-NEXT: str d0, [x0, #80] +; CHECK-NEXT: ucvtf.4s v1, v1 ; CHECK-NEXT: str q1, [x0] ; CHECK-NEXT: ret @@ -98,10 +94,9 @@ ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: uaddlv.16b h0, v0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov.s v1[0], w8 -; CHECK-NEXT: ucvtf.2s v0, v1 -; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: mov.s v1[0], v0[0] +; CHECK-NEXT: ucvtf.2s v1, v1 +; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: ret entry: @@ -117,8 +112,7 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: uaddlv.8b h1, v0 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov.s v0[0], w8 +; CHECK-NEXT: mov.s v0[0], v1[0] ; CHECK-NEXT: ucvtf.2s v0, v0 ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret @@ -136,8 +130,7 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: uaddlv.4h s1, v0 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov.s v0[0], w8 +; CHECK-NEXT: mov.s v0[0], v1[0] ; CHECK-NEXT: ucvtf.2s v0, v0 ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret @@ -155,11 +148,10 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: movi.2d v3, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d2, v1 -; CHECK-NEXT: fmov x8, d2 -; CHECK-NEXT: movi.2d v2, #0000000000000000 -; CHECK-NEXT: mov.d v1[0], x8 -; CHECK-NEXT: str d2, [x0, #16] +; CHECK-NEXT: str d3, [x0, #16] +; CHECK-NEXT: mov.d v1[0], v2[0] ; CHECK-NEXT: ucvtf.2d v1, v1 ; CHECK-NEXT: fcvtn v1.2s, v1.2d ; CHECK-NEXT: mov.d v1[1], v0[0] @@ -179,8 +171,7 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d1, v0 -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: mov.d v0[0], x8 +; CHECK-NEXT: mov.d v0[0], v1[0] ; CHECK-NEXT: ucvtf.2d v0, v0 ; CHECK-NEXT: fcvtn v0.2s, v0.2d ; CHECK-NEXT: str d0, [x0] @@ -201,8 +192,7 @@ ; CHECK-NEXT: str wzr, [x0, #16] ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d2, v1 -; CHECK-NEXT: fmov x8, d2 -; CHECK-NEXT: mov.d v1[0], x8 +; CHECK-NEXT: mov.d v1[0], v2[0] ; CHECK-NEXT: ucvtf.2d v1, v1 ; CHECK-NEXT: fcvtn v1.2s, v1.2d ; CHECK-NEXT: mov.d v1[1], v0[0] @@ -224,11 +214,10 @@ ; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: uaddlv.8h s0, v0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov.h v1[0], w8 -; CHECK-NEXT: ushll.4s v0, v1, #0 -; CHECK-NEXT: ucvtf.4s v0, v0 -; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: mov.h v1[0], v0[0] +; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: ucvtf.4s v1, v1 +; CHECK-NEXT: str q1, [x0] ; CHECK-NEXT: ret entry: @@ -244,15 +233,14 @@ ; CHECK-LABEL: insert_vec_v3i16_uaddlv_from_v8i16: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 +; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: uaddlv.8h s0, v0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov.h v1[0], w8 -; CHECK-NEXT: add x8, x0, #8 -; CHECK-NEXT: ushll.4s v0, v1, #0 -; CHECK-NEXT: ucvtf.4s v0, v0 -; CHECK-NEXT: st1.s { v0 }[2], [x8] -; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: mov.h v1[0], v0[0] +; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: ucvtf.4s v1, v1 +; CHECK-NEXT: st1.s { v1 }[2], [x8] +; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: ret entry: @@ -269,12 +257,11 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: movi d0, #0000000000000000 -; CHECK-NEXT: movi.2d v3, #0000000000000000 -; CHECK-NEXT: uaddlv.4h s2, v1 +; CHECK-NEXT: movi.2d v2, #0000000000000000 +; CHECK-NEXT: uaddlv.4h s3, v1 ; CHECK-NEXT: stp q1, q1, [x0, #32] -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov.s v3[0], w8 -; CHECK-NEXT: ucvtf.2d v2, v3 +; CHECK-NEXT: mov.s v2[0], v3[0] +; CHECK-NEXT: ucvtf.2d v2, v2 ; CHECK-NEXT: fcvtn v2.2s, v2.2d ; CHECK-NEXT: mov.d v2[1], v0[0] ; CHECK-NEXT: stp q2, q1, [x0] @@ -293,12 +280,11 @@ ; CHECK-LABEL: insert_vec_v16i8_uaddlv_from_v8i8: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v2, #0000000000000000 -; CHECK-NEXT: uaddlv.8b h1, v0 +; CHECK-NEXT: movi.2d v1, #0000000000000000 +; CHECK-NEXT: uaddlv.8b h2, v0 ; CHECK-NEXT: stp q0, q0, [x0, #32] -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov.b v2[0], w8 -; CHECK-NEXT: zip1.8b v1, v2, v0 +; CHECK-NEXT: mov.b v1[0], v2[0] +; CHECK-NEXT: zip1.8b v1, v1, v0 ; CHECK-NEXT: bic.4h v1, #255, lsl #8 ; CHECK-NEXT: ushll.4s v1, v1, #0 ; CHECK-NEXT: ucvtf.4s v1, v1 @@ -320,8 +306,7 @@ ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: uaddlv.8b h1, v0 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov.h v0[0], w8 +; CHECK-NEXT: mov.h v0[0], v1[0] ; CHECK-NEXT: bic.4h v0, #255, lsl #8 ; CHECK-NEXT: ushll.4s v0, v0, #0 ; CHECK-NEXT: ucvtf.4s v0, v0 @@ -344,8 +329,7 @@ ; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: stp xzr, xzr, [x0, #32] ; CHECK-NEXT: uaddlv.4h s1, v0 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov.h v0[0], w8 +; CHECK-NEXT: mov.h v0[0], v1[0] ; CHECK-NEXT: ushll.4s v0, v0, #0 ; CHECK-NEXT: ucvtf.4s v0, v0 ; CHECK-NEXT: str q0, [x0] @@ -366,8 +350,7 @@ ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: uaddlv.4s d1, v0 -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: mov.s v0[0], w8 +; CHECK-NEXT: mov.s v0[0], v1[0] ; CHECK-NEXT: ucvtf.4s v0, v0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret @@ -385,12 +368,11 @@ ; CHECK-LABEL: insert_vec_v16i32_uaddlv_from_v4i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v2, #0000000000000000 -; CHECK-NEXT: uaddlv.4s d1, v0 +; CHECK-NEXT: movi.2d v1, #0000000000000000 +; CHECK-NEXT: uaddlv.4s d2, v0 ; CHECK-NEXT: stp q0, q0, [x0, #32] -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: mov.s v2[0], w8 -; CHECK-NEXT: ucvtf.4s v1, v2 +; CHECK-NEXT: mov.s v1[0], v2[0] +; CHECK-NEXT: ucvtf.4s v1, v1 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret @@ -409,8 +391,7 @@ ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d0, v0 -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: mov.h v1[0], w8 +; CHECK-NEXT: mov.h v1[0], v0[0] ; CHECK-NEXT: ushll.4s v0, v1, #0 ; CHECK-NEXT: ucvtf.4s v0, v0 ; CHECK-NEXT: str q0, [x0] @@ -431,11 +412,10 @@ ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d0, v0 -; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: mov.h v1[0], v0[0] ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: mov.h v1[0], w8 -; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: ucvtf.4s v1, v1 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret @@ -456,8 +436,7 @@ ; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d0, v0 -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: mov.h v1[0], w8 +; CHECK-NEXT: mov.h v1[0], v0[0] ; CHECK-NEXT: bic.4h v1, #255, lsl #8 ; CHECK-NEXT: ushll.4s v0, v1, #0 ; CHECK-NEXT: ucvtf.4s v0, v0 @@ -479,8 +458,7 @@ ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d0, v0 -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: mov.b v1[0], w8 +; CHECK-NEXT: mov.b v1[0], v0[0] ; CHECK-NEXT: zip1.8b v0, v1, v0 ; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: bic.4h v0, #255, lsl #8 @@ -504,8 +482,7 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: uaddlv.8h s1, v0 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov.s v0[2], w8 +; CHECK-NEXT: mov.s v0[2], v1[0] ; CHECK-NEXT: ucvtf.4s v0, v0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -3377,13 +3377,11 @@ ; CHECK-NEXT: mov w8, #127 ; CHECK-NEXT: fcvtzs w11, d0 ; CHECK-NEXT: mov w9, #-128 -; CHECK-NEXT: fcvtzs w13, d1 ; CHECK-NEXT: mov d0, v2.d[1] -; CHECK-NEXT: fcvtzs w14, d2 +; CHECK-NEXT: fcvtzs w13, d1 ; CHECK-NEXT: fcvtzs w10, d16 ; CHECK-NEXT: mov d16, v1.d[1] -; CHECK-NEXT: mov d1, v3.d[1] -; CHECK-NEXT: fcvtzs w15, d0 +; CHECK-NEXT: fcvtzs w14, d0 ; CHECK-NEXT: cmp w10, #127 ; CHECK-NEXT: csel w10, w10, w8, lt ; CHECK-NEXT: fcvtzs w12, d16 @@ -3398,117 +3396,112 @@ ; CHECK-NEXT: cmn w12, #128 ; CHECK-NEXT: csel w12, w12, w9, gt ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: csel w13, w13, w8, lt ; CHECK-NEXT: fmov s0, w11 -; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: csel w11, w13, w9, gt -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: csel w10, w15, w8, lt -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w13, d3 -; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w14, #127 -; CHECK-NEXT: fcvtzs w11, d1 -; CHECK-NEXT: mov w15, v0.s[1] -; CHECK-NEXT: csel w14, w14, w8, lt -; CHECK-NEXT: mov v2.s[1], w12 -; CHECK-NEXT: cmn w14, #128 -; CHECK-NEXT: csel w12, w14, w9, gt -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: csel w11, w11, w8, lt -; CHECK-NEXT: mov d1, v4.d[1] -; CHECK-NEXT: mov v0.b[1], w15 +; CHECK-NEXT: csel w11, w13, w8, lt ; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: fmov w14, s2 +; CHECK-NEXT: fcvtzs w13, d2 ; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: fmov s3, w12 -; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: mov w12, v2.s[1] -; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: mov v0.b[2], w14 -; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: mov v3.s[1], w10 -; CHECK-NEXT: csel w13, w13, w9, gt -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: fcvtzs w14, d4 -; CHECK-NEXT: mov d1, v5.d[1] -; CHECK-NEXT: mov v0.b[3], w12 -; CHECK-NEXT: fmov s4, w13 -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: fmov w13, s3 -; CHECK-NEXT: csel w10, w15, w8, lt -; CHECK-NEXT: mov w12, v3.s[1] -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w14, #127 -; CHECK-NEXT: mov v0.b[4], w13 +; CHECK-NEXT: mov v0.s[1], w10 ; CHECK-NEXT: csel w14, w14, w8, lt -; CHECK-NEXT: mov v4.s[1], w11 ; CHECK-NEXT: cmn w14, #128 -; CHECK-NEXT: csel w14, w14, w9, gt -; CHECK-NEXT: fcvtzs w13, d5 -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: mov d2, v6.d[1] -; CHECK-NEXT: mov v0.b[5], w12 -; CHECK-NEXT: csel w11, w15, w8, lt -; CHECK-NEXT: fmov w12, s4 -; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: fmov s1, w14 -; CHECK-NEXT: csel w11, w11, w9, gt +; CHECK-NEXT: mov d2, v3.d[1] +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: csel w11, w14, w9, gt ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: mov w14, v4.s[1] -; CHECK-NEXT: mov v0.b[6], w12 +; CHECK-NEXT: fcvtzs w10, d3 +; CHECK-NEXT: mov w14, v0.s[1] ; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: fcvtzs w15, d2 +; CHECK-NEXT: mov d3, v4.d[1] ; CHECK-NEXT: csel w13, w13, w9, gt -; CHECK-NEXT: fcvtzs w10, d6 -; CHECK-NEXT: mov v0.b[7], w14 -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: fmov w14, s1 -; CHECK-NEXT: csel w12, w15, w8, lt +; CHECK-NEXT: mov v1.s[1], w12 +; CHECK-NEXT: fcvtzs w12, d2 +; CHECK-NEXT: mov v0.b[1], w14 ; CHECK-NEXT: fmov s2, w13 -; CHECK-NEXT: mov w13, v1.s[1] -; CHECK-NEXT: mov d1, v7.d[1] +; CHECK-NEXT: cmp w12, #127 +; CHECK-NEXT: fcvtzs w13, d3 +; CHECK-NEXT: csel w12, w12, w8, lt +; CHECK-NEXT: fcvtzs w14, d4 ; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: fcvtzs w15, d7 +; CHECK-NEXT: mov d3, v5.d[1] +; CHECK-NEXT: mov v2.s[1], w11 +; CHECK-NEXT: mov w11, v1.s[1] +; CHECK-NEXT: mov v0.b[2], v1.b[0] ; CHECK-NEXT: csel w12, w12, w9, gt ; CHECK-NEXT: cmp w10, #127 -; CHECK-NEXT: mov v0.b[8], w14 +; CHECK-NEXT: mov d4, v6.d[1] ; CHECK-NEXT: csel w10, w10, w8, lt -; CHECK-NEXT: mov v2.s[1], w11 ; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w11, d1 ; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: mov v0.b[9], w13 -; CHECK-NEXT: fmov w14, s2 -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: csel w10, w11, w8, lt +; CHECK-NEXT: cmp w13, #127 +; CHECK-NEXT: mov v0.b[3], w11 +; CHECK-NEXT: csel w13, w13, w8, lt +; CHECK-NEXT: cmn w13, #128 +; CHECK-NEXT: fcvtzs w11, d3 +; CHECK-NEXT: csel w13, w13, w9, gt +; CHECK-NEXT: cmp w14, #127 +; CHECK-NEXT: fmov s3, w10 +; CHECK-NEXT: csel w10, w14, w8, lt +; CHECK-NEXT: mov w14, v2.s[1] ; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: mov w13, v2.s[1] -; CHECK-NEXT: mov v0.b[10], w14 +; CHECK-NEXT: mov v0.b[4], v2.b[0] ; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: mov v1.s[1], w12 -; CHECK-NEXT: csel w8, w15, w8, lt +; CHECK-NEXT: mov v3.s[1], w12 +; CHECK-NEXT: cmp w11, #127 +; CHECK-NEXT: csel w11, w11, w8, lt +; CHECK-NEXT: fcvtzs w12, d5 +; CHECK-NEXT: cmn w11, #128 +; CHECK-NEXT: mov v0.b[5], w14 +; CHECK-NEXT: fcvtzs w14, d4 +; CHECK-NEXT: fmov s4, w10 +; CHECK-NEXT: csel w10, w11, w9, gt +; CHECK-NEXT: mov w11, v3.s[1] +; CHECK-NEXT: cmp w12, #127 +; CHECK-NEXT: csel w12, w12, w8, lt +; CHECK-NEXT: mov v0.b[6], v3.b[0] +; CHECK-NEXT: cmn w12, #128 +; CHECK-NEXT: mov v4.s[1], w13 +; CHECK-NEXT: csel w12, w12, w9, gt +; CHECK-NEXT: cmp w14, #127 +; CHECK-NEXT: csel w13, w14, w8, lt +; CHECK-NEXT: mov v0.b[7], w11 +; CHECK-NEXT: fcvtzs w11, d6 +; CHECK-NEXT: cmn w13, #128 +; CHECK-NEXT: fmov s5, w12 +; CHECK-NEXT: csel w12, w13, w9, gt +; CHECK-NEXT: mov w13, v4.s[1] +; CHECK-NEXT: cmp w11, #127 +; CHECK-NEXT: mov d6, v7.d[1] +; CHECK-NEXT: mov v0.b[8], v4.b[0] +; CHECK-NEXT: csel w11, w11, w8, lt +; CHECK-NEXT: cmn w11, #128 +; CHECK-NEXT: mov v5.s[1], w10 +; CHECK-NEXT: csel w10, w11, w9, gt +; CHECK-NEXT: fcvtzs w11, d6 +; CHECK-NEXT: mov v0.b[9], w13 +; CHECK-NEXT: fcvtzs w13, d7 +; CHECK-NEXT: fmov s6, w10 +; CHECK-NEXT: mov w10, v5.s[1] +; CHECK-NEXT: cmp w11, #127 +; CHECK-NEXT: csel w11, w11, w8, lt +; CHECK-NEXT: mov v0.b[10], v5.b[0] +; CHECK-NEXT: cmn w11, #128 +; CHECK-NEXT: mov v6.s[1], w12 +; CHECK-NEXT: mov v0.b[11], w10 +; CHECK-NEXT: csel w10, w11, w9, gt +; CHECK-NEXT: cmp w13, #127 +; CHECK-NEXT: csel w8, w13, w8, lt ; CHECK-NEXT: cmn w8, #128 ; CHECK-NEXT: csel w8, w8, w9, gt -; CHECK-NEXT: mov v0.b[11], w13 -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, v1.s[1] -; CHECK-NEXT: mov v0.b[12], w9 -; CHECK-NEXT: mov v2.s[1], w10 -; CHECK-NEXT: mov v0.b[13], w8 -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov w9, v2.s[1] -; CHECK-NEXT: mov v0.b[14], w8 -; CHECK-NEXT: mov v0.b[15], w9 +; CHECK-NEXT: mov w9, v6.s[1] +; CHECK-NEXT: mov v0.b[12], v6.b[0] +; CHECK-NEXT: fmov s7, w8 +; CHECK-NEXT: mov v0.b[13], w9 +; CHECK-NEXT: mov v7.s[1], w10 +; CHECK-NEXT: mov v0.b[14], v7.b[0] +; CHECK-NEXT: mov w8, v7.s[1] +; CHECK-NEXT: mov v0.b[15], w8 ; CHECK-NEXT: ret %x = call <16 x i8> @llvm.fptosi.sat.v16f64.v16i8(<16 x double> %f) ret <16 x i8> %x diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -2820,92 +2820,85 @@ ; CHECK-NEXT: csel w10, w11, w8, lo ; CHECK-NEXT: cmp w12, #255 ; CHECK-NEXT: csel w11, w12, w8, lo +; CHECK-NEXT: fcvtzu w12, d2 ; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov d2, v3.d[1] ; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcvtzu w11, d2 ; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov d2, v3.d[1] -; CHECK-NEXT: mov w12, v0.s[1] +; CHECK-NEXT: mov w11, v0.s[1] ; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: cmp w12, #255 ; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w11, w11, w8, lo +; CHECK-NEXT: csel w12, w12, w8, lo ; CHECK-NEXT: fcvtzu w10, d2 -; CHECK-NEXT: mov d2, v4.d[1] -; CHECK-NEXT: mov v0.b[1], w12 -; CHECK-NEXT: fmov w13, s1 -; CHECK-NEXT: mov w12, v1.s[1] -; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: mov v0.b[1], w11 ; CHECK-NEXT: fcvtzu w11, d3 +; CHECK-NEXT: fmov s2, w12 +; CHECK-NEXT: mov w12, v1.s[1] ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v0.b[2], w13 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: csel w9, w10, w8, lo +; CHECK-NEXT: mov d3, v4.d[1] +; CHECK-NEXT: csel w10, w10, w8, lo +; CHECK-NEXT: mov v0.b[2], v1.b[0] ; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: fcvtzu w10, d2 +; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: mov d2, v5.d[1] +; CHECK-NEXT: fcvtzu w9, d3 +; CHECK-NEXT: mov d3, v5.d[1] ; CHECK-NEXT: mov v0.b[3], w12 -; CHECK-NEXT: fmov w12, s1 -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov w13, v1.s[1] -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcvtzu w11, d4 -; CHECK-NEXT: mov v0.b[4], w12 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: csel w9, w10, w8, lo -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w10, w11, w8, lo -; CHECK-NEXT: mov v0.b[5], w13 -; CHECK-NEXT: fcvtzu w13, d2 -; CHECK-NEXT: fmov w11, s1 -; CHECK-NEXT: mov w12, v1.s[1] -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: fcvtzu w10, d5 -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: mov v0.b[6], w11 -; CHECK-NEXT: mov d2, v6.d[1] -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: csel w9, w13, w8, lo +; CHECK-NEXT: fcvtzu w12, d4 +; CHECK-NEXT: fmov s4, w11 +; CHECK-NEXT: mov w11, v2.s[1] +; CHECK-NEXT: cmp w9, #255 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: cmp w12, #255 +; CHECK-NEXT: mov v0.b[4], v2.b[0] +; CHECK-NEXT: csel w12, w12, w8, lo +; CHECK-NEXT: mov v4.s[1], w10 +; CHECK-NEXT: fcvtzu w10, d3 +; CHECK-NEXT: fmov s3, w12 +; CHECK-NEXT: mov v0.b[5], w11 +; CHECK-NEXT: fcvtzu w11, d5 +; CHECK-NEXT: mov w12, v4.s[1] ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: fcvtzu w13, d6 ; CHECK-NEXT: csel w10, w10, w8, lo +; CHECK-NEXT: mov d5, v6.d[1] +; CHECK-NEXT: cmp w11, #255 +; CHECK-NEXT: mov v0.b[6], v4.b[0] +; CHECK-NEXT: csel w11, w11, w8, lo +; CHECK-NEXT: mov v3.s[1], w9 +; CHECK-NEXT: fcvtzu w9, d6 +; CHECK-NEXT: mov d6, v7.d[1] ; CHECK-NEXT: mov v0.b[7], w12 -; CHECK-NEXT: fcvtzu w12, d2 -; CHECK-NEXT: fmov w11, s1 -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: mov w10, v1.s[1] +; CHECK-NEXT: fcvtzu w12, d5 +; CHECK-NEXT: fmov s5, w11 +; CHECK-NEXT: mov w11, v3.s[1] ; CHECK-NEXT: cmp w12, #255 -; CHECK-NEXT: mov d1, v7.d[1] -; CHECK-NEXT: mov v0.b[8], w11 -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: csel w9, w12, w8, lo -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: csel w11, w13, w8, lo -; CHECK-NEXT: fcvtzu w13, d7 -; CHECK-NEXT: mov v0.b[9], w10 -; CHECK-NEXT: fmov w10, s2 -; CHECK-NEXT: fmov s3, w11 -; CHECK-NEXT: fcvtzu w11, d1 -; CHECK-NEXT: mov w12, v2.s[1] -; CHECK-NEXT: mov v0.b[10], w10 -; CHECK-NEXT: mov v3.s[1], w9 +; CHECK-NEXT: mov v0.b[8], v3.b[0] +; CHECK-NEXT: csel w12, w12, w8, lo +; CHECK-NEXT: cmp w9, #255 +; CHECK-NEXT: mov v5.s[1], w10 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: fcvtzu w10, d6 +; CHECK-NEXT: mov v0.b[9], w11 +; CHECK-NEXT: fcvtzu w11, d7 +; CHECK-NEXT: fmov s16, w9 +; CHECK-NEXT: mov w9, v5.s[1] +; CHECK-NEXT: cmp w10, #255 +; CHECK-NEXT: mov v0.b[10], v5.b[0] +; CHECK-NEXT: mov v16.s[1], w12 +; CHECK-NEXT: mov v0.b[11], w9 +; CHECK-NEXT: csel w9, w10, w8, lo ; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w9, w11, w8, lo -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: csel w8, w13, w8, lo -; CHECK-NEXT: mov v0.b[11], w12 -; CHECK-NEXT: fmov w10, s3 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, v3.s[1] -; CHECK-NEXT: mov v0.b[12], w10 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: mov v0.b[13], w8 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov w9, v1.s[1] -; CHECK-NEXT: mov v0.b[14], w8 -; CHECK-NEXT: mov v0.b[15], w9 +; CHECK-NEXT: mov w10, v16.s[1] +; CHECK-NEXT: csel w8, w11, w8, lo +; CHECK-NEXT: mov v0.b[12], v16.b[0] +; CHECK-NEXT: fmov s6, w8 +; CHECK-NEXT: mov v0.b[13], w10 +; CHECK-NEXT: mov v6.s[1], w9 +; CHECK-NEXT: mov v0.b[14], v6.b[0] +; CHECK-NEXT: mov w8, v6.s[1] +; CHECK-NEXT: mov v0.b[15], w8 ; CHECK-NEXT: ret %x = call <16 x i8> @llvm.fptoui.sat.v16f64.v16i8(<16 x double> %f) ret <16 x i8> %x diff --git a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll --- a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll +++ b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll @@ -41,16 +41,15 @@ ; CHECK-NEXT: mov w9, v0.s[2] ; CHECK-NEXT: mov w10, v0.s[3] ; CHECK-NEXT: mov v0.b[1], w8 -; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov w8, v1.s[1] ; CHECK-NEXT: mov v0.b[2], w9 -; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: mov w9, v1.s[2] ; CHECK-NEXT: mov v0.b[3], w10 -; CHECK-NEXT: mov v0.b[4], w8 -; CHECK-NEXT: mov w8, v1.s[2] -; CHECK-NEXT: mov v0.b[5], w9 -; CHECK-NEXT: mov w9, v1.s[3] -; CHECK-NEXT: mov v0.b[6], w8 -; CHECK-NEXT: mov v0.b[7], w9 +; CHECK-NEXT: mov v0.b[4], v1.b[0] +; CHECK-NEXT: mov v0.b[5], w8 +; CHECK-NEXT: mov w8, v1.s[3] +; CHECK-NEXT: mov v0.b[6], w9 +; CHECK-NEXT: mov v0.b[7], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir --- a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir +++ b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir @@ -1,14 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -run-pass=aarch64-mi-peephole-opt -mtriple=aarch64-unknown-linux -verify-machineinstrs -o - %s | FileCheck %s --- | - source_filename = "/Users/nilanjana/Documents/code/llvm-project/llvm/test/CodeGen/AArch64/tmp.ll" - - ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) - declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>) #0 - - ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) - declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>) #0 - define void @insert_vec_v6i64_uaddlv_from_v4i32(ptr %0) { entry: ret void @@ -79,18 +71,18 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], killed [[UADDLVv4i32v]], %subreg.dsub ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[INSERT_SUBREG]].dsub - ; CHECK-NEXT: [[INSvi64gpr:%[0-9]+]]:fpr128 = INSvi64gpr [[MOVIv2d_ns]], 0, killed [[COPY1]] + ; CHECK-NEXT: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[MOVIv2d_ns]], 0, [[INSERT_SUBREG]], 0 ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 0 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], killed [[MOVID]], %subreg.dsub - ; CHECK-NEXT: [[UCVTFv2f64_:%[0-9]+]]:fpr128 = nofpexcept UCVTFv2f64 killed [[INSvi64gpr]], implicit $fpcr + ; CHECK-NEXT: [[UCVTFv2f64_:%[0-9]+]]:fpr128 = nofpexcept UCVTFv2f64 killed [[INSvi64lane]], implicit $fpcr ; CHECK-NEXT: [[FCVTNv2i32_:%[0-9]+]]:fpr64 = nofpexcept FCVTNv2i32 killed [[UCVTFv2f64_]], implicit $fpcr ; CHECK-NEXT: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], killed [[FCVTNv2i32_]], %subreg.dsub - ; CHECK-NEXT: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG2]], 1, killed [[INSERT_SUBREG1]], 0 + ; CHECK-NEXT: [[INSvi64lane1:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG2]], 1, killed [[INSERT_SUBREG1]], 0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub ; CHECK-NEXT: STRDui killed [[COPY2]], [[COPY]], 2 :: (store (s64) into %ir.0 + 16) - ; CHECK-NEXT: STRQui killed [[INSvi64lane]], [[COPY]], 0 :: (store (s128) into %ir.0, align 8) + ; CHECK-NEXT: STRQui killed [[INSvi64lane1]], [[COPY]], 0 :: (store (s128) into %ir.0, align 8) ; CHECK-NEXT: RET_ReallyLR %0:gpr64common = COPY $x0 %1:fpr128 = MOVIv2d_ns 0 @@ -146,8 +138,8 @@ ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 0 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], killed [[MOVID]], %subreg.dsub - ; CHECK-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG1]], 0, killed [[COPY1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr]].dsub + ; CHECK-NEXT: [[INSvi32lane:%[0-9]+]]:fpr128 = INSvi32lane [[INSERT_SUBREG1]], 0, [[INSERT_SUBREG]], 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[INSvi32lane]].dsub ; CHECK-NEXT: [[UCVTFv2f32_:%[0-9]+]]:fpr64 = nofpexcept UCVTFv2f32 killed [[COPY2]], implicit $fpcr ; CHECK-NEXT: STRDui killed [[UCVTFv2f32_]], [[COPY]], 0 :: (store (s64) into %ir.0) ; CHECK-NEXT: RET_ReallyLR @@ -202,8 +194,8 @@ ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 0 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], killed [[MOVID]], %subreg.dsub - ; CHECK-NEXT: [[INSvi16gpr:%[0-9]+]]:fpr128 = INSvi16gpr [[INSERT_SUBREG1]], 0, killed [[COPY1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[INSvi16gpr]].dsub + ; CHECK-NEXT: [[INSvi16lane:%[0-9]+]]:fpr128 = INSvi16lane [[INSERT_SUBREG1]], 0, [[INSERT_SUBREG]], 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[INSvi16lane]].dsub ; CHECK-NEXT: [[USHLLv4i16_shift:%[0-9]+]]:fpr128 = USHLLv4i16_shift killed [[COPY2]], 0 ; CHECK-NEXT: [[UCVTFv4f32_:%[0-9]+]]:fpr128 = nofpexcept UCVTFv4f32 killed [[USHLLv4i16_shift]], implicit $fpcr ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $wzr @@ -275,8 +267,8 @@ ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 0 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], killed [[MOVID]], %subreg.dsub - ; CHECK-NEXT: [[INSvi8gpr:%[0-9]+]]:fpr128 = INSvi8gpr [[INSERT_SUBREG1]], 0, killed [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[INSvi8gpr]].dsub + ; CHECK-NEXT: [[INSvi8lane:%[0-9]+]]:fpr128 = INSvi8lane [[INSERT_SUBREG1]], 0, [[INSERT_SUBREG]], 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[INSvi8lane]].dsub ; CHECK-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF ; CHECK-NEXT: [[ZIP1v8i8_:%[0-9]+]]:fpr64 = ZIP1v8i8 killed [[COPY3]], killed [[DEF2]] ; CHECK-NEXT: [[BICv4i16_:%[0-9]+]]:fpr64 = BICv4i16 [[ZIP1v8i8_]], 255, 8 @@ -338,8 +330,8 @@ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], killed [[UADDLVv8i16v]], %subreg.ssub ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[INSERT_SUBREG]].ssub - ; CHECK-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[MOVIv2d_ns]], 2, killed [[COPY1]] - ; CHECK-NEXT: [[UCVTFv4f32_:%[0-9]+]]:fpr128 = nofpexcept UCVTFv4f32 killed [[INSvi32gpr]], implicit $fpcr + ; CHECK-NEXT: [[INSvi32lane:%[0-9]+]]:fpr128 = INSvi32lane [[MOVIv2d_ns]], 2, [[INSERT_SUBREG]], 0 + ; CHECK-NEXT: [[UCVTFv4f32_:%[0-9]+]]:fpr128 = nofpexcept UCVTFv4f32 killed [[INSvi32lane]], implicit $fpcr ; CHECK-NEXT: STRQui killed [[UCVTFv4f32_]], [[COPY]], 0 :: (store (s128) into %ir.0, align 8) ; CHECK-NEXT: RET_ReallyLR %0:gpr64common = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -574,19 +574,18 @@ ; CHECK-NEXT: ldr s1, [x0] ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: fcmeq v1.4h, v1.4h, #0.0 ; CHECK-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov w9, v1.s[1] -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: mov v0.h[0], w8 -; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: mov v0.h[0], v1.h[0] +; CHECK-NEXT: mov w8, v1.s[1] +; CHECK-NEXT: mov v0.h[1], w8 ; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 -; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d] +; CHECK-NEXT: ld1h { z0.d }, p0/z, [z2.d] ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: str s0, [x0] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll @@ -18,10 +18,9 @@ ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: fcmeq v1.4h, v1.4h, v2.4h ; CHECK-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov w9, v1.s[1] -; CHECK-NEXT: mov v0.h[0], w8 -; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: mov v0.h[0], v1.h[0] +; CHECK-NEXT: mov w8, v1.s[1] +; CHECK-NEXT: mov v0.h[1], w8 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -539,11 +539,10 @@ ; CHECK-NEXT: fcmeq v2.4h, v1.4h, #0.0 ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: sshll v2.4s, v2.4h, #0 -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov w9, v2.s[1] +; CHECK-NEXT: mov v0.h[0], v2.h[0] +; CHECK-NEXT: mov w8, v2.s[1] ; CHECK-NEXT: ldr q2, [x1] -; CHECK-NEXT: mov v0.h[0], w8 -; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: mov v0.h[1], w8 ; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: sunpklo z0.s, z0.h diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll @@ -18,10 +18,9 @@ ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: fcmeq v2.4h, v1.4h, v2.4h ; CHECK-NEXT: sshll v2.4s, v2.4h, #0 -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov w9, v2.s[1] -; CHECK-NEXT: mov v0.h[0], w8 -; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: mov v0.h[0], v2.h[0] +; CHECK-NEXT: mov w8, v2.s[1] +; CHECK-NEXT: mov v0.h[1], w8 ; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0