Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2099,10 +2099,8 @@
   default:
     break;
   case TargetOpcode::COPY: {
-    // FPR64 copies will by lowered to ORR.16b
     Register DstReg = MI.getOperand(0).getReg();
-    return (AArch64::FPR64RegClass.contains(DstReg) ||
-            AArch64::FPR128RegClass.contains(DstReg));
+    return AArch64::FPR128RegClass.contains(DstReg);
   }
   case AArch64::ORRv16i8:
     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
@@ -3503,77 +3501,37 @@
 
   if (AArch64::FPR64RegClass.contains(DestReg) &&
       AArch64::FPR64RegClass.contains(SrcReg)) {
-    if (Subtarget.hasNEON()) {
-      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
-                                       &AArch64::FPR128RegClass);
-      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
-                                      &AArch64::FPR128RegClass);
-      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
-          .addReg(SrcReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    } else {
-      BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    }
+    BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
     return;
   }
 
   if (AArch64::FPR32RegClass.contains(DestReg) &&
       AArch64::FPR32RegClass.contains(SrcReg)) {
-    if (Subtarget.hasNEON()) {
-      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
-                                       &AArch64::FPR128RegClass);
-      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
-                                      &AArch64::FPR128RegClass);
-      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
-          .addReg(SrcReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    } else {
-      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    }
+    BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
     return;
   }
 
   if (AArch64::FPR16RegClass.contains(DestReg) &&
       AArch64::FPR16RegClass.contains(SrcReg)) {
-    if (Subtarget.hasNEON()) {
-      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
-                                       &AArch64::FPR128RegClass);
-      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
-                                      &AArch64::FPR128RegClass);
-      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
-          .addReg(SrcReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    } else {
-      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
-                                       &AArch64::FPR32RegClass);
-      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
-                                      &AArch64::FPR32RegClass);
-      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    }
+    DestReg =
+        RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
+    SrcReg =
+        RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
+    BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
     return;
   }
 
   if (AArch64::FPR8RegClass.contains(DestReg) &&
       AArch64::FPR8RegClass.contains(SrcReg)) {
-    if (Subtarget.hasNEON()) {
-      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
-                                       &AArch64::FPR128RegClass);
-      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
-                                      &AArch64::FPR128RegClass);
-      BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
-          .addReg(SrcReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    } else {
-      DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
-                                       &AArch64::FPR32RegClass);
-      SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
-                                      &AArch64::FPR32RegClass);
-      BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
-    }
+    DestReg =
+        RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
+    SrcReg =
+        RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
+    BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
     return;
   }
 
Index: llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
+++ llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
@@ -1,6 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC
 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC
 
 define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
Index: llvm/test/CodeGen/AArch64/arm64-aapcs.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-aapcs.ll
+++ llvm/test/CodeGen/AArch64/arm64-aapcs.ll
@@ -124,7 +124,7 @@
 ; Check that f16 can be passed and returned (ACLE 2.0 extension)
 define half @test_half(float, half %arg) {
 ; CHECK-LABEL: test_half:
-; CHECK: mov v0.16b, v1.16b
+; CHECK: fmov s0, s1
   ret half %arg;
 }
 
@@ -138,7 +138,7 @@
 ; Check that v4f16 can be passed and returned in registers
 define dso_local <4 x half> @test_v4_half_register(float, <4 x half> %arg) {
 ; CHECK-LABEL: test_v4_half_register:
-; CHECK: mov v0.16b, v1.16b
+; CHECK: fmov d0, d1
   ret <4 x half> %arg;
 }
 
Index: llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -224,7 +224,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v1.b[7], v0.b[2]
-; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <16 x i8> %tmp1, i32 2
   %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
@@ -236,7 +236,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v1.h[3], v0.h[2]
-; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
   %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
@@ -248,7 +248,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v1.s[1], v0.s[2]
-; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <4 x i32> %tmp1, i32 2
   %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
@@ -260,7 +260,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v1.d[0], v0.d[0]
-; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <2 x i64> %tmp1, i32 0
   %tmp4 = insertelement <1 x i64> %tmp2,
i64 %tmp3, i32 0 @@ -272,7 +272,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: mov v1.s[1], v0.s[2] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %tmp3 = extractelement <4 x float> %tmp1, i32 2 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 @@ -296,7 +296,7 @@ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov v1.b[4], v0.b[2] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %tmp3 = extractelement <8 x i8> %tmp1, i32 2 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4 @@ -309,7 +309,7 @@ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov v1.h[3], v0.h[2] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %tmp3 = extractelement <4 x i16> %tmp1, i32 2 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3 @@ -322,7 +322,7 @@ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov v1.s[1], v0.s[0] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %tmp3 = extractelement <2 x i32> %tmp1, i32 0 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 @@ -335,7 +335,7 @@ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov v1.d[0], v0.d[0] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %tmp3 = extractelement <1 x i64> %tmp1, i32 0 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0 @@ -348,7 +348,7 @@ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov v1.s[1], v0.s[0] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %tmp3 = extractelement <2 x float> %tmp1, i32 0 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 @@ -584,7 +584,7 @@ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov v1.b[7], v0.b[0] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> ret <8 x i8> %vset_lane @@ -1236,7 +1236,7 @@ ; CHECK-NEXT: mov v1.h[1], v0.h[1] ; CHECK-NEXT: mov v1.h[2], v0.h[2] ; CHECK-NEXT: mov v1.h[3], v0.h[3] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: add sp, sp, #16 // =16 ; CHECK-NEXT: ret %tmp = extractelement <8 x i16> %x, i32 %idx @@ -1264,7 +1264,7 @@ ; CHECK-NEXT: mov v1.h[1], v0.h[1] ; CHECK-NEXT: mov v1.h[2], v0.h[2] ; CHECK-NEXT: mov v1.h[3], v0.h[3] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: add sp, sp, #16 // =16 ; CHECK-NEXT: ret %tmp = extractelement <8 x i16> %x, i32 0 Index: llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -208,9 +208,9 @@ ; ; FAST-LABEL: test_vcvt_f16_f32: ; FAST: // %bb.0: -; FAST-NEXT: mov.16b v1, v0 +; FAST-NEXT: fmov d1, d0 ; FAST-NEXT: // implicit-def: $q0 -; FAST-NEXT: mov.16b v0, v1 +; FAST-NEXT: fmov d0, d1 ; FAST-NEXT: // kill: def $s0 killed $s0 killed $q0 ; FAST-NEXT: fcvt h0, s0 ; FAST-NEXT: ret @@ -237,9 +237,9 @@ ; ; FAST-LABEL: test_vcvt_high_f32_f64: ; FAST: // %bb.0: -; FAST-NEXT: mov.16b v2, v0 +; FAST-NEXT: fmov 
d2, d0 ; FAST-NEXT: // implicit-def: $q0 -; FAST-NEXT: mov.16b v0, v2 +; FAST-NEXT: fmov d0, d2 ; FAST-NEXT: fcvtn2 v0.4s, v1.2d ; FAST-NEXT: ret ; @@ -276,9 +276,9 @@ ; ; FAST-LABEL: test_vcvtx_high_f32_f64: ; FAST: // %bb.0: -; FAST-NEXT: mov.16b v2, v0 +; FAST-NEXT: fmov d2, d0 ; FAST-NEXT: // implicit-def: $q0 -; FAST-NEXT: mov.16b v0, v2 +; FAST-NEXT: fmov d0, d2 ; FAST-NEXT: fcvtxn2 v0.4s, v1.2d ; FAST-NEXT: ret ; @@ -313,7 +313,7 @@ ; FAST-NEXT: fcvt h1, s0 ; FAST-NEXT: // implicit-def: $w0 ; FAST-NEXT: fmov s0, w0 -; FAST-NEXT: mov.16b v0, v1 +; FAST-NEXT: fmov s0, s1 ; FAST-NEXT: fmov w0, s0 ; FAST-NEXT: // kill: def $w1 killed $w0 ; FAST-NEXT: ret Index: llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll +++ llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll @@ -205,7 +205,7 @@ ; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov.s v1[1], v0[0] -; CHECK-NEXT: mov.16b v0, v1 +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %v.0 = insertelement <2 x float> , float %a, i32 1 ret <2 x float> %v.0 Index: llvm/test/CodeGen/AArch64/bf16-vector-bitcast.ll =================================================================== --- llvm/test/CodeGen/AArch64/bf16-vector-bitcast.ll +++ llvm/test/CodeGen/AArch64/bf16-vector-bitcast.ll @@ -4,7 +4,7 @@ define <4 x i16> @v4bf16_to_v4i16(float, <4 x bfloat> %a) nounwind { ; CHECK-LABEL: v4bf16_to_v4i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x bfloat> %a to <4 x i16> @@ -14,7 +14,7 @@ define <2 x i32> @v4bf16_to_v2i32(float, <4 x bfloat> %a) nounwind { ; CHECK-LABEL: v4bf16_to_v2i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x bfloat> %a to <2 x i32> @@ -24,7 +24,7 @@ define <1 x i64> @v4bf16_to_v1i64(float, <4 x bfloat> %a) nounwind { ; CHECK-LABEL: v4bf16_to_v1i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x bfloat> %a to <1 x i64> @@ -44,7 +44,7 @@ define <2 x float> @v4bf16_to_v2float(float, <4 x bfloat> %a) nounwind { ; CHECK-LABEL: v4bf16_to_v2float: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x bfloat> %a to <2 x float> @@ -54,7 +54,7 @@ define <1 x double> @v4bf16_to_v1double(float, <4 x bfloat> %a) nounwind { ; CHECK-LABEL: v4bf16_to_v1double: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x bfloat> %a to <1 x double> @@ -64,7 +64,7 @@ define double @v4bf16_to_double(float, <4 x bfloat> %a) nounwind { ; CHECK-LABEL: v4bf16_to_double: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x bfloat> %a to double @@ -75,7 +75,7 @@ define <4 x bfloat> @v4i16_to_v4bf16(float, <4 x i16> %a) nounwind { ; CHECK-LABEL: v4i16_to_v4bf16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x i16> %a to <4 x bfloat> @@ -85,7 +85,7 @@ define <4 x bfloat> @v2i32_to_v4bf16(float, <2 x i32> %a) nounwind { ; CHECK-LABEL: v2i32_to_v4bf16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b 
+; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <2 x i32> %a to <4 x bfloat> @@ -95,7 +95,7 @@ define <4 x bfloat> @v1i64_to_v4bf16(float, <1 x i64> %a) nounwind { ; CHECK-LABEL: v1i64_to_v4bf16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <1 x i64> %a to <4 x bfloat> @@ -115,7 +115,7 @@ define <4 x bfloat> @v2float_to_v4bf16(float, <2 x float> %a) nounwind { ; CHECK-LABEL: v2float_to_v4bf16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <2 x float> %a to <4 x bfloat> @@ -125,7 +125,7 @@ define <4 x bfloat> @v1double_to_v4bf16(float, <1 x double> %a) nounwind { ; CHECK-LABEL: v1double_to_v4bf16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <1 x double> %a to <4 x bfloat> @@ -135,7 +135,7 @@ define <4 x bfloat> @double_to_v4bf16(float, double %a) nounwind { ; CHECK-LABEL: double_to_v4bf16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast double %a to <4 x bfloat> Index: llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll =================================================================== --- llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll +++ llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll @@ -150,7 +150,7 @@ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-NEXT: mov v1.h[1], v0.h[0] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %vset_lane = insertelement <4 x bfloat> %v, bfloat %a, i32 1 Index: llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll =================================================================== --- llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll +++ llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll @@ -11,7 +11,7 @@ ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: mov v1.s[1], w8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %y = bitcast <2 x half> %x to <2 x i16> ret <2 x i16> %y Index: llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll =================================================================== --- llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -664,7 +664,7 @@ ; CHECK-NEXT: cmp w19, #0 // =0 ; CHECK-NEXT: cinc w0, w19, gt ; CHECK-NEXT: mov w1, #2 -; CHECK-NEXT: mov v8.16b, v0.16b +; CHECK-NEXT: fmov d8, d0 ; CHECK-NEXT: bl xoo ; CHECK-NEXT: fmov d0, #-1.00000000 ; CHECK-NEXT: fadd d0, d8, d0 Index: llvm/test/CodeGen/AArch64/f16-instructions.ll =================================================================== --- llvm/test/CodeGen/AArch64/f16-instructions.ll +++ llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -144,9 +144,9 @@ ; CHECK-COMMON-LABEL: test_call_flipped: ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! 
; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: mov.16b v2, v0 -; CHECK-COMMON-NEXT: mov.16b v0, v1 -; CHECK-COMMON-NEXT: mov.16b v1, v2 +; CHECK-COMMON-NEXT: fmov s2, s0 +; CHECK-COMMON-NEXT: fmov s0, s1 +; CHECK-COMMON-NEXT: fmov s1, s2 ; CHECK-COMMON-NEXT: bl {{_?}}test_callee ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 ; CHECK-COMMON-NEXT: ret @@ -156,9 +156,9 @@ } ; CHECK-COMMON-LABEL: test_tailcall_flipped: -; CHECK-COMMON-NEXT: mov.16b v2, v0 -; CHECK-COMMON-NEXT: mov.16b v0, v1 -; CHECK-COMMON-NEXT: mov.16b v1, v2 +; CHECK-COMMON-NEXT: fmov s2, s0 +; CHECK-COMMON-NEXT: fmov s0, s1 +; CHECK-COMMON-NEXT: fmov s1, s2 ; CHECK-COMMON-NEXT: b {{_?}}test_callee define half @test_tailcall_flipped(half %a, half %b) #0 { %r = tail call half @test_callee(half %b, half %a) @@ -542,11 +542,11 @@ ; CHECK-COMMON: mov x[[PTR:[0-9]+]], x0 ; CHECK-COMMON: ldr h[[AB:[0-9]+]], [x0] ; CHECK-COMMON: [[LOOP:LBB[0-9_]+]]: -; CHECK-COMMON: mov.16b v[[R:[0-9]+]], v[[AB]] +; CHECK-COMMON: fmov s[[R:[0-9]+]], s[[AB]] ; CHECK-COMMON: ldr h[[AB]], [x[[PTR]]] ; CHECK-COMMON: mov x0, x[[PTR]] ; CHECK-COMMON: bl {{_?}}test_dummy -; CHECK-COMMON: mov.16b v0, v[[R]] +; CHECK-COMMON: fmov s0, s[[R]] ; CHECK-COMMON: ret define half @test_phi(half* %p1) #0 { entry: Index: llvm/test/CodeGen/AArch64/fadd-combines.ll =================================================================== --- llvm/test/CodeGen/AArch64/fadd-combines.ll +++ llvm/test/CodeGen/AArch64/fadd-combines.ll @@ -116,11 +116,11 @@ ; CHECK-NEXT: fmov d2, #-2.00000000 ; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fadd d8, d0, d1 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl use ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov d0, d8 ; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %mul = fmul double %b, -2.000000e+00 @@ -132,13 +132,13 @@ define float @fadd_const_multiuse_fmf(float %x) { ; CHECK-LABEL: fadd_const_multiuse_fmf: ; CHECK: // %bb.0: -; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144 -; CHECK-DAG: mov [[W42:w[0-9]+]], #1109917696 -; CHECK-DAG: fmov [[FP59:s[0-9]+]], [[W59]] -; CHECK-DAG: fmov [[FP42:s[0-9]+]], [[W42]] -; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP42]] -; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], s0, [[FP59]] -; CHECK-NEXT: fadd s0, [[TMP1]], [[TMP2]] +; CHECK-NEXT: mov w8, #1109917696 +; CHECK-NEXT: mov w9, #1114374144 +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fadd s1, s0, s1 +; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fadd s0, s1, s0 ; CHECK-NEXT: ret %a1 = fadd float %x, 42.0 %a2 = fadd nsz reassoc float %a1, 17.0 @@ -150,13 +150,13 @@ define float @fadd_const_multiuse_attr(float %x) { ; CHECK-LABEL: fadd_const_multiuse_attr: ; CHECK: // %bb.0: -; CHECK-DAG: mov [[W17:w[0-9]+]], #1109917696 -; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144 -; CHECK-NEXT: fmov [[FP17:s[0-9]+]], [[W17]] -; CHECK-NEXT: fmov [[FP59:s[0-9]+]], [[W59]] -; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP17]] -; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], s0, [[FP59]] -; CHECK-NEXT: fadd s0, [[TMP1]], [[TMP2]] +; CHECK-NEXT: mov w8, #1109917696 +; CHECK-NEXT: mov w9, #1114374144 +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fadd s1, s0, s1 +; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fadd s0, s1, s0 ; CHECK-NEXT: ret %a1 = fadd fast float %x, 42.0 %a2 = fadd fast float %a1, 17.0 Index: llvm/test/CodeGen/AArch64/fast-isel-select.ll 
=================================================================== --- llvm/test/CodeGen/AArch64/fast-isel-select.ll +++ llvm/test/CodeGen/AArch64/fast-isel-select.ll @@ -68,7 +68,7 @@ ; Now test the folding of all compares. define float @select_fcmp_false(float %x, float %a, float %b) { ; CHECK-LABEL: select_fcmp_false -; CHECK: mov.16b {{v[0-9]+}}, v2 +; CHECK: fmov {{s[0-9]+}}, s2 %1 = fcmp ogt float %x, %x %2 = select i1 %1, float %a, float %b ret float %2 @@ -196,7 +196,7 @@ define float @select_fcmp_true(float %x, float %a, float %b) { ; CHECK-LABEL: select_fcmp_true -; CHECK: mov.16b {{v[0-9]+}}, v1 +; CHECK: fmov {{s[0-9]+}}, s1 %1 = fcmp ueq float %x, %x %2 = select i1 %1, float %a, float %b ret float %2 Index: llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll =================================================================== --- llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -138,7 +138,7 @@ define <4 x half> @bitcast_i_to_h(float, <4 x i16> %a) { ; CHECK-COMMON-LABEL: bitcast_i_to_h: -; CHECK-COMMON: mov v0.16b, v1.16b +; CHECK-COMMON: fmov d0, d1 ; CHECK-COMMON-NEXT: ret %2 = bitcast <4 x i16> %a to <4 x half> ret <4 x half> %2 @@ -146,7 +146,7 @@ define <4 x i16> @bitcast_h_to_i(float, <4 x half> %a) { ; CHECK-COMMON-LABEL: bitcast_h_to_i: -; CHECK-COMMON: mov v0.16b, v1.16b +; CHECK-COMMON: fmov d0, d1 ; CHECK-COMMON-NEXT: ret %2 = bitcast <4 x half> %a to <4 x i16> ret <4 x i16> %2 Index: llvm/test/CodeGen/AArch64/fp16-vector-bitcast.ll =================================================================== --- llvm/test/CodeGen/AArch64/fp16-vector-bitcast.ll +++ llvm/test/CodeGen/AArch64/fp16-vector-bitcast.ll @@ -4,7 +4,7 @@ define <4 x i16> @v4f16_to_v4i16(float, <4 x half> %a) #0 { ; CHECK-LABEL: v4f16_to_v4i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x half> %a to <4 x i16> @@ -14,7 +14,7 @@ define <2 x i32> @v4f16_to_v2i32(float, <4 x half> %a) #0 { ; CHECK-LABEL: v4f16_to_v2i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x half> %a to <2 x i32> @@ -24,7 +24,7 @@ define <1 x i64> @v4f16_to_v1i64(float, <4 x half> %a) #0 { ; CHECK-LABEL: v4f16_to_v1i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x half> %a to <1 x i64> @@ -44,7 +44,7 @@ define <2 x float> @v4f16_to_v2float(float, <4 x half> %a) #0 { ; CHECK-LABEL: v4f16_to_v2float: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x half> %a to <2 x float> @@ -54,7 +54,7 @@ define <1 x double> @v4f16_to_v1double(float, <4 x half> %a) #0 { ; CHECK-LABEL: v4f16_to_v1double: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x half> %a to <1 x double> @@ -64,7 +64,7 @@ define double @v4f16_to_double(float, <4 x half> %a) #0 { ; CHECK-LABEL: v4f16_to_double: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x half> %a to double @@ -75,7 +75,7 @@ define <4 x half> @v4i16_to_v4f16(float, <4 x i16> %a) #0 { ; CHECK-LABEL: v4i16_to_v4f16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <4 x 
i16> %a to <4 x half> @@ -85,7 +85,7 @@ define <4 x half> @v2i32_to_v4f16(float, <2 x i32> %a) #0 { ; CHECK-LABEL: v2i32_to_v4f16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <2 x i32> %a to <4 x half> @@ -95,7 +95,7 @@ define <4 x half> @v1i64_to_v4f16(float, <1 x i64> %a) #0 { ; CHECK-LABEL: v1i64_to_v4f16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <1 x i64> %a to <4 x half> @@ -115,7 +115,7 @@ define <4 x half> @v2float_to_v4f16(float, <2 x float> %a) #0 { ; CHECK-LABEL: v2float_to_v4f16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <2 x float> %a to <4 x half> @@ -125,7 +125,7 @@ define <4 x half> @v1double_to_v4f16(float, <1 x double> %a) #0 { ; CHECK-LABEL: v1double_to_v4f16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast <1 x double> %a to <4 x half> @@ -135,7 +135,7 @@ define <4 x half> @double_to_v4f16(float, double %a) #0 { ; CHECK-LABEL: double_to_v4f16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret entry: %1 = bitcast double %a to <4 x half> Index: llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll +++ llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll @@ -148,7 +148,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill -; CHECK-NEXT: mov v8.16b, v0.16b +; CHECK-NEXT: fmov s8, s0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov w8, #-251658240 ; CHECK-NEXT: fmov s0, w8 @@ -177,7 +177,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill -; CHECK-NEXT: mov v8.16b, v0.16b +; CHECK-NEXT: fmov s8, s0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov w8, #-16777216 ; CHECK-NEXT: fmov s0, w8 @@ -345,7 +345,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill -; CHECK-NEXT: mov v8.16b, v0.16b +; CHECK-NEXT: fmov d8, d0 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: mov x8, #-4170333254945079296 ; CHECK-NEXT: fmov d0, x8 @@ -374,7 +374,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill -; CHECK-NEXT: mov v8.16b, v0.16b +; CHECK-NEXT: fmov d8, d0 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: mov x8, #-4044232465378705408 ; CHECK-NEXT: fmov d0, x8 @@ -562,7 +562,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov w8, #-251658240 @@ -592,7 +592,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov w8, #-16777216 Index: llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -1022,7 +1022,7 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov w8, #-251658240 ; CHECK-NEXT: mov w9, #1895825407 @@ -1089,7 +1089,7 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov w8, #-16777216 ; CHECK-NEXT: mov w9, #2130706431 @@ -1354,7 +1354,7 @@ ; CHECK-NEXT: .cfi_offset b10, -64 ; CHECK-NEXT: mov d8, v0.d[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov d0, d8 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: mov x8, #-4170333254945079296 ; CHECK-NEXT: mov x9, #5053038781909696511 @@ -1420,7 +1420,7 @@ ; CHECK-NEXT: .cfi_offset b10, -64 ; CHECK-NEXT: mov d8, v0.d[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov d0, d8 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: mov x8, #-4044232465378705408 ; CHECK-NEXT: mov x9, #5179139571476070399 @@ -1833,7 +1833,7 @@ ; CHECK-NEXT: mov h1, v0.h[1] ; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #-251658240 @@ -1851,7 +1851,7 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x19, xzr, x8, vs ; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: bl __fixsfti @@ -1865,7 +1865,7 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x21, xzr, x8, vs ; CHECK-NEXT: csel x22, xzr, x9, vs ; CHECK-NEXT: bl __fixsfti @@ -1878,7 +1878,7 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x23, xzr, x8, vs ; CHECK-NEXT: csel x24, xzr, x9, vs ; CHECK-NEXT: bl __fixsfti @@ -1941,7 +1941,7 @@ ; CHECK-NEXT: mov h1, v0.h[1] ; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #-16777216 @@ -1959,7 +1959,7 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x19, xzr, x8, vs ; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: bl __fixsfti @@ -1973,7 +1973,7 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; 
CHECK-NEXT: csel x21, xzr, x8, vs ; CHECK-NEXT: csel x22, xzr, x9, vs ; CHECK-NEXT: bl __fixsfti @@ -1986,7 +1986,7 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x23, xzr, x8, vs ; CHECK-NEXT: csel x24, xzr, x9, vs ; CHECK-NEXT: bl __fixsfti Index: llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll +++ llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll @@ -129,7 +129,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill -; CHECK-NEXT: mov v8.16b, v0.16b +; CHECK-NEXT: fmov s8, s0 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #1904214015 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload @@ -152,7 +152,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill -; CHECK-NEXT: mov v8.16b, v0.16b +; CHECK-NEXT: fmov s8, s0 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #2139095039 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload @@ -296,7 +296,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill -; CHECK-NEXT: mov v8.16b, v0.16b +; CHECK-NEXT: fmov d8, d0 ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: mov x8, #5057542381537067007 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload @@ -319,7 +319,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill -; CHECK-NEXT: mov v8.16b, v0.16b +; CHECK-NEXT: fmov d8, d0 ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: mov x8, #5183643171103440895 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload @@ -481,7 +481,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #1904214015 @@ -505,7 +505,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str d8, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #2139095039 Index: llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -916,7 +916,7 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #1904214015 ; CHECK-NEXT: fcmp s8, #0.0 @@ -967,7 +967,7 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #2139095039 ; CHECK-NEXT: fcmp s8, #0.0 @@ -1187,7 +1187,7 @@ ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: mov d8, v0.d[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov d0, d8 ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: mov x8, #5057542381537067007 ; CHECK-NEXT: fcmp d8, #0.0 @@ -1237,7 +1237,7 @@ ; CHECK-NEXT: .cfi_offset b9, -48 ; CHECK-NEXT: mov d8, v0.d[1] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov d0, d8 ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: mov x8, #5183643171103440895 ; CHECK-NEXT: fcmp d8, #0.0 @@ -1579,7 +1579,7 @@ ; CHECK-NEXT: mov h1, v0.h[2] ; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #1904214015 @@ -1591,7 +1591,7 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: mov x25, #68719476735 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x19, x25, x10, gt ; CHECK-NEXT: csinv x20, x9, xzr, le ; CHECK-NEXT: bl __fixunssfti @@ -1602,7 +1602,7 @@ ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x21, x25, x9, gt ; CHECK-NEXT: csinv x22, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti @@ -1612,7 +1612,7 @@ ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csel x23, x25, x9, gt ; CHECK-NEXT: csinv x24, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti @@ -1665,7 +1665,7 @@ ; CHECK-NEXT: mov h1, v0.h[1] ; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #2139095039 @@ -1676,7 +1676,7 @@ ; CHECK-NEXT: csel x10, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csinv x19, x10, xzr, le ; CHECK-NEXT: csinv x20, x9, xzr, le ; CHECK-NEXT: bl __fixunssfti @@ -1687,7 +1687,7 @@ ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csinv x21, 
x9, xzr, le ; CHECK-NEXT: csinv x22, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti @@ -1697,7 +1697,7 @@ ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: csinv x23, x9, xzr, le ; CHECK-NEXT: csinv x24, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti Index: llvm/test/CodeGen/AArch64/machine-combiner.ll =================================================================== --- llvm/test/CodeGen/AArch64/machine-combiner.ll +++ llvm/test/CodeGen/AArch64/machine-combiner.ll @@ -220,13 +220,13 @@ define double @reassociate_adds_from_calls() { ; CHECK-LABEL: reassociate_adds_from_calls: ; CHECK: bl bar -; CHECK-NEXT: mov v8.16b, v0.16b +; CHECK-NEXT: fmov d8, d0 ; CHECK-NEXT: bl bar -; CHECK-NEXT: mov v9.16b, v0.16b +; CHECK-NEXT: fmov d9, d0 ; CHECK-NEXT: bl bar -; CHECK-NEXT: mov v10.16b, v0.16b +; CHECK-NEXT: fmov d10, d0 ; CHECK-NEXT: bl bar -; CHECK: fadd d1, d8, d9 +; CHECK: fadd d1, d8, d9 ; CHECK-NEXT: fadd d0, d10, d0 ; CHECK-NEXT: fadd d0, d1, d0 %x0 = call double @bar() @@ -242,11 +242,11 @@ define double @already_reassociated() { ; CHECK-LABEL: already_reassociated: ; CHECK: bl bar -; CHECK-NEXT: mov v8.16b, v0.16b +; CHECK-NEXT: fmov d8, d0 ; CHECK-NEXT: bl bar -; CHECK-NEXT: mov v9.16b, v0.16b +; CHECK-NEXT: fmov d9, d0 ; CHECK-NEXT: bl bar -; CHECK-NEXT: mov v10.16b, v0.16b +; CHECK-NEXT: fmov d10, d0 ; CHECK-NEXT: bl bar ; CHECK: fadd d1, d8, d9 ; CHECK-NEXT: fadd d0, d10, d0 Index: llvm/test/CodeGen/AArch64/mla_mls_merge.ll =================================================================== --- llvm/test/CodeGen/AArch64/mla_mls_merge.ll +++ llvm/test/CodeGen/AArch64/mla_mls_merge.ll @@ -6,7 +6,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: umull v2.8h, v2.8b, v3.8b ; CHECK-NEXT: umlal v2.8h, v0.8b, v1.8b -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret entry: %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b) @@ -22,7 +22,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v2.8h, v2.8b, v3.8b ; CHECK-NEXT: smlal v2.8h, v0.8b, v1.8b -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret entry: %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b) @@ -38,7 +38,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: umull v2.4s, v2.4h, v3.4h ; CHECK-NEXT: umlal v2.4s, v0.4h, v1.4h -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret entry: %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b) @@ -54,7 +54,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v2.4s, v2.4h, v3.4h ; CHECK-NEXT: smlal v2.4s, v0.4h, v1.4h -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret entry: %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b) @@ -70,7 +70,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: umull v2.2d, v2.2s, v3.2s ; CHECK-NEXT: umlal v2.2d, v0.2s, v1.2s -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret entry: %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b) @@ -86,7 +86,7 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v2.2d, v2.2s, v3.2s ; CHECK-NEXT: smlal v2.2d, v0.2s, v1.2s -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret entry: %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b) Index: 
llvm/test/CodeGen/AArch64/neon-mla-mls.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-mla-mls.ll +++ llvm/test/CodeGen/AArch64/neon-mla-mls.ll @@ -6,7 +6,7 @@ ; CHECK-LABEL: mla8xi8: ; CHECK: // %bb.0: ; CHECK-NEXT: mla v2.8b, v0.8b, v1.8b -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <8 x i8> %A, %B; %tmp2 = add <8 x i8> %C, %tmp1; @@ -28,7 +28,7 @@ ; CHECK-LABEL: mla4xi16: ; CHECK: // %bb.0: ; CHECK-NEXT: mla v2.4h, v0.4h, v1.4h -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <4 x i16> %A, %B; %tmp2 = add <4 x i16> %C, %tmp1; @@ -50,7 +50,7 @@ ; CHECK-LABEL: mla2xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: mla v2.2s, v0.2s, v1.2s -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <2 x i32> %A, %B; %tmp2 = add <2 x i32> %C, %tmp1; @@ -72,7 +72,7 @@ ; CHECK-LABEL: mls8xi8: ; CHECK: // %bb.0: ; CHECK-NEXT: mls v2.8b, v0.8b, v1.8b -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <8 x i8> %A, %B; %tmp2 = sub <8 x i8> %C, %tmp1; @@ -94,7 +94,7 @@ ; CHECK-LABEL: mls4xi16: ; CHECK: // %bb.0: ; CHECK-NEXT: mls v2.4h, v0.4h, v1.4h -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <4 x i16> %A, %B; %tmp2 = sub <4 x i16> %C, %tmp1; @@ -116,7 +116,7 @@ ; CHECK-LABEL: mls2xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: mls v2.2s, v0.2s, v1.2s -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <2 x i32> %A, %B; %tmp2 = sub <2 x i32> %C, %tmp1; @@ -140,7 +140,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: neg v2.8b, v2.8b ; CHECK-NEXT: mla v2.8b, v0.8b, v1.8b -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <8 x i8> %A, %B; %tmp2 = sub <8 x i8> %tmp1, %C; @@ -164,7 +164,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: neg v2.4h, v2.4h ; CHECK-NEXT: mla v2.4h, v0.4h, v1.4h -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <4 x i16> %A, %B; %tmp2 = sub <4 x i16> %tmp1, %C; @@ -188,7 +188,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: neg v2.2s, v2.2s ; CHECK-NEXT: mla v2.2s, v0.2s, v1.2s -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <2 x i32> %A, %B; %tmp2 = sub <2 x i32> %tmp1, %C; Index: llvm/test/CodeGen/AArch64/popcount.ll =================================================================== --- llvm/test/CodeGen/AArch64/popcount.ll +++ llvm/test/CodeGen/AArch64/popcount.ll @@ -8,12 +8,12 @@ ; CHECK-NEXT: ldr x8, [x0, #8] ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: uaddlv h1, v0.16b ; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov s0, s1 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret Entry: @@ -34,21 +34,21 @@ ; CHECK-NEXT: ldr x9, [x0, #24] ; CHECK-NEXT: ldr d1, [x0, #16] ; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: uaddlv h1, v0.16b ; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov s0, s1 ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: mov v0.d[1], 
x8 ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: uaddlv h1, v0.16b ; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov s0, s1 ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w9 ; CHECK-NEXT: ret @@ -71,7 +71,7 @@ ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: uaddlv h1, v0.16b ; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov s0, s1 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: // kill: def $x0 killed $w0 ; CHECK-NEXT: movi v0.2d, #0000000000000000 Index: llvm/test/CodeGen/AArch64/sqrt-fastmath.ll =================================================================== --- llvm/test/CodeGen/AArch64/sqrt-fastmath.ll +++ llvm/test/CodeGen/AArch64/sqrt-fastmath.ll @@ -518,7 +518,7 @@ ; CHECK-NEXT: fmul d1, d0, d1 ; CHECK-NEXT: fcsel d0, d0, d1, eq ; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %sqrt = call fast double @llvm.sqrt.f64(double %x) store double %sqrt, double* %p Index: llvm/test/CodeGen/AArch64/swift-return.ll =================================================================== --- llvm/test/CodeGen/AArch64/swift-return.ll +++ llvm/test/CodeGen/AArch64/swift-return.ll @@ -234,9 +234,9 @@ } ; CHECK-LABEL: _gen10 -; CHECK: mov.16b v1, v0 -; CHECK: mov.16b v2, v0 -; CHECK: mov.16b v3, v0 +; CHECK: fmov d1, d0 +; CHECK: fmov d2, d0 +; CHECK: fmov d3, d0 ; CHECK: mov w1, w0 ; CHECK: mov w2, w0 ; CHECK: mov w3, w0 @@ -278,7 +278,7 @@ ; CHECK-LABEL: _test12 ; CHECK: fadd.4s v0, v0, v1 ; CHECK: fadd.4s v0, v0, v2 -; CHECK: mov.16b v1, v3 +; CHECK: fmov s1, s3 define swiftcc { <4 x float>, float } @test12() #0 { entry: %call = call swiftcc { <4 x float>, <4 x float>, <4 x float>, float } @gen12() Index: llvm/test/CodeGen/AArch64/urem-vector-lkk.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-vector-lkk.ll +++ llvm/test/CodeGen/AArch64/urem-vector-lkk.ll @@ -171,7 +171,7 @@ ; CHECK-NEXT: mov v1.h[1], w10 ; CHECK-NEXT: mov v1.h[2], w9 ; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -208,7 +208,7 @@ ; CHECK-NEXT: mov v1.h[2], w8 ; CHECK-NEXT: msub w8, w11, w9, w10 ; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 Index: llvm/test/CodeGen/AArch64/vec-libcalls.ll =================================================================== --- llvm/test/CodeGen/AArch64/vec-libcalls.ll +++ llvm/test/CodeGen/AArch64/vec-libcalls.ll @@ -145,31 +145,31 @@ ; CHECK-NEXT: stp d11, d10, [sp, #8] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill -; CHECK-NEXT: mov v8.16b, v4.16b -; CHECK-NEXT: mov v9.16b, v3.16b -; CHECK-NEXT: mov v10.16b, v2.16b -; CHECK-NEXT: mov v11.16b, v1.16b +; CHECK-NEXT: fmov s8, s4 +; CHECK-NEXT: fmov s9, s3 +; CHECK-NEXT: fmov s10, s2 +; CHECK-NEXT: fmov s11, s1 ; CHECK-NEXT: bl sinf -; CHECK-NEXT: mov v12.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v11.16b +; CHECK-NEXT: fmov s12, s0 +; CHECK-NEXT: fmov s0, s11 ; CHECK-NEXT: bl sinf -; CHECK-NEXT: mov v11.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v10.16b +; CHECK-NEXT: fmov s11, s0 +; CHECK-NEXT: fmov s0, s10 ; CHECK-NEXT: bl sinf -; CHECK-NEXT: mov v10.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v9.16b +; CHECK-NEXT: fmov s10, s0 +; CHECK-NEXT: fmov s0, s9 ; CHECK-NEXT: bl 
sinf -; CHECK-NEXT: mov v9.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s9, s0 +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl sinf -; CHECK-NEXT: mov v1.16b, v11.16b -; CHECK-NEXT: mov v2.16b, v10.16b -; CHECK-NEXT: mov v3.16b, v9.16b +; CHECK-NEXT: fmov s1, s11 +; CHECK-NEXT: fmov s2, s10 +; CHECK-NEXT: fmov s3, s9 ; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload -; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v12.16b +; CHECK-NEXT: fmov s4, s0 +; CHECK-NEXT: fmov s0, s12 ; CHECK-NEXT: ldr d12, [sp], #48 // 8-byte Folded Reload ; CHECK-NEXT: ret %r = call <5 x float> @llvm.sin.v5f32(<5 x float> %x) @@ -183,36 +183,36 @@ ; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill -; CHECK-NEXT: mov v8.16b, v5.16b -; CHECK-NEXT: mov v9.16b, v4.16b -; CHECK-NEXT: mov v10.16b, v3.16b -; CHECK-NEXT: mov v11.16b, v2.16b -; CHECK-NEXT: mov v12.16b, v1.16b +; CHECK-NEXT: fmov s8, s5 +; CHECK-NEXT: fmov s9, s4 +; CHECK-NEXT: fmov s10, s3 +; CHECK-NEXT: fmov s11, s2 +; CHECK-NEXT: fmov s12, s1 ; CHECK-NEXT: bl sinf -; CHECK-NEXT: mov v13.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v12.16b +; CHECK-NEXT: fmov s13, s0 +; CHECK-NEXT: fmov s0, s12 ; CHECK-NEXT: bl sinf -; CHECK-NEXT: mov v12.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v11.16b +; CHECK-NEXT: fmov s12, s0 +; CHECK-NEXT: fmov s0, s11 ; CHECK-NEXT: bl sinf -; CHECK-NEXT: mov v11.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v10.16b +; CHECK-NEXT: fmov s11, s0 +; CHECK-NEXT: fmov s0, s10 ; CHECK-NEXT: bl sinf -; CHECK-NEXT: mov v10.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v9.16b +; CHECK-NEXT: fmov s10, s0 +; CHECK-NEXT: fmov s0, s9 ; CHECK-NEXT: bl sinf -; CHECK-NEXT: mov v9.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov s9, s0 +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl sinf -; CHECK-NEXT: mov v2.16b, v11.16b -; CHECK-NEXT: mov v3.16b, v10.16b -; CHECK-NEXT: mov v4.16b, v9.16b +; CHECK-NEXT: fmov s2, s11 +; CHECK-NEXT: fmov s3, s10 +; CHECK-NEXT: fmov s4, s9 ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v5.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v13.16b -; CHECK-NEXT: mov v1.16b, v12.16b +; CHECK-NEXT: fmov s5, s0 +; CHECK-NEXT: fmov s0, s13 +; CHECK-NEXT: fmov s1, s12 ; CHECK-NEXT: ldp d13, d12, [sp], #64 // 16-byte Folded Reload ; CHECK-NEXT: ret %r = call <6 x float> @llvm.sin.v6f32(<6 x float> %x) @@ -225,20 +225,20 @@ ; CHECK-NEXT: str d10, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill -; CHECK-NEXT: mov v8.16b, v2.16b -; CHECK-NEXT: mov v9.16b, v1.16b +; CHECK-NEXT: fmov d8, d2 +; CHECK-NEXT: fmov d9, d1 ; CHECK-NEXT: bl sin -; CHECK-NEXT: mov v10.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v9.16b +; CHECK-NEXT: fmov d10, d0 +; CHECK-NEXT: fmov d0, d9 ; CHECK-NEXT: bl sin -; CHECK-NEXT: mov v9.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v8.16b +; CHECK-NEXT: fmov d9, d0 +; CHECK-NEXT: fmov d0, d8 ; CHECK-NEXT: bl sin -; CHECK-NEXT: mov v1.16b, v9.16b +; CHECK-NEXT: fmov d1, d9 ; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v10.16b +; CHECK-NEXT: fmov d2, d0 +; CHECK-NEXT: fmov d0, d10 ; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret %r = call <3 x double> @llvm.sin.v3f64(<3 x double> %x) Index: llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll =================================================================== --- llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll +++ llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py - ; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s ; Test LSR for giving small constants, which get re-associated as unfolded @@ -34,7 +33,7 @@ ; CHECK-NEXT: fmov s0, #-7.00000000 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_5: // %cleanup2 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov s0, s1 ; CHECK-NEXT: ret entry: %cmp11 = icmp eq i64 %start, 0 @@ -81,7 +80,7 @@ ; CHECK-NEXT: fmov s0, #-7.00000000 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_5: // %cleanup4 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov s0, s1 ; CHECK-NEXT: ret entry: %cmp14 = icmp eq i64 %start, 0