Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -5128,22 +5128,26 @@
 // Natural vector casts (64 bit)
 def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
 def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
 def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
 def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
 def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
 
 def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
 def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
 def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
 def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
 
 def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
 def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
 def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
 def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
 
 def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
 def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
 def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
 def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
 def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
@@ -5158,22 +5162,26 @@
 // Natural vector casts (128 bit)
 def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
 def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
 def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
 def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
 def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
 
 def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
 def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
 def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
 def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
 
 def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
 def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
 def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
 def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
 
 def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
 def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
 def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
 def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
 def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
Index: test/CodeGen/AArch64/fp16-vector-nvcast.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/fp16-vector-nvcast.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s
+
+; Test pattern (v4f16 (AArch64NvCast (v2i32 FPR64:$src)))
+define void @nvcast_v2i32(<4 x half>* %a) {
+; CHECK-LABEL: nvcast_v2i32:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: movi v[[REG:[0-9]+]].2s, #0xab, lsl #16
+; CHECK-NEXT: str d[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <4 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <4 x half>* %a
+  ret void
+}
+
+
+; Test pattern (v4f16 (AArch64NvCast (v4i16 FPR64:$src)))
+define void @nvcast_v4i16(<4 x half>* %a) {
+; CHECK-LABEL: nvcast_v4i16:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: movi v[[REG:[0-9]+]].4h, #0xab
+; CHECK-NEXT: str d[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <4 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <4 x half>* %a
+  ret void
+}
+
+
+; Test pattern (v4f16 (AArch64NvCast (v8i8 FPR64:$src)))
+define void @nvcast_v8i8(<4 x half>* %a) {
+; CHECK-LABEL: nvcast_v8i8:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: movi v[[REG:[0-9]+]].8b, #0xab
+; CHECK-NEXT: str d[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <4 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <4 x half>* %a
+  ret void
+}
+
+
+; Test pattern (v4f16 (AArch64NvCast (f64 FPR64:$src)))
+define void @nvcast_f64(<4 x half>* %a) {
+; CHECK-LABEL: nvcast_f64:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: movi d[[REG:[0-9]+]], #0000000000000000
+; CHECK-NEXT: str d[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <4 x half> zeroinitializer, <4 x half>* %a
+  ret void
+}
+
+; Test pattern (v8f16 (AArch64NvCast (v4i32 FPR128:$src)))
+define void @nvcast_v4i32(<8 x half>* %a) {
+; CHECK-LABEL: nvcast_v4i32:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: movi v[[REG:[0-9]+]].4s, #0xab, lsl #16
+; CHECK-NEXT: str q[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <8 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <8 x half>* %a
+  ret void
+}
+
+
+; Test pattern (v8f16 (AArch64NvCast (v8i16 FPR128:$src)))
+define void @nvcast_v8i16(<8 x half>* %a) {
+; CHECK-LABEL: nvcast_v8i16:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: movi v[[REG:[0-9]+]].8h, #0xab
+; CHECK-NEXT: str q[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <8 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <8 x half>* %a
+  ret void
+}
+
+
+; Test pattern (v8f16 (AArch64NvCast (v16i8 FPR128:$src)))
+define void @nvcast_v16i8(<8 x half>* %a) {
+; CHECK-LABEL: nvcast_v16i8:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: movi v[[REG:[0-9]+]].16b, #0xab
+; CHECK-NEXT: str q[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <8 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <8 x half>* %a
+  ret void
+}
+
+
+; Test pattern (v8f16 (AArch64NvCast (v2i64 FPR128:$src)))
+define void @nvcast_v2i64(<8 x half>* %a) {
+; CHECK-LABEL: nvcast_v2i64:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: movi v[[REG:[0-9]+]].2d, #0000000000000000
+; CHECK-NEXT: str q[[REG]], [x0]
+; CHECK-NEXT: ret
+  store volatile <8 x half> zeroinitializer, <8 x half>* %a
+  ret void
+}