Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -3994,6 +3994,8 @@ }; switch (EltSize) { + case 8: + return BuildFn(AArch64::bsub); case 16: return BuildFn(AArch64::hsub); case 32: @@ -5543,7 +5545,7 @@ if (tryOptBuildVecToSubregToReg(I, MRI)) return true; - if (EltSize < 16 || EltSize > 64) + if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64) return false; // Don't support all element types yet. const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); Index: llvm/test/CodeGen/AArch64/load-insert-zero.ll =================================================================== --- llvm/test/CodeGen/AArch64/load-insert-zero.ll +++ llvm/test/CodeGen/AArch64/load-insert-zero.ll @@ -1,74 +1,161 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+bf16 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+bf16 | FileCheck %s -check-prefixes=CHECK-SD +; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+bf16 -global-isel | FileCheck %s -check-prefixes=CHECK-GI define <8 x i8> @loadv8i8(ptr %p) { -; CHECK-LABEL: loadv8i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv8i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv8i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #0 +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[4], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[5], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[6], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[7], v1.b[0] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %v = insertelement <8 x i8> zeroinitializer, i8 %l, i32 0 ret <8 x i8> %v } define <16 x i8> @loadv4i8(ptr %p) { -; CHECK-LABEL: loadv4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #0 +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[4], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[5], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[6], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[7], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[8], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[9], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[10], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[11], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[12], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[13], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[14], v1.b[0] +; CHECK-GI-NEXT: mov v0.b[15], v1.b[0] +; CHECK-GI-NEXT: ret %l = load i8, ptr %p %v = insertelement <16 x i8> zeroinitializer, i8 %l, i32 0 ret <16 x i8> %v } define <4 x i16> @loadv4i16(ptr %p) { -; CHECK-LABEL: loadv4i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #0 +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v1.h[0] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %v = insertelement <4 x i16> zeroinitializer, i16 %l, i32 0 ret <4 x i16> %v } define <8 x i16> @loadv8i16(ptr %p) { -; CHECK-LABEL: loadv8i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv8i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv8i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #0 +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[4], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[5], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[6], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[7], v1.h[0] +; CHECK-GI-NEXT: ret %l = load i16, ptr %p %v = insertelement <8 x i16> zeroinitializer, i16 %l, i32 0 ret <8 x i16> %v } define <2 x i32> @loadv2i32(ptr %p) { -; CHECK-LABEL: loadv2i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv2i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv2i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s1, wzr +; CHECK-GI-NEXT: ldr s0, [x0] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %v = insertelement <2 x i32> zeroinitializer, i32 %l, i32 0 ret <2 x i32> %v } define <4 x i32> @loadv4i32(ptr %p) { -; CHECK-LABEL: loadv4i32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv4i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv4i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s1, wzr +; CHECK-GI-NEXT: ldr s0, [x0] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v1.s[0] +; CHECK-GI-NEXT: ret %l = load i32, ptr %p %v = insertelement <4 x i32> zeroinitializer, i32 %l, i32 0 ret <4 x i32> %v } define <2 x i64> @loadv2i64(ptr %p) { -; CHECK-LABEL: loadv2i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv2i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv2i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov d1, xzr +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: ret %l = load i64, ptr %p %v = insertelement <2 x i64> zeroinitializer, i64 %l, i32 0 ret <2 x i64> %v @@ -76,74 +163,145 @@ define <4 x half> @loadv4f16(ptr %p) { -; CHECK-LABEL: loadv4f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv4f16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv4f16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d1, #0000000000000000 +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v1.h[0] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %l = load half, ptr %p %v = insertelement <4 x half> zeroinitializer, half %l, i32 0 ret <4 x half> %v } define <8 x half> @loadv8f16(ptr %p) { -; CHECK-LABEL: loadv8f16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv8f16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv8f16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d1, #0000000000000000 +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[4], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[5], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[6], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[7], v1.h[0] +; CHECK-GI-NEXT: ret %l = load half, ptr %p %v = insertelement <8 x half> zeroinitializer, half %l, i32 0 ret <8 x half> %v } define <4 x bfloat> @loadv4bf16(ptr %p) { -; CHECK-LABEL: loadv4bf16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv4bf16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv4bf16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d1, #0000000000000000 +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v1.h[0] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %l = load bfloat, ptr %p %v = insertelement <4 x bfloat> zeroinitializer, bfloat %l, i32 0 ret <4 x bfloat> %v } define <8 x bfloat> @loadv8bf16(ptr %p) { -; CHECK-LABEL: loadv8bf16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv8bf16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv8bf16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d1, #0000000000000000 +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[4], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[5], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[6], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[7], v1.h[0] +; CHECK-GI-NEXT: ret %l = load bfloat, ptr %p %v = insertelement <8 x bfloat> zeroinitializer, bfloat %l, i32 0 ret <8 x bfloat> %v } define <2 x float> @loadv2f32(ptr %p) { -; CHECK-LABEL: loadv2f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv2f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv2f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d1, #0000000000000000 +; CHECK-GI-NEXT: ldr s0, [x0] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret %l = load float, ptr %p %v = insertelement <2 x float> zeroinitializer, float %l, i32 0 ret <2 x float> %v } define <4 x float> @loadv4f32(ptr %p) { -; CHECK-LABEL: loadv4f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv4f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv4f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d1, #0000000000000000 +; CHECK-GI-NEXT: ldr s0, [x0] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v1.s[0] +; CHECK-GI-NEXT: ret %l = load float, ptr %p %v = insertelement <4 x float> zeroinitializer, float %l, i32 0 ret <4 x float> %v } define <2 x double> @loadv2f64(ptr %p) { -; CHECK-LABEL: loadv2f64: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loadv2f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loadv2f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi d1, #0000000000000000 +; CHECK-GI-NEXT: ldr d0, [x0] +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: ret %l = load double, ptr %p %v = insertelement <2 x double> zeroinitializer, double %l, i32 0 ret <2 x double> %v } +