diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1015,10 +1015,6 @@ // elements smaller than i32, so promote the input to i32 first. setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32); setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32); - setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32); - setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32); - setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32); - setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32); // Similarly, there is no direct i32 -> f64 vector conversion instruction. setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); @@ -1031,6 +1027,10 @@ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); if (Subtarget->hasFullFP16()) { + setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom); @@ -1038,6 +1038,10 @@ } else { // when AArch64 doesn't have fullfp16 support, promote the input // to i32 first. + setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32); + setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32); + setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32); + setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32); setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32); setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32); setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32); diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll --- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -414,41 +414,55 @@ } define <8 x half> @sitofp_v8i8(<8 x i8> %a) #0 { -; CHECK-LABEL: sitofp_v8i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: scvtf v1.4s, v1.4s -; CHECK-NEXT: scvtf v0.4s, v0.4s -; CHECK-NEXT: fcvtn v1.4h, v1.4s -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: sitofp_v8i8: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-CVT-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s +; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s +; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s +; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-NEXT: mov v0.d[1], v1.d[0] +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: sitofp_v8i8: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h +; CHECK-FP16-NEXT: ret %1 = sitofp <8 x i8> %a to <8 x half> ret <8 x half> %1 } define <16 x half> @sitofp_v16i8(<16 x i8> %a) #0 { -; CHECK-LABEL: sitofp_v16i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0 -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0 -; CHECK-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-NEXT: sshll2 v3.4s, v0.8h, #0 -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: scvtf v2.4s, v2.4s -; CHECK-NEXT: scvtf v1.4s, v1.4s -; CHECK-NEXT: scvtf v3.4s, v3.4s -; CHECK-NEXT: scvtf v0.4s, v0.4s -; CHECK-NEXT: fcvtn v2.4h, v2.4s -; CHECK-NEXT: fcvtn v1.4h, v1.4s -; CHECK-NEXT: fcvtn v3.4h, v3.4s -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: mov v1.d[1], v2.d[0] -; CHECK-NEXT: mov v0.d[1], v3.d[0] -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: sitofp_v16i8: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-CVT-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-CVT-NEXT: sshll2 v2.4s, v1.8h, #0 +; CHECK-CVT-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-CVT-NEXT: sshll2 v3.4s, v0.8h, #0 +; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-CVT-NEXT: scvtf v2.4s, v2.4s +; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s +; CHECK-CVT-NEXT: scvtf v3.4s, v3.4s +; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s +; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s +; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s +; CHECK-CVT-NEXT: fcvtn v3.4h, v3.4s +; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-NEXT: mov v1.d[1], v2.d[0] +; CHECK-CVT-NEXT: mov v0.d[1], v3.d[0] +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: sitofp_v16i8: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-FP16-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-FP16-NEXT: scvtf v1.8h, v1.8h +; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h +; CHECK-FP16-NEXT: ret %1 = sitofp <16 x i8> %a to <16 x half> ret <16 x half> %1 } @@ -525,41 +539,55 @@ } define <8 x half> @uitofp_v8i8(<8 x i8> %a) #0 { -; CHECK-LABEL: uitofp_v8i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ucvtf v1.4s, v1.4s -; CHECK-NEXT: ucvtf v0.4s, v0.4s -; CHECK-NEXT: fcvtn v1.4h, v1.4s -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: uitofp_v8i8: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-CVT-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s +; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s +; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s +; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-NEXT: mov v0.d[1], v1.d[0] +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: uitofp_v8i8: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h +; CHECK-FP16-NEXT: ret %1 = uitofp <8 x i8> %a to <8 x half> ret <8 x half> %1 } define <16 x half> @uitofp_v16i8(<16 x i8> %a) #0 { -; CHECK-LABEL: uitofp_v16i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0 -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ucvtf v2.4s, v2.4s -; CHECK-NEXT: ucvtf v1.4s, v1.4s -; CHECK-NEXT: ucvtf v3.4s, v3.4s -; CHECK-NEXT: ucvtf v0.4s, v0.4s -; CHECK-NEXT: fcvtn v2.4h, v2.4s -; CHECK-NEXT: fcvtn v1.4h, v1.4s -; CHECK-NEXT: fcvtn v3.4h, v3.4s -; CHECK-NEXT: fcvtn v0.4h, v0.4s -; CHECK-NEXT: mov v1.d[1], v2.d[0] -; CHECK-NEXT: mov v0.d[1], v3.d[0] -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: uitofp_v16i8: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-CVT-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-CVT-NEXT: ushll2 v2.4s, v1.8h, #0 +; CHECK-CVT-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-CVT-NEXT: ushll2 v3.4s, v0.8h, #0 +; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-CVT-NEXT: ucvtf v2.4s, v2.4s +; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s +; CHECK-CVT-NEXT: ucvtf v3.4s, v3.4s +; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s +; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s +; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s +; CHECK-CVT-NEXT: fcvtn v3.4h, v3.4s +; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s +; CHECK-CVT-NEXT: mov v1.d[1], v2.d[0] +; CHECK-CVT-NEXT: mov v0.d[1], v3.d[0] +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: uitofp_v16i8: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ushll2 v1.8h, v0.16b, #0 +; CHECK-FP16-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-FP16-NEXT: ucvtf v1.8h, v1.8h +; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h +; CHECK-FP16-NEXT: ret %1 = uitofp <16 x i8> %a to <16 x half> ret <16 x half> %1 }