Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -721,12 +721,18 @@
 }
 
 void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
-  if (VT == MVT::v2f32 || VT == MVT::v4f16) {
+  if (VT == MVT::v2f32) {
     setOperationAction(ISD::LOAD, VT, Promote);
     AddPromotedToType(ISD::LOAD, VT, MVT::v2i32);
 
     setOperationAction(ISD::STORE, VT, Promote);
     AddPromotedToType(ISD::STORE, VT, MVT::v2i32);
+  } else if (VT == MVT::v4f16) {
+    setOperationAction(ISD::LOAD, VT, Promote);
+    AddPromotedToType(ISD::LOAD, VT, MVT::v4i16);
+
+    setOperationAction(ISD::STORE, VT, Promote);
+    AddPromotedToType(ISD::STORE, VT, MVT::v4i16);
   } else if (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16) {
     setOperationAction(ISD::LOAD, VT, Promote);
     AddPromotedToType(ISD::LOAD, VT, MVT::v2i64);
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -5849,7 +5849,7 @@
 def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
           (v4f16 (REV64v4i16 FPR64:$src))>;
 def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))),
-          (v4f16 (REV64v4i16 FPR64:$src))>;
+          (v4f16 FPR64:$src)>;
 def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))),
           (v4f16 (REV16v8i8 FPR64:$src))>;
 def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))),
Index: test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
===================================================================
--- test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
+++ test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
@@ -1099,3 +1099,17 @@
   store <16 x i8> %4, <16 x i8>* %q
   ret void
 }
+
+; CHECK-LABEL: test_v4f16_struct:
+%struct.struct1 = type { half, half, half, half }
+define %struct.struct1 @test_v4f16_struct(%struct.struct1* %ret) {
+entry:
+; CHECK: ld1 { v{{[0-9]+}}.4h }
+; CHECK-NOT: ld1 { {{v[0-9]+}}.2s }
+; CHECK-NOT: rev64
+  %0 = bitcast %struct.struct1* %ret to <4 x half>*
+  %1 = load <4 x half>, <4 x half>* %0, align 2
+  %2 = extractelement <4 x half> %1, i32 0
+  %.fca.0.insert = insertvalue %struct.struct1 undef, half %2, 0
+  ret %struct.struct1 %.fca.0.insert
+}