Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -721,18 +721,37 @@ } void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) { - if (VT == MVT::v2f32 || VT == MVT::v4f16) { - setOperationAction(ISD::LOAD, VT, Promote); - AddPromotedToType(ISD::LOAD, VT, MVT::v2i32); + bool IsPromotedFloatVector = true; + MVT PromoteTo = MVT::INVALID_SIMPLE_VALUE_TYPE; + switch (VT.SimpleTy) { + case MVT::v2f32: + PromoteTo = MVT::v2i32; + break; + case MVT::v4f16: + PromoteTo = MVT::v4i16; + break; + case MVT::v2f64: + PromoteTo = MVT::v2i64; + break; + case MVT::v4f32: + PromoteTo = MVT::v4i32; + break; + case MVT::v8f16: + PromoteTo = MVT::v8i16; + break; + default: + IsPromotedFloatVector = false; + } + + if (IsPromotedFloatVector) { + assert(PromoteTo != MVT::INVALID_SIMPLE_VALUE_TYPE && + "Invalid type to promote NEON type to"); - setOperationAction(ISD::STORE, VT, Promote); - AddPromotedToType(ISD::STORE, VT, MVT::v2i32); - } else if (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16) { setOperationAction(ISD::LOAD, VT, Promote); - AddPromotedToType(ISD::LOAD, VT, MVT::v2i64); + AddPromotedToType(ISD::LOAD, VT, PromoteTo); setOperationAction(ISD::STORE, VT, Promote); - AddPromotedToType(ISD::STORE, VT, MVT::v2i64); + AddPromotedToType(ISD::STORE, VT, PromoteTo); } // Mark vector float intrinsics as expand. Index: test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll =================================================================== --- test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll +++ test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll @@ -53,8 +53,9 @@ ; CHECK-LABEL: test_i64_v4f16: define void @test_i64_v4f16(<4 x half>* %p, i64* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2s } -; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: ld1 { v{{[0-9]+}}.4h } +; CHECK-NOT: rev +; CHECK: fadd ; CHECK: rev64 v{{[0-9]+}}.4h ; CHECK: str %1 = load <4 x half>, <4 x half>* %p @@ -156,9 +157,11 @@ ; CHECK-LABEL: test_f64_v4f16: define void @test_f64_v4f16(<4 x half>* %p, double* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2s } -; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: ld1 { v{{[0-9]+}}.4h } +; CHECK-NOT: rev +; CHECK: fadd ; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: fadd ; CHECK: str %1 = load <4 x half>, <4 x half>* %p %2 = fadd <4 x half> %1, %1 @@ -233,8 +236,9 @@ ; CHECK-LABEL: test_v1i64_v4f16: define void @test_v1i64_v4f16(<4 x half>* %p, <1 x i64>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2s } -; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: ld1 { v{{[0-9]+}}.4h } +; CHECK-NOT: rev +; CHECK: fadd ; CHECK: rev64 v{{[0-9]+}}.4h ; CHECK: str %1 = load <4 x half>, <4 x half>* %p @@ -337,8 +341,9 @@ ; CHECK-LABEL: test_v2f32_v4f16: define void @test_v2f32_v4f16(<4 x half>* %p, <2 x float>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2s } -; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: ld1 { v{{[0-9]+}}.4h } +; CHECK-NOT: rev +; CHECK: fadd ; CHECK: rev32 v{{[0-9]+}}.4h ; CHECK: st1 { v{{[0-9]+}}.2s } %1 = load <4 x half>, <4 x half>* %p @@ -506,8 +511,8 @@ ; CHECK-LABEL: test_v4i16_v4f16: define void @test_v4i16_v4f16(<4 x half>* %p, <4 x i16>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2s } -; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: ld1 { v{{[0-9]+}}.4h } +; CHECK-NOT: rev ; CHECK: st1 { v{{[0-9]+}}.4h } %1 = load <4 x half>, <4 x half>* %p %2 = fadd <4 x half> %1, %1 @@ -534,8 +539,9 @@ define void @test_v4f16_i64(i64* %p, <4 x half>* %q) { ; CHECK: ldr ; CHECK: rev64 v{{[0-9]+}}.4h -; CHECK: rev32 v{{[0-9]+}}.4h -; CHECK: st1 { v{{[0-9]+}}.2s } +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } %1 = load i64, i64* %p %2 = add i64 %1, %1 %3 = bitcast i64 %2 to <4 x half> @@ -548,8 +554,9 @@ define void @test_v4f16_f64(double* %p, <4 x half>* %q) { ; CHECK: ldr ; CHECK: rev64 v{{[0-9]+}}.4h -; CHECK: rev32 v{{[0-9]+}}.4h -; CHECK: st1 { v{{[0-9]+}}.2s } +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } %1 = load double, double* %p %2 = fadd double %1, %1 %3 = bitcast double %2 to <4 x half> @@ -562,8 +569,9 @@ define void @test_v4f16_v1i64(<1 x i64>* %p, <4 x half>* %q) { ; CHECK: ldr ; CHECK: rev64 v{{[0-9]+}}.4h -; CHECK: rev32 v{{[0-9]+}}.4h -; CHECK: st1 { v{{[0-9]+}}.2s } +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } %1 = load <1 x i64>, <1 x i64>* %p %2 = add <1 x i64> %1, %1 %3 = bitcast <1 x i64> %2 to <4 x half> @@ -576,8 +584,9 @@ define void @test_v4f16_v2f32(<2 x float>* %p, <4 x half>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.2s } ; CHECK: rev32 v{{[0-9]+}}.4h -; CHECK: rev32 v{{[0-9]+}}.4h -; CHECK: st1 { v{{[0-9]+}}.2s } +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } %1 = load <2 x float>, <2 x float>* %p %2 = fadd <2 x float> %1, %1 %3 = bitcast <2 x float> %2 to <4 x half> @@ -590,8 +599,9 @@ define void @test_v4f16_v2i32(<2 x i32>* %p, <4 x half>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.2s } ; CHECK: rev32 v{{[0-9]+}}.4h -; CHECK: rev32 v{{[0-9]+}}.4h -; CHECK: st1 { v{{[0-9]+}}.2s } +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } %1 = load <2 x i32>, <2 x i32>* %p %2 = add <2 x i32> %1, %1 %3 = bitcast <2 x i32> %2 to <4 x half> @@ -603,8 +613,8 @@ ; CHECK-LABEL: test_v4f16_v4i16: define void @test_v4f16_v4i16(<4 x i16>* %p, <4 x half>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.4h } -; CHECK: rev32 v{{[0-9]+}}.4h -; CHECK: st1 { v{{[0-9]+}}.2s } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } %1 = load <4 x i16>, <4 x i16>* %p %2 = add <4 x i16> %1, %1 %3 = bitcast <4 x i16> %2 to <4 x half> @@ -617,8 +627,9 @@ define void @test_v4f16_v8i8(<8 x i8>* %p, <4 x half>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8b } ; CHECK: rev16 v{{[0-9]+}}.8b -; CHECK: rev32 v{{[0-9]+}}.4h -; CHECK: st1 { v{{[0-9]+}}.2s } +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4h } %1 = load <8 x i8>, <8 x i8>* %p %2 = add <8 x i8> %1, %1 %3 = bitcast <8 x i8> %2 to <4 x half> @@ -733,8 +744,9 @@ ; CHECK-LABEL: test_f128_v4f32: define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: ld1 { v{{[0-9]+}}.4s } +; CHECK-NOT: rev +; CHECK: fadd ; CHECK: rev64 v{{[0-9]+}}.4s ; CHECK: ext ; CHECK: str q @@ -814,8 +826,9 @@ ; CHECK-LABEL: test_v2f64_v4f32: define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: ld1 { v{{[0-9]+}}.4s } +; CHECK-NOT: rev +; CHECK: fadd ; CHECK: rev64 v{{[0-9]+}}.4s ; CHECK: st1 { v{{[0-9]+}}.2d } %1 = load <4 x float>, <4 x float>* %p @@ -892,9 +905,11 @@ ; CHECK-LABEL: test_v2i64_v4f32: define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: ld1 { v{{[0-9]+}}.4s } +; CHECK-NOT: rev +; CHECK: fadd ; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: add ; CHECK: st1 { v{{[0-9]+}}.2d } %1 = load <4 x float>, <4 x float>* %p %2 = fadd <4 x float> %1, %1 @@ -948,8 +963,8 @@ ; CHECK: ldr q ; CHECK: rev64 v{{[0-9]+}}.4s ; CHECK: ext -; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: st1 { v{{[0-9]+}}.2d } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load fp128, fp128* %p %2 = fadd fp128 %1, %1 %3 = bitcast fp128 %2 to <4 x float> @@ -962,8 +977,8 @@ define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.2d } ; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: st1 { v{{[0-9]+}}.2d } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load <2 x double>, <2 x double>* %p %2 = fadd <2 x double> %1, %1 %3 = bitcast <2 x double> %2 to <4 x float> @@ -976,8 +991,9 @@ define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.2d } ; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: st1 { v{{[0-9]+}}.2d } +; CHECK: fadd +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load <2 x i64>, <2 x i64>* %p %2 = add <2 x i64> %1, %1 %3 = bitcast <2 x i64> %2 to <4 x float> @@ -989,8 +1005,8 @@ ; CHECK-LABEL: test_v4f32_v4i32: define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.4s } -; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: st1 { v{{[0-9]+}}.2d } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load <4 x i32>, <4 x i32>* %p %2 = add <4 x i32> %1, %1 %3 = bitcast <4 x i32> %2 to <4 x float> @@ -1003,8 +1019,8 @@ define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8h } ; CHECK: rev32 v{{[0-9]+}}.8h -; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: st1 { v{{[0-9]+}}.2d } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load <8 x i16>, <8 x i16>* %p %2 = add <8 x i16> %1, %1 %3 = bitcast <8 x i16> %2 to <4 x float> @@ -1017,8 +1033,8 @@ define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.16b } ; CHECK: rev32 v{{[0-9]+}}.16b -; CHECK: rev64 v{{[0-9]+}}.4s -; CHECK: st1 { v{{[0-9]+}}.2d } +; CHECK-NOT: rev +; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load <16 x i8>, <16 x i8>* %p %2 = add <16 x i8> %1, %1 %3 = bitcast <16 x i8> %2 to <4 x float> @@ -1069,8 +1085,8 @@ ; CHECK-LABEL: test_v4i32_v4f32: define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: ld1 { v{{[0-9]+}}.4s } +; CHECK-NOT: rev ; CHECK: st1 { v{{[0-9]+}}.4s } %1 = load <4 x float>, <4 x float>* %p %2 = fadd <4 x float> %1, %1 @@ -1148,9 +1164,9 @@ ; CHECK-LABEL: test_v8i16_v4f32: define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: ld1 { v{{[0-9]+}}.4s } ; CHECK: rev32 v{{[0-9]+}}.8h +; CHECK-NOT: rev ; CHECK: st1 { v{{[0-9]+}}.8h } %1 = load <4 x float>, <4 x float>* %p %2 = fadd <4 x float> %1, %1 @@ -1175,8 +1191,8 @@ ; CHECK-LABEL: test_v8i16_v8f16: define void @test_v8i16_v8f16(<8 x half>* %p, <8 x i16>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.8h +; CHECK: ld1 { v{{[0-9]+}}.8h } +; CHECK-NOT: rev ; CHECK: st1 { v{{[0-9]+}}.8h } %1 = load <8 x half>, <8 x half>* %p %2 = fadd <8 x half> %1, %1 @@ -1241,9 +1257,9 @@ ; CHECK-LABEL: test_v16i8_v4f32: define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.4s +; CHECK: ld1 { v{{[0-9]+}}.4s } ; CHECK: rev32 v{{[0-9]+}}.16b +; CHECK-NOT: rev ; CHECK: st1 { v{{[0-9]+}}.16b } %1 = load <4 x float>, <4 x float>* %p %2 = fadd <4 x float> %1, %1 @@ -1268,9 +1284,9 @@ ; CHECK-LABEL: test_v16i8_v8f16: define void @test_v16i8_v8f16(<8 x half>* %p, <16 x i8>* %q) { -; CHECK: ld1 { v{{[0-9]+}}.2d } -; CHECK: rev64 v{{[0-9]+}}.8h +; CHECK: ld1 { v{{[0-9]+}}.8h } ; CHECK: rev16 v{{[0-9]+}}.16b +; CHECK-NOT: rev ; CHECK: st1 { v{{[0-9]+}}.16b } %1 = load <8 x half>, <8 x half>* %p %2 = fadd <8 x half> %1, %1 @@ -1297,9 +1313,8 @@ %struct.struct1 = type { half, half, half, half } define %struct.struct1 @test_v4f16_struct(%struct.struct1* %ret) { entry: -; CHECK: ld1 { {{v[0-9]+}}.2s } -; CHECK: rev32 -; CHECK-NOT; rev64 +; CHECK: ld1 { {{v[0-9]+}}.4h } +; CHECK-NOT: rev %0 = bitcast %struct.struct1* %ret to <4 x half>* %1 = load <4 x half>, <4 x half>* %0, align 2 %2 = extractelement <4 x half> %1, i32 0