Index: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td @@ -5807,7 +5807,7 @@ def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 (REV64v2i32 FPR64:$src))>; def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), - (v2i32 (REV64v4i16 FPR64:$src))>; + (v2i32 (REV32v4i16 FPR64:$src))>; } def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; @@ -5816,7 +5816,6 @@ def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; } @@ -5829,18 +5828,16 @@ (v4i16 (REV16v8i8 FPR64:$src))>; def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 (REV64v4i16 FPR64:$src))>; -def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), - (v4i16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 (REV64v4i16 FPR64:$src))>; } +def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>; let Predicates = [IsLE] in { def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>; @@ -5850,20 +5847,17 @@ def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), 
(v4f16 (REV64v4i16 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), - (v4f16 (REV64v4i16 FPR64:$src))>; -def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), - (v4f16 (REV64v4i16 FPR64:$src))>; + (v4f16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 (REV16v8i8 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 (REV64v4i16 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), - (v4f16 (REV64v4i16 FPR64:$src))>; + (v4f16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 (REV64v4i16 FPR64:$src))>; } - - +def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; let Predicates = [IsLE] in { def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; @@ -5955,7 +5949,7 @@ def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 (REV64v2i32 FPR64:$src))>; def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), - (v2f32 (REV64v4i16 FPR64:$src))>; + (v2f32 (REV32v4i16 FPR64:$src))>; } def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; @@ -6098,7 +6092,6 @@ def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>; } let Predicates = [IsBE] in { def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), @@ -6115,15 +6108,13 @@ (v8i16 (REV64v8i16 FPR128:$src))>; def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 (REV32v8i16 FPR128:$src))>; -def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), - (v8i16 (REV32v8i16 FPR128:$src))>; } +def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; def : 
Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; @@ -6137,8 +6128,6 @@ (v8f16 (REV64v8i16 FPR128:$src))>; def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 (REV32v8i16 FPR128:$src))>; -def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), - (v8f16 (REV64v8i16 FPR128:$src))>; def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 (REV16v16i8 FPR128:$src))>; def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), @@ -6146,6 +6135,7 @@ def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 (REV32v8i16 FPR128:$src))>; } +def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; Index: llvm/trunk/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll @@ -51,6 +51,20 @@ ret void } +; CHECK-LABEL: test_i64_v4f16: +define void @test_i64_v4f16(<4 x half>* %p, i64* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: str + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to i64 + %4 = add i64 %3, %3 + store i64 %4, i64* %q + ret void +} + ; CHECK-LABEL: test_i64_v4i16: define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) { ; CHECK: ld1 { v{{[0-9]+}}.4h } @@ -140,6 +154,20 @@ ret void } +; CHECK-LABEL: test_f64_v4f16: +define void @test_f64_v4f16(<4 x half>* %p, double* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h 
+; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: str + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to double + %4 = fadd double %3, %3 + store double %4, double* %q + ret void +} + ; CHECK-LABEL: test_f64_v8i8: define void @test_f64_v8i8(<8 x i8>* %p, double* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8b } @@ -203,6 +231,20 @@ ret void } +; CHECK-LABEL: test_v1i64_v4f16: +define void @test_v1i64_v4f16(<4 x half>* %p, <1 x i64>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: str + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to <1 x i64> + %4 = add <1 x i64> %3, %3 + store <1 x i64> %4, <1 x i64>* %q + ret void +} + ; CHECK-LABEL: test_v1i64_v4i16: define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.4h } @@ -293,6 +335,20 @@ ret void } +; CHECK-LABEL: test_v2f32_v4f16: +define void @test_v2f32_v4f16(<4 x half>* %p, <2 x float>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to <2 x float> + %4 = fadd <2 x float> %3, %3 + store <2 x float> %4, <2 x float>* %q + ret void +} + ; CHECK-LABEL: test_v2f32_v8i8: define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8b } @@ -448,6 +504,19 @@ ret void } +; CHECK-LABEL: test_v4i16_v4f16: +define void @test_v4i16_v4f16(<4 x half>* %p, <4 x i16>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.4h } + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to <4 x i16> + %4 = add <4 x i16> %3, %3 + store <4 x i16> %4, <4 x i16>* %q + ret void +} + ; CHECK-LABEL: test_v4i16_v8i8: define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) { ; 
CHECK: ld1 { v{{[0-9]+}}.8b } @@ -461,6 +530,103 @@ ret void } +; CHECK-LABEL: test_v4f16_i64: +define void @test_v4f16_i64(i64* %p, <4 x half>* %q) { +; CHECK: ldr +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load i64, i64* %p + %2 = add i64 %1, %1 + %3 = bitcast i64 %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_f64: +define void @test_v4f16_f64(double* %p, <4 x half>* %q) { +; CHECK: ldr +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load double, double* %p + %2 = fadd double %1, %1 + %3 = bitcast double %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v1i64: +define void @test_v4f16_v1i64(<1 x i64>* %p, <4 x half>* %q) { +; CHECK: ldr +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load <1 x i64>, <1 x i64>* %p + %2 = add <1 x i64> %1, %1 + %3 = bitcast <1 x i64> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v2f32: +define void @test_v4f16_v2f32(<2 x float>* %p, <4 x half>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load <2 x float>, <2 x float>* %p + %2 = fadd <2 x float> %1, %1 + %3 = bitcast <2 x float> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v2i32: +define void @test_v4f16_v2i32(<2 x i32>* %p, <4 x half>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load <2 x i32>, <2 x i32>* %p + %2 = add <2 x i32> %1, %1 + %3 = bitcast <2 x i32> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + 
store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v4i16: +define void @test_v4f16_v4i16(<4 x i16>* %p, <4 x half>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.4h } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load <4 x i16>, <4 x i16>* %p + %2 = add <4 x i16> %1, %1 + %3 = bitcast <4 x i16> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v8i8: +define void @test_v4f16_v8i8(<8 x i8>* %p, <4 x half>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.8b } +; CHECK: rev16 v{{[0-9]+}}.8b +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load <8 x i8>, <8 x i8>* %p + %2 = add <8 x i8> %1, %1 + %3 = bitcast <8 x i8> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + ; CHECK-LABEL: test_v8i8_i64: define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) { ; CHECK: ldr @@ -1007,6 +1173,19 @@ ret void } +; CHECK-LABEL: test_v8i16_v8f16: +define void @test_v8i16_v8f16(<8 x half>* %p, <8 x i16>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2d } +; CHECK: rev64 v{{[0-9]+}}.8h +; CHECK: st1 { v{{[0-9]+}}.8h } + %1 = load <8 x half>, <8 x half>* %p + %2 = fadd <8 x half> %1, %1 + %3 = bitcast <8 x half> %2 to <8 x i16> + %4 = add <8 x i16> %3, %3 + store <8 x i16> %4, <8 x i16>* %q + ret void +} + ; CHECK-LABEL: test_v8i16_v16i8: define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.16b } @@ -1087,6 +1266,20 @@ ret void } +; CHECK-LABEL: test_v16i8_v8f16: +define void @test_v16i8_v8f16(<8 x half>* %p, <16 x i8>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2d } +; CHECK: rev64 v{{[0-9]+}}.8h +; CHECK: rev16 v{{[0-9]+}}.16b +; CHECK: st1 { v{{[0-9]+}}.16b } + %1 = load <8 x half>, <8 x half>* %p + %2 = fadd <8 x half> %1, %1 + %3 = bitcast <8 x half> %2 to <16 x i8> + %4 = add <16 x i8> %3, %3 + store <16 x i8> %4, <16 x i8>* %q + ret void +} + ; CHECK-LABEL: test_v16i8_v8i16: define void 
@test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8h } @@ -1099,3 +1292,17 @@ store <16 x i8> %4, <16 x i8>* %q ret void } + +; CHECK-LABEL: test_v4f16_struct: +%struct.struct1 = type { half, half, half, half } +define %struct.struct1 @test_v4f16_struct(%struct.struct1* %ret) { +entry: +; CHECK: ld1 { {{v[0-9]+}}.2s } +; CHECK: rev32 +; CHECK-NOT: rev64 + %0 = bitcast %struct.struct1* %ret to <4 x half>* + %1 = load <4 x half>, <4 x half>* %0, align 2 + %2 = extractelement <4 x half> %1, i32 0 + %.fca.0.insert = insertvalue %struct.struct1 undef, half %2, 0 + ret %struct.struct1 %.fca.0.insert +}