diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5884,6 +5884,27 @@
 defm : Neon_INS_elt_pattern;
 defm : Neon_INS_elt_pattern;
 
+// Insert from bitcast
+// vector_insert(src, bitcast(f32 n), lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
+// No INSERT_SUBREG shortcut for lane 0: it was emitted as "fmov s0, s1" /
+// "fmov d0, d1", and a scalar FP register write zeroes the rest of the vector
+// register, wiping the other lanes of src. Lane 0 uses the INS form as well.
+def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
+          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
+def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)),
+          (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;
+
+// bitcast of an extract
+// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
+// Lane 0 needs no instruction: it is read directly as the ssub/dsub subregister of src.
+def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
+          (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
+def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, 0)))),
+          (EXTRACT_SUBREG V128:$src, ssub)>;
+def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
+          (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
+def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 0)))),
+          (EXTRACT_SUBREG V128:$src, dsub)>;
 
 // Floating point vector extractions are codegen'd as either a sequence of
 // subregister extractions, or a MOV (aka DUP here) if
diff --git a/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll b/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
--- a/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
+++ b/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
@@ -4,8 +4,8 @@
 define <4 x i32> @test_vins_v4i32(<4 x i32> %a, float %b) {
 ; CHECK-LABEL: test_vins_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov w8, s1
-; CHECK-NEXT:    mov v0.s[3], w8
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
   %c = bitcast float %b to i32
@@ -16,8 +16,8 @@
 define <4 x i32> @test_vins_v4i32_0(<4 x i32> %a, float %b) {
 ; CHECK-LABEL: test_vins_v4i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov w8, s1
-; CHECK-NEXT:    mov v0.s[0], w8
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-NEXT:    mov v0.s[0], v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
   %c = bitcast float %b to i32
@@ -28,9 +28,9 @@
 define <2 x i32> @test_vins_v2i32(<2 x i32> %a, float %b) {
 ; CHECK-LABEL: test_vins_v2i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
@@ -42,9 +42,9 @@
 define <2 x i32> @test_vins_v2i32_0(<2 x i32> %a, float %b) {
 ; CHECK-LABEL: test_vins_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov v0.s[0], w8
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-NEXT:    mov v0.s[0], v1.s[0]
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
@@ -56,8 +56,8 @@
 define <2 x i64> @test_vins_v2i64(<2 x i64> %a, double %b) {
 ; CHECK-LABEL: test_vins_v2i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov x8, d1
-; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
   %c = bitcast double %b to i64
@@ -68,8 +68,8 @@
 define <2 x i64> @test_vins_v2i64_0(<2 x i64> %a, double %b) {
 ; CHECK-LABEL: test_vins_v2i64_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov x8, d1
-; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    mov v0.d[0], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
   %c = bitcast double %b to i64
@@ -92,8 +92,8 @@
 define float @test_vext_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: test_vext_v4i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, v0.s[3]
-; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    mov v0.s[0], v0.s[3]
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <4 x i32> %a, i32 3
@@ -116,8 +116,8 @@
 ; CHECK-LABEL: test_vext_v2i32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov w8, v0.s[1]
-; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    mov v0.s[0], v0.s[1]
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <2 x i32> %a, i32 1
@@ -140,8 +140,8 @@
 define double @test_vext_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vext_v2i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov x8, v0.d[1]
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    mov v0.d[0], v0.d[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <2 x i64> %a, i32 1