Index: llvm/lib/Target/ARM/ARMInstrVFP.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrVFP.td +++ llvm/lib/Target/ARM/ARMInstrVFP.td @@ -1605,6 +1605,8 @@ def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr), (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>; + def : VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f64 DPR:$a), i32)), addrmode5:$ptr), + (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>; } def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, @@ -1627,6 +1629,9 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr), (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>; +def : VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f32 SPR:$a), i32)), + addrmode5:$ptr), + (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>; def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, (outs SPR:$Sd), (ins HPR:$Sm), @@ -1658,6 +1663,8 @@ def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr), (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>; + def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f64 DPR:$a), i32)), addrmode5:$ptr), + (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>; } def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, @@ -1680,6 +1687,9 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr), (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>; +def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f32 SPR:$a), i32)), + addrmode5:$ptr), + (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>; def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, (outs SPR:$Sd), (ins HPR:$Sm), Index: llvm/test/CodeGen/ARM/fptoi-sat-store.ll =================================================================== --- llvm/test/CodeGen/ARM/fptoi-sat-store.ll +++ llvm/test/CodeGen/ARM/fptoi-sat-store.ll @@ -63,8 +63,7 @@ ; VFP: @ %bb.0: ; VFP-NEXT: vmov s0, r1 ; VFP-NEXT: vcvt.s32.f32 s0, s0 -; VFP-NEXT: vmov r1, s0 -; VFP-NEXT: str r1, [r0] +; VFP-NEXT: vstr s0, [r0] ; VFP-NEXT: bx lr %r = call i32 @llvm.fptosi.sat.i32.f32(float %f) store i32 %r, i32* %d, align 4 @@ -141,16 +140,14 @@ ; VFP2: @ %bb.0: ; VFP2-NEXT: vmov d16, r2, r3 ; VFP2-NEXT: vcvt.s32.f64 s0, d16 -; VFP2-NEXT: vmov r1, s0 -; VFP2-NEXT: str r1, [r0] +; VFP2-NEXT: vstr s0, [r0] ; VFP2-NEXT: bx lr ; ; FP16-LABEL: test_signed_i32_f64: ; FP16: @ %bb.0: ; FP16-NEXT: vmov d0, r2, r3 ; FP16-NEXT: vcvt.s32.f64 s0, d0 -; FP16-NEXT: vmov r1, s0 -; FP16-NEXT: str r1, [r0] +; FP16-NEXT: vstr s0, [r0] ; FP16-NEXT: bx lr %r = call i32 @llvm.fptosi.sat.i32.f64(double %f) store i32 %r, i32* %d, align 4 @@ -200,8 +197,7 @@ ; VFP: @ %bb.0: ; VFP-NEXT: vmov s0, r1 ; VFP-NEXT: vcvt.u32.f32 s0, s0 -; VFP-NEXT: vmov r1, s0 -; VFP-NEXT: str r1, [r0] +; VFP-NEXT: vstr s0, [r0] ; VFP-NEXT: bx lr %r = call i32 @llvm.fptoui.sat.i32.f32(float %f) store i32 %r, i32* %d, align 4 @@ -260,16 +256,14 @@ ; VFP2: @ %bb.0: ; VFP2-NEXT: vmov d16, r2, r3 ; VFP2-NEXT: vcvt.u32.f64 s0, d16 -; VFP2-NEXT: vmov r1, s0 -; VFP2-NEXT: str r1, [r0] +; VFP2-NEXT: vstr s0, [r0] ; VFP2-NEXT: bx lr ; ; FP16-LABEL: test_unsigned_i32_f64: ; FP16: @ %bb.0: ; FP16-NEXT: vmov d0, r2, r3 ; FP16-NEXT: vcvt.u32.f64 s0, d0 -; FP16-NEXT: vmov r1, s0 -; FP16-NEXT: str r1, [r0] +; FP16-NEXT: vstr s0, [r0] ; FP16-NEXT: bx lr %r = call i32 @llvm.fptoui.sat.i32.f64(double %f) store i32 %r, i32* %d, align 4 Index: llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll +++ llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll @@ -152,20 +152,19 @@ define arm_aapcs_vfpcc <5 x i32> @test_signed_v5f32_v5i32(<5 x float> %f) { ; CHECK-MVE-LABEL: test_signed_v5f32_v5i32: ; CHECK-MVE: @ %bb.0: -; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4 -; CHECK-MVE-NEXT: vcvt.s32.f32 s0, s0 ; CHECK-MVE-NEXT: vcvt.s32.f32 s2, s2 +; CHECK-MVE-NEXT: vcvt.s32.f32 s0, s0 ; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s3 ; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s1 -; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vmov r2, s0 -; CHECK-MVE-NEXT: str r1, [r0, #16] +; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4 ; CHECK-MVE-NEXT: vmov r1, s2 +; CHECK-MVE-NEXT: vmov r2, s0 ; CHECK-MVE-NEXT: vmov q0[2], q0[0], r2, r1 ; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: vmov r2, s8 ; CHECK-MVE-NEXT: vmov q0[3], q0[1], r2, r1 ; CHECK-MVE-NEXT: vstrw.32 q0, [r0] +; CHECK-MVE-NEXT: vstr s4, [r0, #16] ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: test_signed_v5f32_v5i32: @@ -183,22 +182,21 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) { ; CHECK-MVE-LABEL: test_signed_v6f32_v6i32: ; CHECK-MVE: @ %bb.0: -; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s5 -; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4 ; CHECK-MVE-NEXT: vcvt.s32.f32 s2, s2 ; CHECK-MVE-NEXT: vcvt.s32.f32 s0, s0 ; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s3 ; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s1 -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmov r2, s4 -; CHECK-MVE-NEXT: strd r2, r1, [r0, #16] +; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s5 +; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4 ; CHECK-MVE-NEXT: vmov r1, s2 ; CHECK-MVE-NEXT: vmov r2, s0 ; CHECK-MVE-NEXT: vmov q0[2], q0[0], r2, r1 ; CHECK-MVE-NEXT: vmov r1, s8 ; CHECK-MVE-NEXT: vmov r2, s10 ; CHECK-MVE-NEXT: vmov q0[3], q0[1], r2, r1 +; CHECK-MVE-NEXT: vstr s6, [r0, #20] ; CHECK-MVE-NEXT: vstrw.32 q0, [r0] +; CHECK-MVE-NEXT: vstr s4, [r0, #16] ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: test_signed_v6f32_v6i32: @@ -218,25 +216,23 @@ define arm_aapcs_vfpcc <7 x i32> @test_signed_v7f32_v7i32(<7 x float> %f) { ; CHECK-MVE-LABEL: test_signed_v7f32_v7i32: ; CHECK-MVE: @ %bb.0: -; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s5 -; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4 -; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s6 ; CHECK-MVE-NEXT: vcvt.s32.f32 s2, s2 ; CHECK-MVE-NEXT: vcvt.s32.f32 s0, s0 ; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s3 ; CHECK-MVE-NEXT: vcvt.s32.f32 s12, s1 -; CHECK-MVE-NEXT: vmov r1, s8 -; CHECK-MVE-NEXT: vmov r2, s4 -; CHECK-MVE-NEXT: vmov r3, s6 -; CHECK-MVE-NEXT: strd r2, r1, [r0, #16] +; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s5 +; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s4 +; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s6 ; CHECK-MVE-NEXT: vmov r1, s2 ; CHECK-MVE-NEXT: vmov r2, s0 -; CHECK-MVE-NEXT: str r3, [r0, #24] ; CHECK-MVE-NEXT: vmov q0[2], q0[0], r2, r1 ; CHECK-MVE-NEXT: vmov r1, s10 ; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmov q0[3], q0[1], r2, r1 +; CHECK-MVE-NEXT: vstr s8, [r0, #20] +; CHECK-MVE-NEXT: vstr s4, [r0, #16] ; CHECK-MVE-NEXT: vstrw.32 q0, [r0] +; CHECK-MVE-NEXT: vstr s6, [r0, #24] ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: test_signed_v7f32_v7i32: Index: llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll +++ llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll @@ -133,20 +133,19 @@ define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) { ; CHECK-MVE-LABEL: test_unsigned_v5f32_v5i32: ; CHECK-MVE: @ %bb.0: -; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4 -; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0 ; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2 +; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0 ; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s3 ; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s1 -; CHECK-MVE-NEXT: vmov r1, s4 -; CHECK-MVE-NEXT: vmov r2, s0 -; CHECK-MVE-NEXT: str r1, [r0, #16] +; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4 ; CHECK-MVE-NEXT: vmov r1, s2 +; CHECK-MVE-NEXT: vmov r2, s0 ; CHECK-MVE-NEXT: vmov q0[2], q0[0], r2, r1 ; CHECK-MVE-NEXT: vmov r1, s6 ; CHECK-MVE-NEXT: vmov r2, s8 ; CHECK-MVE-NEXT: vmov q0[3], q0[1], r2, r1 ; CHECK-MVE-NEXT: vstrw.32 q0, [r0] +; CHECK-MVE-NEXT: vstr s4, [r0, #16] ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: test_unsigned_v5f32_v5i32: @@ -164,22 +163,21 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) { ; CHECK-MVE-LABEL: test_unsigned_v6f32_v6i32: ; CHECK-MVE: @ %bb.0: -; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s5 -; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4 ; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2 ; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0 ; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s3 ; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s1 -; CHECK-MVE-NEXT: vmov r1, s6 -; CHECK-MVE-NEXT: vmov r2, s4 -; CHECK-MVE-NEXT: strd r2, r1, [r0, #16] +; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s5 +; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4 ; CHECK-MVE-NEXT: vmov r1, s2 ; CHECK-MVE-NEXT: vmov r2, s0 ; CHECK-MVE-NEXT: vmov q0[2], q0[0], r2, r1 ; CHECK-MVE-NEXT: vmov r1, s8 ; CHECK-MVE-NEXT: vmov r2, s10 ; CHECK-MVE-NEXT: vmov q0[3], q0[1], r2, r1 +; CHECK-MVE-NEXT: vstr s6, [r0, #20] ; CHECK-MVE-NEXT: vstrw.32 q0, [r0] +; CHECK-MVE-NEXT: vstr s4, [r0, #16] ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: test_unsigned_v6f32_v6i32: @@ -199,25 +197,23 @@ define arm_aapcs_vfpcc <7 x i32> @test_unsigned_v7f32_v7i32(<7 x float> %f) { ; CHECK-MVE-LABEL: test_unsigned_v7f32_v7i32: ; CHECK-MVE: @ %bb.0: -; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s5 -; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4 -; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s6 ; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2 ; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0 ; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s3 ; CHECK-MVE-NEXT: vcvt.u32.f32 s12, s1 -; CHECK-MVE-NEXT: vmov r1, s8 -; CHECK-MVE-NEXT: vmov r2, s4 -; CHECK-MVE-NEXT: vmov r3, s6 -; CHECK-MVE-NEXT: strd r2, r1, [r0, #16] +; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s5 +; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4 +; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s6 ; CHECK-MVE-NEXT: vmov r1, s2 ; CHECK-MVE-NEXT: vmov r2, s0 -; CHECK-MVE-NEXT: str r3, [r0, #24] ; CHECK-MVE-NEXT: vmov q0[2], q0[0], r2, r1 ; CHECK-MVE-NEXT: vmov r1, s10 ; CHECK-MVE-NEXT: vmov r2, s12 ; CHECK-MVE-NEXT: vmov q0[3], q0[1], r2, r1 +; CHECK-MVE-NEXT: vstr s8, [r0, #20] +; CHECK-MVE-NEXT: vstr s4, [r0, #16] ; CHECK-MVE-NEXT: vstrw.32 q0, [r0] +; CHECK-MVE-NEXT: vstr s6, [r0, #24] ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: test_unsigned_v7f32_v7i32: