diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -3754,35 +3754,56 @@ // AArch64's FCVT instructions saturate when out of range. multiclass FPToIntegerSatPats { + let Predicates = [HasFullFP16] in { def : Pat<(i32 (to_int_sat f16:$Rn, i32)), (!cast(INST # UWHr) f16:$Rn)>; - def : Pat<(i32 (to_int_sat f32:$Rn, i32)), - (!cast(INST # UWSr) f32:$Rn)>; - def : Pat<(i32 (to_int_sat f64:$Rn, i32)), - (!cast(INST # UWDr) f64:$Rn)>; def : Pat<(i64 (to_int_sat f16:$Rn, i64)), (!cast(INST # UXHr) f16:$Rn)>; + } + def : Pat<(i32 (to_int_sat f32:$Rn, i32)), + (!cast(INST # UWSr) f32:$Rn)>; def : Pat<(i64 (to_int_sat f32:$Rn, i64)), (!cast(INST # UXSr) f32:$Rn)>; + def : Pat<(i32 (to_int_sat f64:$Rn, i32)), + (!cast(INST # UWDr) f64:$Rn)>; def : Pat<(i64 (to_int_sat f64:$Rn, i64)), (!cast(INST # UXDr) f64:$Rn)>; + + let Predicates = [HasFullFP16] in { + def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)), + (!cast(INST # SWHri) $Rn, $scale)>; + def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)), + (!cast(INST # SXHri) $Rn, $scale)>; + } + def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)), + (!cast(INST # SWSri) $Rn, $scale)>; + def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)), + (!cast(INST # SXSri) $Rn, $scale)>; + def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)), + (!cast(INST # SWDri) $Rn, $scale)>; + def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)), + (!cast(INST # SXDri) $Rn, $scale)>; } defm : FPToIntegerSatPats; defm : FPToIntegerSatPats; multiclass FPToIntegerIntPats { + let Predicates = [HasFullFP16] in { def : Pat<(i32 (round f16:$Rn)), (!cast(INST # UWHr) $Rn)>; def : Pat<(i64 (round f16:$Rn)), (!cast(INST # UXHr) $Rn)>; + } def : Pat<(i32 (round f32:$Rn)), (!cast(INST # UWSr) $Rn)>; def : Pat<(i64 (round f32:$Rn)), (!cast(INST # UXSr) $Rn)>; def : Pat<(i32 (round f64:$Rn)), (!cast(INST # UWDr) $Rn)>; def : Pat<(i64 (round f64:$Rn)), (!cast(INST # UXDr) $Rn)>; + let Predicates = [HasFullFP16] in { def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))), (!cast(INST # SWHri) $Rn, $scale)>; def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))), (!cast(INST # SXHri) $Rn, $scale)>; + } def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))), (!cast(INST # SWSri) $Rn, $scale)>; def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))), @@ -3807,10 +3828,12 @@ (!cast(INST # UXDr) f64:$Rn)>; // These instructions saturate like fp_to_[su]int_sat. + let Predicates = [HasFullFP16] in { def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)), (!cast(INST # UWHr) f16:$Rn)>; def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)), (!cast(INST # UXHr) f16:$Rn)>; + } def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)), (!cast(INST # UWSr) f32:$Rn)>; def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)), diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll --- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll @@ -679,10 +679,7 @@ define i32 @fcvtzs_sat_f32_i32_7(float %flt) { ; CHECK-LABEL: fcvtzs_sat_f32_i32_7: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1124073472 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fmul s0, s0, s1 -; CHECK-NEXT: fcvtzs w0, s0 +; CHECK-NEXT: fcvtzs w0, s0, #7 ; CHECK-NEXT: ret %fix = fmul float %flt, 128.0 %cvt = call i32 @llvm.fptosi.sat.i32.f32(float %fix) @@ -692,10 +689,7 @@ define i32 @fcvtzs_sat_f32_i32_32(float %flt) { ; CHECK-LABEL: fcvtzs_sat_f32_i32_32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1333788672 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fmul s0, s0, s1 -; CHECK-NEXT: fcvtzs w0, s0 +; CHECK-NEXT: fcvtzs w0, s0, #32 ; CHECK-NEXT: ret %fix = fmul float %flt, 4294967296.0 %cvt = call i32 @llvm.fptosi.sat.i32.f32(float %fix) @@ -705,10 +699,7 @@ define i64 @fcvtzs_sat_f32_i64_64(float %flt) { ; CHECK-LABEL: fcvtzs_sat_f32_i64_64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1602224128 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fmul s0, s0, s1 -; CHECK-NEXT: fcvtzs x0, s0 +; CHECK-NEXT: fcvtzs x0, s0, #64 ; CHECK-NEXT: ret %fix = fmul float %flt, 18446744073709551616.0 %cvt = call i64 @llvm.fptosi.sat.i64.f32(float %fix) @@ -718,10 +709,7 @@ define i32 @fcvtzs_sat_f64_i32_7(double %dbl) { ; CHECK-LABEL: fcvtzs_sat_f64_i32_7: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4638707616191610880 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmul d0, d0, d1 -; CHECK-NEXT: fcvtzs w0, d0 +; CHECK-NEXT: fcvtzs w0, d0, #7 ; CHECK-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = call i32 @llvm.fptosi.sat.i32.f64(double %fix) @@ -731,10 +719,7 @@ define i32 @fcvtzs_sat_f64_i32_32(double %dbl) { ; CHECK-LABEL: fcvtzs_sat_f64_i32_32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4751297606875873280 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmul d0, d0, d1 -; CHECK-NEXT: fcvtzs w0, d0 +; CHECK-NEXT: fcvtzs w0, d0, #32 ; CHECK-NEXT: ret %fix = fmul double %dbl, 4294967296.0 %cvt = call i32 @llvm.fptosi.sat.i32.f64(double %fix) @@ -744,10 +729,7 @@ define i64 @fcvtzs_sat_f64_i64_7(double %dbl) { ; CHECK-LABEL: fcvtzs_sat_f64_i64_7: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4638707616191610880 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmul d0, d0, d1 -; CHECK-NEXT: fcvtzs x0, d0 +; CHECK-NEXT: fcvtzs x0, d0, #7 ; CHECK-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = call i64 @llvm.fptosi.sat.i64.f64(double %fix) @@ -757,10 +739,7 @@ define i64 @fcvtzs_sat_f64_i64_64(double %dbl) { ; CHECK-LABEL: fcvtzs_sat_f64_i64_64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4895412794951729152 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmul d0, d0, d1 -; CHECK-NEXT: fcvtzs x0, d0 +; CHECK-NEXT: fcvtzs x0, d0, #64 ; CHECK-NEXT: ret %fix = fmul double %dbl, 18446744073709551616.0 %cvt = call i64 @llvm.fptosi.sat.i64.f64(double %fix) @@ -781,10 +760,7 @@ ; ; CHECK-FP16-LABEL: fcvtzs_sat_f16_i32_7: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: adrp x8, .LCPI55_0 -; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0] -; CHECK-FP16-NEXT: fmul h0, h0, h1 -; CHECK-FP16-NEXT: fcvtzs w0, h0 +; CHECK-FP16-NEXT: fcvtzs w0, h0, #7 ; CHECK-FP16-NEXT: ret %fix = fmul half %dbl, 128.0 %cvt = call i32 @llvm.fptosi.sat.i32.f16(half %fix) @@ -805,10 +781,7 @@ ; ; CHECK-FP16-LABEL: fcvtzs_sat_f16_i32_15: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: adrp x8, .LCPI56_0 -; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0] -; CHECK-FP16-NEXT: fmul h0, h0, h1 -; CHECK-FP16-NEXT: fcvtzs w0, h0 +; CHECK-FP16-NEXT: fcvtzs w0, h0, #15 ; CHECK-FP16-NEXT: ret %fix = fmul half %dbl, 32768.0 %cvt = call i32 @llvm.fptosi.sat.i32.f16(half %fix) @@ -829,10 +802,7 @@ ; ; CHECK-FP16-LABEL: fcvtzs_sat_f16_i64_7: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: adrp x8, .LCPI57_0 -; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0] -; CHECK-FP16-NEXT: fmul h0, h0, h1 -; CHECK-FP16-NEXT: fcvtzs x0, h0 +; CHECK-FP16-NEXT: fcvtzs x0, h0, #7 ; CHECK-FP16-NEXT: ret %fix = fmul half %dbl, 128.0 %cvt = call i64 @llvm.fptosi.sat.i64.f16(half %fix) @@ -853,10 +823,7 @@ ; ; CHECK-FP16-LABEL: fcvtzs_sat_f16_i64_15: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: adrp x8, .LCPI58_0 -; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0] -; CHECK-FP16-NEXT: fmul h0, h0, h1 -; CHECK-FP16-NEXT: fcvtzs x0, h0 +; CHECK-FP16-NEXT: fcvtzs x0, h0, #15 ; CHECK-FP16-NEXT: ret %fix = fmul half %dbl, 32768.0 %cvt = call i64 @llvm.fptosi.sat.i64.f16(half %fix) @@ -875,10 +842,7 @@ define i32 @fcvtzu_sat_f32_i32_7(float %flt) { ; CHECK-LABEL: fcvtzu_sat_f32_i32_7: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1124073472 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fmul s0, s0, s1 -; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: fcvtzu w0, s0, #7 ; CHECK-NEXT: ret %fix = fmul float %flt, 128.0 %cvt = call i32 @llvm.fptoui.sat.i32.f32(float %fix) @@ -888,10 +852,7 @@ define i32 @fcvtzu_sat_f32_i32_32(float %flt) { ; CHECK-LABEL: fcvtzu_sat_f32_i32_32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1333788672 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fmul s0, s0, s1 -; CHECK-NEXT: fcvtzu w0, s0 +; CHECK-NEXT: fcvtzu w0, s0, #32 ; CHECK-NEXT: ret %fix = fmul float %flt, 4294967296.0 %cvt = call i32 @llvm.fptoui.sat.i32.f32(float %fix) @@ -901,10 +862,7 @@ define i64 @fcvtzu_sat_f32_i64_64(float %flt) { ; CHECK-LABEL: fcvtzu_sat_f32_i64_64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1602224128 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fmul s0, s0, s1 -; CHECK-NEXT: fcvtzu x0, s0 +; CHECK-NEXT: fcvtzu x0, s0, #64 ; CHECK-NEXT: ret %fix = fmul float %flt, 18446744073709551616.0 %cvt = call i64 @llvm.fptoui.sat.i64.f32(float %fix) @@ -914,10 +872,7 @@ define i32 @fcvtzu_sat_f64_i32_7(double %dbl) { ; CHECK-LABEL: fcvtzu_sat_f64_i32_7: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4638707616191610880 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmul d0, d0, d1 -; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: fcvtzu w0, d0, #7 ; CHECK-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = call i32 @llvm.fptoui.sat.i32.f64(double %fix) @@ -927,10 +882,7 @@ define i32 @fcvtzu_sat_f64_i32_32(double %dbl) { ; CHECK-LABEL: fcvtzu_sat_f64_i32_32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4751297606875873280 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmul d0, d0, d1 -; CHECK-NEXT: fcvtzu w0, d0 +; CHECK-NEXT: fcvtzu w0, d0, #32 ; CHECK-NEXT: ret %fix = fmul double %dbl, 4294967296.0 %cvt = call i32 @llvm.fptoui.sat.i32.f64(double %fix) @@ -940,10 +892,7 @@ define i64 @fcvtzu_sat_f64_i64_7(double %dbl) { ; CHECK-LABEL: fcvtzu_sat_f64_i64_7: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4638707616191610880 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmul d0, d0, d1 -; CHECK-NEXT: fcvtzu x0, d0 +; CHECK-NEXT: fcvtzu x0, d0, #7 ; CHECK-NEXT: ret %fix = fmul double %dbl, 128.0 %cvt = call i64 @llvm.fptoui.sat.i64.f64(double %fix) @@ -953,10 +902,7 @@ define i64 @fcvtzu_sat_f64_i64_64(double %dbl) { ; CHECK-LABEL: fcvtzu_sat_f64_i64_64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4895412794951729152 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fmul d0, d0, d1 -; CHECK-NEXT: fcvtzu x0, d0 +; CHECK-NEXT: fcvtzu x0, d0, #64 ; CHECK-NEXT: ret %fix = fmul double %dbl, 18446744073709551616.0 %cvt = call i64 @llvm.fptoui.sat.i64.f64(double %fix) @@ -977,10 +923,7 @@ ; ; CHECK-FP16-LABEL: fcvtzu_sat_f16_i32_7: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: adrp x8, .LCPI66_0 -; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0] -; CHECK-FP16-NEXT: fmul h0, h0, h1 -; CHECK-FP16-NEXT: fcvtzu w0, h0 +; CHECK-FP16-NEXT: fcvtzu w0, h0, #7 ; CHECK-FP16-NEXT: ret %fix = fmul half %dbl, 128.0 %cvt = call i32 @llvm.fptoui.sat.i32.f16(half %fix) @@ -1001,10 +944,7 @@ ; ; CHECK-FP16-LABEL: fcvtzu_sat_f16_i32_15: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: adrp x8, .LCPI67_0 -; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0] -; CHECK-FP16-NEXT: fmul h0, h0, h1 -; CHECK-FP16-NEXT: fcvtzu w0, h0 +; CHECK-FP16-NEXT: fcvtzu w0, h0, #15 ; CHECK-FP16-NEXT: ret %fix = fmul half %dbl, 32768.0 %cvt = call i32 @llvm.fptoui.sat.i32.f16(half %fix) @@ -1025,10 +965,7 @@ ; ; CHECK-FP16-LABEL: fcvtzu_sat_f16_i64_7: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: adrp x8, .LCPI68_0 -; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0] -; CHECK-FP16-NEXT: fmul h0, h0, h1 -; CHECK-FP16-NEXT: fcvtzu x0, h0 +; CHECK-FP16-NEXT: fcvtzu x0, h0, #7 ; CHECK-FP16-NEXT: ret %fix = fmul half %dbl, 128.0 %cvt = call i64 @llvm.fptoui.sat.i64.f16(half %fix) @@ -1049,10 +986,7 @@ ; ; CHECK-FP16-LABEL: fcvtzu_sat_f16_i64_15: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: adrp x8, .LCPI69_0 -; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0] -; CHECK-FP16-NEXT: fmul h0, h0, h1 -; CHECK-FP16-NEXT: fcvtzu x0, h0 +; CHECK-FP16-NEXT: fcvtzu x0, h0, #15 ; CHECK-FP16-NEXT: ret %fix = fmul half %dbl, 32768.0 %cvt = call i64 @llvm.fptoui.sat.i64.f16(half %fix)