# NVPTX: lower frem with a truncating conversion instead of a flooring one.
#
# NVPTXInstrInfo.td: the six frem selection patterns (f32 FTZ, f32 and f64,
# each with a register and an immediate divisor) now pass CvtRZI / CvtRZI_FTZ
# rather than CvtRMI / CvtRMI_FTZ to the CVT applied to the quotient, and the
# comment describing the lowering changes from "floor" to "ftrunc".
#
# f16-instructions.ll, f16x2-instructions.ll: the FileCheck expectations for
# the frem tests change from cvt.rmi to cvt.rzi to match.
#
# NOTE(review): presumably motivated by frem having to take the sign of the
# dividend, which a floor-based remainder does not do when x/y is negative;
# confirm against the LLVM LangRef entry for frem.
#
# NOTE(review): the line structure of this patch was reconstructed from a
# copy whose newlines had been collapsed. Blank context lines in the first
# hunk are placed to match the 39-line hunk header; leading indentation and
# runs of spaces inside hunk lines could not be recovered exactly, so apply
# with whitespace-insensitive matching (e.g. `patch -l`) or regenerate the
# diff from the original revision.
Index: llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1039,39 +1039,39 @@
                       [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>,
                       Requires<[allowUnsafeFPMath]>;
 
-// Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)),
+// Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)),
 // i.e. "poor man's fmod()"
 
 // frem - f32 FTZ
 def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
           (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32
-            (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRMI_FTZ),
+            (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRZI_FTZ),
              Float32Regs:$y))>,
           Requires<[doF32FTZ]>;
 def : Pat<(frem Float32Regs:$x, fpimm:$y),
           (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32
-            (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRMI_FTZ),
+            (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRZI_FTZ),
              fpimm:$y))>,
           Requires<[doF32FTZ]>;
 
 // frem - f32
 def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
           (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32
-            (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRMI),
+            (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRZI),
              Float32Regs:$y))>;
 def : Pat<(frem Float32Regs:$x, fpimm:$y),
           (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32
-            (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRMI),
+            (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRZI),
              fpimm:$y))>;
 
 // frem - f64
 def : Pat<(frem Float64Regs:$x, Float64Regs:$y),
           (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64
-            (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRMI),
+            (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRZI),
              Float64Regs:$y))>;
 def : Pat<(frem Float64Regs:$x, fpimm:$y),
           (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64
-            (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRMI),
+            (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRZI),
              fpimm:$y))>;
 
 //-----------------------------------
Index: llvm/test/CodeGen/NVPTX/f16-instructions.ll
===================================================================
--- llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -164,13 +164,13 @@
 ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FA:%f[0-9]+]], [[A]];
 ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FB:%f[0-9]+]], [[B]];
 ; CHECK-NOFTZ-NEXT: div.rn.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
-; CHECK-NOFTZ-NEXT: cvt.rmi.f32.f32 [[DI:%f[0-9]+]], [[D]];
+; CHECK-NOFTZ-NEXT: cvt.rzi.f32.f32 [[DI:%f[0-9]+]], [[D]];
 ; CHECK-NOFTZ-NEXT: mul.f32 [[RI:%f[0-9]+]], [[DI]], [[FB]];
 ; CHECK-NOFTZ-NEXT: sub.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
 ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[FA:%f[0-9]+]], [[A]];
 ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[FB:%f[0-9]+]], [[B]];
 ; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32 [[D:%f[0-9]+]], [[FA]], [[FB]];
-; CHECK-F16-FTZ-NEXT: cvt.rmi.ftz.f32.f32 [[DI:%f[0-9]+]], [[D]];
+; CHECK-F16-FTZ-NEXT: cvt.rzi.ftz.f32.f32 [[DI:%f[0-9]+]], [[D]];
 ; CHECK-F16-FTZ-NEXT: mul.ftz.f32 [[RI:%f[0-9]+]], [[DI]], [[FB]];
 ; CHECK-F16-FTZ-NEXT: sub.ftz.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]];
 ; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]];
Index: llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
===================================================================
--- llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -236,12 +236,12 @@
 ; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]];
 ; -- frem(a[0],b[0]).
 ; CHECK-DAG: div.rn.f32 [[FD0:%f[0-9]+]], [[FA0]], [[FB0]];
-; CHECK-DAG: cvt.rmi.f32.f32 [[DI0:%f[0-9]+]], [[FD0]];
+; CHECK-DAG: cvt.rzi.f32.f32 [[DI0:%f[0-9]+]], [[FD0]];
 ; CHECK-DAG: mul.f32 [[RI0:%f[0-9]+]], [[DI0]], [[FB0]];
 ; CHECK-DAG: sub.f32 [[RF0:%f[0-9]+]], [[FA0]], [[RI0]];
 ; -- frem(a[1],b[1]).
 ; CHECK-DAG: div.rn.f32 [[FD1:%f[0-9]+]], [[FA1]], [[FB1]];
-; CHECK-DAG: cvt.rmi.f32.f32 [[DI1:%f[0-9]+]], [[FD1]];
+; CHECK-DAG: cvt.rzi.f32.f32 [[DI1:%f[0-9]+]], [[FD1]];
 ; CHECK-DAG: mul.f32 [[RI1:%f[0-9]+]], [[DI1]], [[FB1]];
 ; CHECK-DAG: sub.f32 [[RF1:%f[0-9]+]], [[FA1]], [[RI1]];
 ; -- convert back to f16.