Index: lib/Target/NVPTX/NVPTXISelLowering.cpp =================================================================== --- lib/Target/NVPTX/NVPTXISelLowering.cpp +++ lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -124,6 +124,10 @@ // condition branches. setJumpIsExpensive(true); + // Wide divides are _very_ slow. Try to reduce the width of the divide if + // possible. + addBypassSlowDiv(64, 32); + // By default, use the Source scheduling if (sched4reg) setSchedulingPreference(Sched::RegPressure); Index: test/CodeGen/NVPTX/bypass-div.ll =================================================================== --- test/CodeGen/NVPTX/bypass-div.ll +++ test/CodeGen/NVPTX/bypass-div.ll @@ -0,0 +1,80 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s + +; 64-bit divides and rems should be split into a fast and slow path where +; the fast path uses a 32-bit operation. + +define void @sdiv64(i64 %a, i64 %b, i64* %retptr) { +; CHECK-LABEL: sdiv64( +; CHECK: div.s64 +; CHECK: div.u32 +; CHECK: ret + %d = sdiv i64 %a, %b + store i64 %d, i64* %retptr + ret void +} + +define void @udiv64(i64 %a, i64 %b, i64* %retptr) { +; CHECK-LABEL: udiv64( +; CHECK: div.u64 +; CHECK: div.u32 +; CHECK: ret + %d = udiv i64 %a, %b + store i64 %d, i64* %retptr + ret void +} + +define void @srem64(i64 %a, i64 %b, i64* %retptr) { +; CHECK-LABEL: srem64( +; CHECK: rem.s64 +; CHECK: rem.u32 +; CHECK: ret + %d = srem i64 %a, %b + store i64 %d, i64* %retptr + ret void +} + +define void @urem64(i64 %a, i64 %b, i64* %retptr) { +; CHECK-LABEL: urem64( +; CHECK: rem.u64 +; CHECK: rem.u32 +; CHECK: ret + %d = urem i64 %a, %b + store i64 %d, i64* %retptr + ret void +} + +define void @sdiv32(i32 %a, i32 %b, i32* %retptr) { +; CHECK-LABEL: sdiv32( +; CHECK: div.s32 +; CHECK-NOT: div. + %d = sdiv i32 %a, %b + store i32 %d, i32* %retptr + ret void +} + +define void @udiv32(i32 %a, i32 %b, i32* %retptr) { +; CHECK-LABEL: udiv32( +; CHECK: div.u32 +; CHECK-NOT: div. + %d = udiv i32 %a, %b + store i32 %d, i32* %retptr + ret void +} + +define void @srem32(i32 %a, i32 %b, i32* %retptr) { +; CHECK-LABEL: srem32( +; CHECK: rem.s32 +; CHECK-NOT: rem. + %d = srem i32 %a, %b + store i32 %d, i32* %retptr + ret void +} + +define void @urem32(i32 %a, i32 %b, i32* %retptr) { +; CHECK-LABEL: urem32( +; CHECK: rem.u32 +; CHECK-NOT: rem. + %d = urem i32 %a, %b + store i32 %d, i32* %retptr + ret void +}