Index: llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -64,12 +64,21 @@ cl::ReallyHidden, cl::init(true)); +// Legalize 64-bit division by using the generic IR expansion. static cl::opt ExpandDiv64InIR( "amdgpu-codegenprepare-expand-div64", cl::desc("Expand 64-bit division in AMDGPUCodeGenPrepare"), cl::ReallyHidden, cl::init(false)); +// Leave all division operations as they are. This supersedes ExpandDiv64InIR +// and is used for testing the legalizer. +static cl::opt DisableIDivExpand( + "amdgpu-codegenprepare-disable-idiv-expansion", + cl::desc("Prevent expanding integer division in AMDGPUCodeGenPrepare"), + cl::ReallyHidden, + cl::init(false)); + class AMDGPUCodeGenPrepare : public FunctionPass, public InstVisitor { const GCNSubtarget *ST = nullptr; @@ -1203,7 +1212,8 @@ if ((Opc == Instruction::URem || Opc == Instruction::UDiv || Opc == Instruction::SRem || Opc == Instruction::SDiv) && - ScalarSize <= 64) { + ScalarSize <= 64 && + !DisableIDivExpand) { Value *Num = I.getOperand(0); Value *Den = I.getOperand(1); IRBuilder<> Builder(&I); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll @@ -0,0 +1,198 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion -mtriple=amdgcn-amd-amdhsa -stop-after=irtranslator < %s | FileCheck %s + +define i32 @udiv_i32(i32 %num, i32 %den) { + ; CHECK-LABEL: name: udiv_i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[UDIV]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %result = udiv i32 %num, %den + ret i32 %result +} + +define <2 x i32> @udiv_v2i32(<2 x i32> %num, <2 x i32> %den) { + ; CHECK-LABEL: name: udiv_v2i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[UDIV:%[0-9]+]]:_(<2 x s32>) = G_UDIV [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UDIV]](<2 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %result = udiv <2 x i32> %num, %den + ret <2 x i32> %result +} + +define i32 @udiv_i32_pow2k_denom(i32 %num) { + ; CHECK-LABEL: name: udiv_i32_pow2k_denom + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 + ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[C]] + ; CHECK: $vgpr0 = COPY [[UDIV]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %result = udiv i32 %num, 4096 + ret i32 %result +} + +define <2 x i32> @udiv_v2i32_pow2k_denom(<2 x i32> %num) { + ; CHECK-LABEL: name: udiv_v2i32_pow2k_denom + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK: [[UDIV:%[0-9]+]]:_(<2 x s32>) = G_UDIV [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UDIV]](<2 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 + %result = udiv <2 x i32> %num, + ret <2 x i32> %result +} + +define i32 @udiv_i32_oddk_denom(i32 %num) { + ; CHECK-LABEL: name: udiv_i32_oddk_denom + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1235195 + ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[C]] + ; CHECK: $vgpr0 = COPY [[UDIV]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + %result = udiv i32 %num, 1235195 + ret i32 %result +} + +define <2 x i32> @udiv_v2i32_oddk_denom(<2 x i32> %num) { + ; CHECK-LABEL: name: udiv_v2i32_oddk_denom + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1235195 + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK: [[UDIV:%[0-9]+]]:_(<2 x s32>) = G_UDIV [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UDIV]](<2 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 + %result = udiv <2 x i32> %num, + ret <2 x i32> %result +} + +define i32 @udiv_i32_pow2_shl_denom(i32 %x, i32 %y) { + ; CHECK-LABEL: name: udiv_i32_pow2_shl_denom + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) + ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[SHL]] + ; CHECK: $vgpr0 = COPY [[UDIV]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %shl.y = shl i32 4096, %y + %r = udiv i32 %x, %shl.y + ret i32 %r +} + +define <2 x i32> @udiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { + ; CHECK-LABEL: name: udiv_v2i32_pow2_shl_denom + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL [[BUILD_VECTOR2]], [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK: [[UDIV:%[0-9]+]]:_(<2 x s32>) = G_UDIV [[BUILD_VECTOR]], [[SHL]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UDIV]](<2 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %shl.y = shl <2 x i32> , %y + %r = udiv <2 x i32> %x, %shl.y + ret <2 x i32> %r +} + +define i32 @udiv_i32_24bit(i32 %num, i32 %den) { + ; CHECK-LABEL: name: udiv_i32_24bit + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] + ; CHECK: $vgpr0 = COPY [[UDIV]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %num.mask = and i32 %num, 16777215 + %den.mask = and i32 %den, 16777215 + %result = udiv i32 %num.mask, %den.mask + ret i32 %result +} + +define <2 x i32> @udiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) { + ; CHECK-LABEL: name: udiv_v2i32_24bit + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; CHECK: [[UDIV:%[0-9]+]]:_(<2 x s32>) = G_UDIV [[AND]], [[AND1]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UDIV]](<2 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %num.mask = and <2 x i32> %num, + %den.mask = and <2 x i32> %den, + %result = udiv <2 x i32> %num.mask, %den.mask + ret <2 x i32> %result +}