Summary of this patch (free text ahead of the first "Index:" header):

AMDGPUCodeGenPrepare.cpp: delete the early "return false" taken for
Instruction::SDiv and Instruction::UDiv in the routine that asserts
needsPromotionToI32(I.getType()), and drop the sentence
"Division operation is not promoted." from the matching doc comment.

amdgpu-codegenprepare-i16-to-i32.ll: add udiv, udiv exact, sdiv and
sdiv exact tests for i16 and for <3 x i15>. Lines under the SI check prefix
expect the original instruction to be left untouched. Lines under the VI
check prefix expect both operands to be extended to 32 bits (zext for udiv,
sext for sdiv), the division to be done at 32 bits with any "exact" flag
kept, and the result truncated back to the original type.

Index: lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -72,7 +72,7 @@
   /// than or equal 16. Promotion is done by sign or zero extending operands to
   /// 32 bits, replacing \p I with equivalent 32 bit binary operation, and
   /// truncating the result of 32 bit binary operation back to \p I's original
-  /// type. Division operation is not promoted.
+  /// type.
   ///
   /// \returns True if \p I is promoted to equivalent 32 bit binary operation,
   /// false otherwise.
@@ -197,10 +197,6 @@
   assert(needsPromotionToI32(I.getType()) &&
          "I does not need promotion to i32");
 
-  if (I.getOpcode() == Instruction::SDiv ||
-      I.getOpcode() == Instruction::UDiv)
-    return false;
-
   IRBuilder<> Builder(&I);
   Builder.SetCurrentDebugLocation(I.getDebugLoc());
 
Index: test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
===================================================================
--- test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
+++ test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
@@ -676,6 +676,58 @@
   ret i16 %r
 }
 
+; GCN-LABEL: @udiv_i16(
+; SI: %r = udiv i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = udiv i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
+define i16 @udiv_i16(i16 %a, i16 %b) {
+  %r = udiv i16 %a, %b
+  ret i16 %r
+}
+
+; GCN-LABEL: @udiv_exact_i16(
+; SI: %r = udiv exact i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = udiv exact i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
+define i16 @udiv_exact_i16(i16 %a, i16 %b) {
+  %r = udiv exact i16 %a, %b
+  ret i16 %r
+}
+
+; GCN-LABEL: @sdiv_i16(
+; SI: %r = sdiv i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = sdiv i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
+define i16 @sdiv_i16(i16 %a, i16 %b) {
+  %r = sdiv i16 %a, %b
+  ret i16 %r
+}
+
+; GCN-LABEL: @sdiv_exact_i16(
+; SI: %r = sdiv exact i16 %a, %b
+; SI-NEXT: ret i16 %r
+; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32
+; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32
+; VI-NEXT: %[[R_32:[0-9]+]] = sdiv exact i32 %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16
+; VI-NEXT: ret i16 %[[R_16]]
+define i16 @sdiv_exact_i16(i16 %a, i16 %b) {
+  %r = sdiv exact i16 %a, %b
+  ret i16 %r
+}
+
 ; GCN-LABEL: @urem_i16(
 ; SI: %r = urem i16 %a, %b
 ; SI-NEXT: ret i16 %r
@@ -1195,6 +1247,58 @@
   ret <3 x i15> %r
 }
 
+; GCN-LABEL: @udiv_3xi15(
+; SI: %r = udiv <3 x i15> %a, %b
+; SI-NEXT: ret <3 x i15> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = udiv <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
+; VI-NEXT: ret <3 x i15> %[[R_16]]
+define <3 x i15> @udiv_3xi15(<3 x i15> %a, <3 x i15> %b) {
+  %r = udiv <3 x i15> %a, %b
+  ret <3 x i15> %r
+}
+
+; GCN-LABEL: @udiv_exact_3xi15(
+; SI: %r = udiv exact <3 x i15> %a, %b
+; SI-NEXT: ret <3 x i15> %r
+; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = udiv exact <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
+; VI-NEXT: ret <3 x i15> %[[R_16]]
+define <3 x i15> @udiv_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
+  %r = udiv exact <3 x i15> %a, %b
+  ret <3 x i15> %r
+}
+
+; GCN-LABEL: @sdiv_3xi15(
+; SI: %r = sdiv <3 x i15> %a, %b
+; SI-NEXT: ret <3 x i15> %r
+; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = sdiv <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
+; VI-NEXT: ret <3 x i15> %[[R_16]]
+define <3 x i15> @sdiv_3xi15(<3 x i15> %a, <3 x i15> %b) {
+  %r = sdiv <3 x i15> %a, %b
+  ret <3 x i15> %r
+}
+
+; GCN-LABEL: @sdiv_exact_3xi15(
+; SI: %r = sdiv exact <3 x i15> %a, %b
+; SI-NEXT: ret <3 x i15> %r
+; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32>
+; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32>
+; VI-NEXT: %[[R_32:[0-9]+]] = sdiv exact <3 x i32> %[[A_32]], %[[B_32]]
+; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15>
+; VI-NEXT: ret <3 x i15> %[[R_16]]
+define <3 x i15> @sdiv_exact_3xi15(<3 x i15> %a, <3 x i15> %b) {
+  %r = sdiv exact <3 x i15> %a, %b
+  ret <3 x i15> %r
+}
+
 ; GCN-LABEL: @urem_3xi15(
 ; SI: %r = urem <3 x i15> %a, %b
 ; SI-NEXT: ret <3 x i15> %r