Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2886,19 +2886,29 @@ APInt::ms magics = Divisor.magic(); // Multiply the numerator (operand 0) by the magic value - // FIXME: We should support doing a MUL in a wider type + // FIXME: expand using MULHS for vector types after addressing possible + // regressions in X86 backend. + unsigned Opcode; + if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) + : isOperationLegalOrCustom(ISD::MULHS, VT)) + Opcode = ISD::MULHS; + else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) + : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) + Opcode = ISD::SMUL_LOHI; + else if (!IsAfterLegalization && !VT.isVector()) + Opcode = ISD::MULHS; + else + return SDValue(); + SDValue Q; - if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) : - isOperationLegalOrCustom(ISD::MULHS, VT)) + if (Opcode == ISD::MULHS) Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0), DAG.getConstant(magics.m, dl, VT)); - else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) : - isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) + else Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N->getOperand(0), DAG.getConstant(magics.m, dl, VT)).getNode(), 1); - else - return SDValue(); // No mulhs or equvialent + // If d > 0 and m < 0, add the numerator if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); @@ -2965,16 +2975,25 @@ } // Multiply the numerator (operand 0) by the magic value - // FIXME: We should support doing a MUL in a wider type - if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) : - isOperationLegalOrCustom(ISD::MULHU, VT)) + // FIXME: expand using MULHU for vector types after addressing possible + // regressions in X86 backend. + unsigned Opcode; + if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) + : isOperationLegalOrCustom(ISD::MULHU, VT)) + Opcode = ISD::MULHU; + else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) + : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) + Opcode = ISD::UMUL_LOHI; + else if (!IsAfterLegalization && !VT.isVector()) + Opcode = ISD::MULHU; + else + return SDValue(); + + if (Opcode == ISD::MULHU) Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT)); - else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) : - isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) + else Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, DAG.getConstant(magics.m, dl, VT)).getNode(), 1); - else - return SDValue(); // No mulhu or equvialent Created->push_back(Q.getNode()); Index: lib/Target/AMDGPU/SOPInstructions.td =================================================================== --- lib/Target/AMDGPU/SOPInstructions.td +++ lib/Target/AMDGPU/SOPInstructions.td @@ -846,6 +846,12 @@ (S_ADD_U32 $src0, $src1) >; +// Similarly for V_SUB_I32/S_SUB_U32. +def : Pat < + (i32 (subc i32:$src0, i32:$src1)), + (S_SUB_U32 $src0, $src1) +>; + //===----------------------------------------------------------------------===// // SOPP Patterns //===----------------------------------------------------------------------===// Index: test/CodeGen/AMDGPU/sdiv.ll =================================================================== --- test/CodeGen/AMDGPU/sdiv.ll +++ test/CodeGen/AMDGPU/sdiv.ll @@ -136,6 +136,27 @@ ret void } +; FUNC-LABEL: {{^}}sdiv_i32_const: +; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x92492493 +; SI-NOT: v_rcp +define void @sdiv_i32_const(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %num = load i32, i32 addrspace(1)* %in + %result = sdiv i32 %num, 7 + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}sdiv_i64_const: +; SI-DAG: s_mov_b32 [[MAGIC_LO:s[0-9]+]], 0x24924925 +; SI-DAG: s_mov_b32 [[MAGIC_HI:s[0-9]+]], 0x49249249 +; SI-NOT: v_rcp +define void @sdiv_i64_const(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %num = load i64, i64 addrspace(1)* %in + %result = sdiv i64 %num, 7 + store i64 %result, i64 addrspace(1)* %out + ret void +} + ; Tests for 64-bit divide bypass. ; define void @test_get_quotient(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { ; %result = sdiv i64 %a, %b Index: test/CodeGen/AMDGPU/udiv.ll =================================================================== --- test/CodeGen/AMDGPU/udiv.ll +++ test/CodeGen/AMDGPU/udiv.ll @@ -145,3 +145,24 @@ store i32 %result.ext, i32 addrspace(1)* %out ret void } + +; FUNC-LABEL: {{^}}udiv_i32_const: +; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x24924925 +; SI-NOT: v_rcp +define void @udiv_i32_const(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %num = load i32, i32 addrspace(1)* %in + %result = udiv i32 %num, 7 + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}udiv_i64_const: +; SI-DAG: s_mov_b32 [[MAGIC_HI:s[0-9]+]], 0x24924924 +; SI-DAG: s_mov_b32 [[MAGIC_LO:s[0-9]+]], 0x92492493 +; SI-NOT: v_rcp +define void @udiv_i64_const(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %num = load i64, i64 addrspace(1)* %in + %result = udiv i64 %num, 7 + store i64 %result, i64 addrspace(1)* %out + ret void +} Index: test/CodeGen/BPF/sdiv_error.ll =================================================================== --- test/CodeGen/BPF/sdiv_error.ll +++ test/CodeGen/BPF/sdiv_error.ll @@ -1,6 +1,6 @@ ; RUN: not llc -march=bpf < %s 2> %t1 ; RUN: FileCheck %s < %t1 -; CHECK: Unsupport signed division +; CHECK: Cannot select: t27: i64,i64 = smul_lohi ; Function Attrs: norecurse nounwind readnone define i32 @test(i32 %len) #0 { Index: test/CodeGen/SPARC/rem.ll =================================================================== --- test/CodeGen/SPARC/rem.ll +++ test/CodeGen/SPARC/rem.ll @@ -24,12 +24,38 @@ ; PR18150 ; CHECK-LABEL: test3 -; CHECK: sethi 2545, [[R0:%[gilo][0-7]]] -; CHECK: or [[R0]], 379, [[R1:%[gilo][0-7]]] -; CHECK: mulx %o0, [[R1]], [[R2:%[gilo][0-7]]] -; CHECK: udivx [[R2]], 1021, [[R3:%[gilo][0-7]]] -; CHECK: mulx [[R3]], 1021, [[R4:%[gilo][0-7]]] -; CHECK: sub [[R2]], [[R4]], %o0 +; CHECK: sethi 2545, %o1 +; CHECK-NEXT: or %o1, 379, %o1 +; CHECK-NEXT: mulx %o0, %o1, %o0 +; CHECK-NEXT: srl %o0, 0, %o1 +; CHECK-NEXT: sethi 12324, %o2 +; CHECK-NEXT: or %o2, 108, %o2 +; CHECK-NEXT: mulx %o1, %o2, %o3 +; CHECK-NEXT: sethi 1331003, %o4 +; CHECK-NEXT: or %o4, 435, %o4 +; CHECK-NEXT: mulx %o1, %o4, %o1 +; CHECK-NEXT: srlx %o1, 32, %o1 +; CHECK-NEXT: srlx %o0, 32, %o5 +; CHECK-NEXT: mulx %o5, %o4, %o4 +; CHECK-NEXT: srlx %o4, 32, %g2 +; CHECK-NEXT: srlx %o3, 32, %g3 +; CHECK-NEXT: mulx %o5, %o2, %o2 +; CHECK-NEXT: srlx %o2, 32, %o5 +; CHECK-NEXT: addcc %o1, %o3, %o1 +; CHECK-NEXT: addxcc %g3, %g2, %o3 +; CHECK-NEXT: addxcc %o5, 0, %o5 +; CHECK-NEXT: addcc %o1, %o4, %o1 +; CHECK-NEXT: addxcc %o3, %o2, %o1 +; CHECK-NEXT: addxcc %o5, 0, %o2 +; CHECK-NEXT: srl %o1, 0, %o1 +; CHECK-NEXT: sllx %o2, 32, %o2 +; CHECK-NEXT: or %o1, %o2, %o1 +; CHECK-NEXT: sub %o0, %o1, %o2 +; CHECK-NEXT: srlx %o2, 1, %o2 +; CHECK-NEXT: add %o2, %o1, %o1 +; CHECK-NEXT: srlx %o1, 9, %o1 +; CHECK-NEXT: mulx %o1, 1021, %o1 +; CHECK-NEXT: retl define i64 @test3(i64 %b) { entry: