diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5990,6 +5990,19 @@ DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y); return SDValue(LoHi.getNode(), 1); } + // If MUL is legal at twice the width, widen and use a mul plus a shift. + if (!VT.isVector()) { + unsigned Size = VT.getSizeInBits(); + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2); + if (isOperationLegal(ISD::MUL, WideVT)) { + X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X); + Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y); + Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y); + Y = DAG.getNode(ISD::SRL, dl, WideVT, Y, + DAG.getShiftAmountConstant(EltBits, WideVT, dl)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Y); + } + } return SDValue(); }; @@ -6163,6 +6176,19 @@ DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y); return SDValue(LoHi.getNode(), 1); } + // If MUL is legal at twice the width, widen and use a mul plus a shift. + if (!VT.isVector()) { + unsigned Size = VT.getSizeInBits(); + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2); + if (isOperationLegal(ISD::MUL, WideVT)) { + X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X); + Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y); + Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y); + Y = DAG.getNode(ISD::SRL, dl, WideVT, Y, + DAG.getShiftAmountConstant(EltBits, WideVT, dl)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Y); + } + } return SDValue(); // No mulhu or equivalent }; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -580,6 +580,8 @@ setOperationAction(ISD::MULHS, MVT::i32, Expand); // AArch64 doesn't have {U|S}MUL_LOHI. 
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); diff --git a/llvm/test/CodeGen/VE/Scalar/div.ll b/llvm/test/CodeGen/VE/Scalar/div.ll --- a/llvm/test/CodeGen/VE/Scalar/div.ll +++ b/llvm/test/CodeGen/VE/Scalar/div.ll @@ -149,7 +149,11 @@ define signext i32 @divi32ri(i32 signext %a, i32 signext %b) { ; CHECK-LABEL: divi32ri: ; CHECK: # %bb.0: -; CHECK-NEXT: divs.w.sx %s0, %s0, (62)0 +; CHECK-NEXT: lea %s1, 1431655766 +; CHECK-NEXT: muls.l %s0, %s0, %s1 +; CHECK-NEXT: srl %s1, %s0, 63 +; CHECK-NEXT: srl %s0, %s0, 32 +; CHECK-NEXT: adds.w.sx %s0, %s0, %s1 ; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: b.l.t (, %s10) %r = sdiv i32 %a, 3 @@ -185,8 +189,10 @@ define zeroext i32 @divu32ri(i32 zeroext %a, i32 zeroext %b) { ; CHECK-LABEL: divu32ri: ; CHECK: # %bb.0: -; CHECK-NEXT: divu.w %s0, %s0, (62)0 -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: lea %s1, -1431655765 +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: muls.l %s0, %s0, %s1 +; CHECK-NEXT: srl %s0, %s0, 33 ; CHECK-NEXT: b.l.t (, %s10) %r = udiv i32 %a, 3 ret i32 %r diff --git a/llvm/test/CodeGen/VE/Scalar/rem.ll b/llvm/test/CodeGen/VE/Scalar/rem.ll --- a/llvm/test/CodeGen/VE/Scalar/rem.ll +++ b/llvm/test/CodeGen/VE/Scalar/rem.ll @@ -165,7 +165,11 @@ define signext i32 @remi32ri(i32 signext %a) { ; CHECK-LABEL: remi32ri: ; CHECK: # %bb.0: -; CHECK-NEXT: divs.w.sx %s1, %s0, (62)0 +; CHECK-NEXT: lea %s1, 1431655766 +; CHECK-NEXT: muls.l %s1, %s0, %s1 +; CHECK-NEXT: srl %s2, %s1, 63 +; CHECK-NEXT: srl %s1, %s1, 32 +; CHECK-NEXT: adds.w.sx %s1, %s1, %s2 ; CHECK-NEXT: muls.w.sx %s1, 3, %s1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 ; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 @@ -205,7 +209,10 @@ define zeroext i32 @remu32ri(i32 zeroext %a) { ; CHECK-LABEL: remu32ri: ; CHECK: # %bb.0: -; CHECK-NEXT: divu.w %s1, %s0, (62)0 +; CHECK-NEXT: lea %s1, 
-1431655765 +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: muls.l %s1, %s0, %s1 +; CHECK-NEXT: srl %s1, %s1, 33 ; CHECK-NEXT: muls.w.sx %s1, 3, %s1 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s1 ; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 diff --git a/llvm/test/CodeGen/VE/Vector/vec_divrem.ll b/llvm/test/CodeGen/VE/Vector/vec_divrem.ll --- a/llvm/test/CodeGen/VE/Vector/vec_divrem.ll +++ b/llvm/test/CodeGen/VE/Vector/vec_divrem.ll @@ -8,13 +8,18 @@ ; CHECK-LABEL: udiv_by_minus_one: ; CHECK: # %bb.0: ; CHECK-NEXT: and %s0, %s0, (56)0 +; CHECK-NEXT: lea %s4, 16843010 +; CHECK-NEXT: muls.l %s0, %s0, %s4 +; CHECK-NEXT: srl %s0, %s0, 32 ; CHECK-NEXT: and %s1, %s1, (56)0 +; CHECK-NEXT: muls.l %s1, %s1, %s4 +; CHECK-NEXT: srl %s1, %s1, 32 ; CHECK-NEXT: and %s2, %s2, (56)0 +; CHECK-NEXT: muls.l %s2, %s2, %s4 +; CHECK-NEXT: srl %s2, %s2, 32 ; CHECK-NEXT: and %s3, %s3, (56)0 -; CHECK-NEXT: divu.w %s3, %s3, (56)0 -; CHECK-NEXT: divu.w %s2, %s2, (56)0 -; CHECK-NEXT: divu.w %s1, %s1, (56)0 -; CHECK-NEXT: divu.w %s0, %s0, (56)0 +; CHECK-NEXT: muls.l %s3, %s3, %s4 +; CHECK-NEXT: srl %s3, %s3, 32 ; CHECK-NEXT: b.l.t (, %s10) %r = udiv <4 x i8> %x, ret <4 x i8> %r @@ -27,16 +32,21 @@ ; CHECK-NEXT: and %s1, %s1, (56)0 ; CHECK-NEXT: and %s2, %s2, (56)0 ; CHECK-NEXT: and %s3, %s3, (56)0 -; CHECK-NEXT: divu.w %s4, %s3, (56)0 -; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0 -; CHECK-NEXT: subs.w.sx %s3, %s3, %s4 -; CHECK-NEXT: divu.w %s4, %s2, (56)0 -; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0 -; CHECK-NEXT: subs.w.sx %s2, %s2, %s4 -; CHECK-NEXT: divu.w %s4, %s1, (56)0 -; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0 -; CHECK-NEXT: subs.w.sx %s1, %s1, %s4 -; CHECK-NEXT: divu.w %s4, %s0, (56)0 +; CHECK-NEXT: lea %s4, 16843010 +; CHECK-NEXT: muls.l %s5, %s3, %s4 +; CHECK-NEXT: srl %s5, %s5, 32 +; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0 +; CHECK-NEXT: subs.w.sx %s3, %s3, %s5 +; CHECK-NEXT: muls.l %s5, %s2, %s4 +; CHECK-NEXT: srl %s5, %s5, 32 +; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0 +; CHECK-NEXT: subs.w.sx %s2, %s2, %s5 +; 
CHECK-NEXT: muls.l %s5, %s1, %s4 +; CHECK-NEXT: srl %s5, %s5, 32 +; CHECK-NEXT: muls.w.sx %s5, %s5, (56)0 +; CHECK-NEXT: subs.w.sx %s1, %s1, %s5 +; CHECK-NEXT: muls.l %s4, %s0, %s4 +; CHECK-NEXT: srl %s4, %s4, 32 ; CHECK-NEXT: muls.w.sx %s4, %s4, (56)0 ; CHECK-NEXT: subs.w.sx %s0, %s0, %s4 ; CHECK-NEXT: b.l.t (, %s10)