Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -3941,6 +3941,29 @@ Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); break; + case ISD::UMULO: + // We generate a UMUL_LOHI and then check if the high word is 0. + ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32); + Value = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(Op.getValueType(), Op.getValueType()), + LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), + DAG.getConstant(0, dl, MVT::i32)); + Value = Value.getValue(0); // We only want the low 32 bits for the result. + break; + case ISD::SMULO: + // We generate a SMUL_LOHI and then check if all the bits of the high word + // are the same as the sign bit of the low word. + ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32); + Value = DAG.getNode(ISD::SMUL_LOHI, dl, + DAG.getVTList(Op.getValueType(), Op.getValueType()), + LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), + DAG.getNode(ISD::SRA, dl, Op.getValueType(), + Value.getValue(0), + DAG.getConstant(31, dl, MVT::i32))); + Value = Value.getValue(0); // We only want the low 32 bits for the result. + break; } // switch (...) return std::make_pair(Value, OverflowCmp); @@ -4453,10 +4476,12 @@ SDValue Dest = Op.getOperand(2); SDLoc dl(Op); - // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction. + // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch + // instruction. unsigned Opc = Cond.getOpcode(); - if (Cond.getResNo() == 1 && (Opc == ISD::SADDO || Opc == ISD::UADDO || - Opc == ISD::SSUBO || Opc == ISD::USUBO)) { + if (Cond.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { // Only lower legal XALUO ops. if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) return SDValue(); @@ -4500,11 +4525,13 @@ } } - // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction. + // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch + // instruction. unsigned Opc = LHS.getOpcode(); if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) && (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || - Opc == ISD::USUBO) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { // Only lower legal XALUO ops. if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) return SDValue(); Index: test/CodeGen/ARM/overflow-intrinsic-optimizations.ll =================================================================== --- test/CodeGen/ARM/overflow-intrinsic-optimizations.ll +++ test/CodeGen/ARM/overflow-intrinsic-optimizations.ll @@ -76,6 +76,44 @@ } +define i32 @smul(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: smul: +; CHECK: smull r0, r[[RHI:[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}} +; CHECK-NEXT: cmp r[[RHI]], r0, asr #31 +; CHECK-NEXT: moveq pc, lr +entry: + %0 = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %a, i32 %b) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: + tail call void @llvm.trap() #2 + unreachable + +cont: + %2 = extractvalue { i32, i1 } %0, 0 + ret i32 %2 +} + +define i32 @umul(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: umul: +; CHECK: umull r0, r[[RHI:[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}} +; CHECK-NEXT: cmp r[[RHI]], #0 +; CHECK-NEXT: moveq pc, lr +entry: + %0 = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: + tail call void @llvm.trap() #2 + unreachable + +cont: + %2 = extractvalue { i32, i1 } %0, 0 + ret i32 %2 +} + define void @sum(i32* %a, i32* %b, i32 %n) local_unnamed_addr #0 { ; CHECK-LABEL: sum: ; CHECK: ldr [[R0:r[0-9]+]], @@ -164,3 +202,5 @@ declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) #1 declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) #1