// Patch overview: lower the i8 forms of ISD::SMULO / ISD::UMULO through two
// new X86-specific nodes (X86ISD::SMUL8 / X86ISD::UMUL8) that are selected to
// the one-operand 8-bit multiply instructions, instead of marking them Expand.
//
// Section 1 (instruction selection): adds a case for X86ISD::UMUL8 and
// X86ISD::SMUL8. Operand 0 is copied into X86::AL, operand 1 becomes the
// explicit operand of X86::MUL8r (UMUL8) or X86::IMUL8r (SMUL8), and the
// resulting machine node -- created with the value list (NVT, MVT::i32) --
// replaces both results of the original node.
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2218,6 +2218,24 @@
     return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
                                 getI8Imm(ShlVal));
   }
+  case X86ISD::UMUL8:
+  case X86ISD::SMUL8: {
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+
+    Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
+
+    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
+                                          N0, SDValue()).getValue(1);
+
+    SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
+    SDValue Ops[] = {N1, InFlag};
+    SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+
+    ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+    ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
+    return nullptr;
+  }
   case X86ISD::UMUL: {
     SDValue N0 = Node->getOperand(0);
     SDValue N1 = Node->getOperand(1);
// Section 2 (node declarations): adds the SMUL8 and UMUL8 entries, with a
// descriptive comment, between the existing UMUL and MUL_IMM entries.
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -301,6 +301,9 @@
 
       UMUL, // LOW, HI, FLAGS = umul LHS, RHS
 
+      // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS
+      SMUL8, UMUL8,
+
       // MUL_IMM - X86 specific multiply by immediate.
       MUL_IMM,
 
// Section 3 (lowering): the first hunk removes the Expand actions for
// ISD::SMULO / ISD::UMULO on MVT::i8. NOTE(review): presumably i8 is then
// covered by the Custom action set in the loop just above -- confirm that
// MVT::i8 is among the VTs that loop visits.
// The second hunk makes the lowering switch pick X86ISD::SMUL8 for an i8
// SMULO and X86ISD::UMUL8 for an i8 UMULO, both with X86::COND_O; other
// value types keep the pre-existing SMUL / UMUL paths.
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -1595,9 +1595,6 @@
     setOperationAction(ISD::UMULO, VT, Custom);
   }
 
-  // There are no 8-bit 3-address imul/mul instructions
-  setOperationAction(ISD::SMULO, MVT::i8, Expand);
-  setOperationAction(ISD::UMULO, MVT::i8, Expand);
 
   if (!Subtarget->is64Bit()) {
     // These libcalls are not available in 32-bit.
@@ -18188,10 +18185,15 @@
     Cond = X86::COND_B;
     break;
   case ISD::SMULO:
-    BaseOp = X86ISD::SMUL;
+    BaseOp = N->getValueType(0) == MVT::i8 ? X86ISD::SMUL8 : X86ISD::SMUL;
     Cond = X86::COND_O;
     break;
   case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
+    if (N->getValueType(0) == MVT::i8) {
+      BaseOp = X86ISD::UMUL8;
+      Cond = X86::COND_O;
+      break;
+    }
     SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
                                  MVT::i32);
     SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
// Section 4 (regression test): the existing umulo test stops expecting the
// imull / testb / je sequence and instead expects the constant 25 to be moved
// into %al, followed by mulb and a jno to the no-overflow label. A parallel
// testsmulo function is added for llvm.smul.with.overflow.i8, expecting imulb.
Index: test/CodeGen/X86/i8-umulo.ll
===================================================================
--- test/CodeGen/X86/i8-umulo.ll
+++ test/CodeGen/X86/i8-umulo.ll
@@ -3,12 +3,13 @@
 
 declare {i8, i1} @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
 define i8 @testumulo(i32 %argc) {
-; CHECK: imull
-; CHECK: testb %{{.+}}, %{{.+}}
-; CHECK: je [[NOOVERFLOWLABEL:.+]]
+; CHECK-LABEL: testumulo
+; CHECK: movb $25, %al
+; CHECK: mulb [[RESULTREG:.+]]
+; CHECK: jno [[NOOVERFLOWLABEL:.+]]
+; CHECK: movb [[RESULTREG]], %al
 ; CHECK: {{.*}}[[NOOVERFLOWLABEL]]:
-; CHECK-NEXT: movb
-; CHECK-NEXT: retl
+; CHECK: retl
 top:
   %RHS = trunc i32 %argc to i8
   %umul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 25, i8 %RHS)
@@ -22,3 +23,26 @@
   %umul.value = extractvalue { i8, i1 } %umul, 0
   ret i8 %umul.value
 }
+
+declare {i8, i1} @llvm.smul.with.overflow.i8(i8 %a, i8 %b)
+define i8 @testsmulo(i32 %argc) {
+; CHECK-LABEL: testsmulo
+; CHECK: movb $25, %al
+; CHECK: imulb [[RESULTREG:.+]]
+; CHECK: jno [[NOOVERFLOWLABEL:.+]]
+; CHECK: movb [[RESULTREG]], %al
+; CHECK: {{.*}}[[NOOVERFLOWLABEL]]:
+; CHECK: retl
+top:
+  %RHS = trunc i32 %argc to i8
+  %smul = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 25, i8 %RHS)
+  %ex = extractvalue { i8, i1 } %smul, 1
+  br i1 %ex, label %overflow, label %nooverlow
+
+overflow:
+  ret i8 %RHS
+
+nooverlow:
+  %smul.value = extractvalue { i8, i1 } %smul, 0
+  ret i8 %smul.value
+}