diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7015,8 +7015,10 @@
     return false;
 
   APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
-  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
-      DAG.MaskedValueIsZero(RHS, HighMask)) {
+  bool LHSIsZ = DAG.MaskedValueIsZero(LHS, HighMask);
+  bool RHSIsZ = DAG.MaskedValueIsZero(RHS, HighMask);
+
+  if (LHSIsZ && RHSIsZ) {
     // The inputs are both zero-extended.
     if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
       Result.push_back(Lo);
@@ -7030,15 +7032,37 @@
     }
   }
 
-  if (!VT.isVector() && Opcode == ISD::MUL &&
-      DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
-      DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
-    // The input values are both sign-extended.
-    // TODO non-MUL case?
-    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
-      Result.push_back(Lo);
-      Result.push_back(Hi);
-      return true;
+  if (!VT.isVector() && Opcode == ISD::MUL) {
+    bool LHSIsS = DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize;
+    bool RHSIsS = DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize;
+    if (LHSIsS && RHSIsS) {
+      // The input values are both sign-extended.
+      // TODO non-MUL case?
+      if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
+        Result.push_back(Lo);
+        Result.push_back(Hi);
+        return true;
+      }
+    }
+    if ((LHSIsZ && RHSIsS) || (LHSIsS && RHSIsZ)) {
+      // One input is sign-extended and the other is zero-extended.
+      // Use a mulhu with a correction.
+      if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
+        // Canonicalize sign-extended value to LL and zero-extended value to RL.
+        if (LHSIsZ)
+          std::swap(LL, RL);
+        // High bits of LHS are 0 or -1. We need to multiply this by RL which
+        // will give 0 or -RL. This needs to be added to Hi. This can be done
+        // as:
+        // Hi -= LL < 0 ? RL : 0;
+        LL = DAG.getNode(ISD::SRA, dl, HiLoVT, LL,
+                         DAG.getShiftAmountConstant(InnerBitSize - 1, HiLoVT, dl));
+        LL = DAG.getNode(ISD::AND, dl, HiLoVT, LL, RL);
+        Hi = DAG.getNode(ISD::SUB, dl, HiLoVT, Hi, LL);
+        Result.push_back(Lo);
+        Result.push_back(Hi);
+        return true;
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
--- a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
@@ -375,11 +375,11 @@
 ; SI-LABEL: mad_i64_i32_extops_i32_i64:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
-; SI-NEXT:    v_mul_hi_u32 v5, v0, v1
-; SI-NEXT:    v_mul_lo_u32 v4, v4, v1
+; SI-NEXT:    v_mul_hi_u32 v4, v0, v1
+; SI-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
 ; SI-NEXT:    v_mul_lo_u32 v0, v0, v1
-; SI-NEXT:    v_add_i32_e32 v1, vcc, v5, v4
+; SI-NEXT:    v_and_b32_e32 v5, v5, v1
+; SI-NEXT:    v_sub_i32_e32 v1, vcc, v4, v5
 ; SI-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
 ; SI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
 ; SI-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/X86/extmul128.ll b/llvm/test/CodeGen/X86/extmul128.ll
--- a/llvm/test/CodeGen/X86/extmul128.ll
+++ b/llvm/test/CodeGen/X86/extmul128.ll
@@ -29,8 +29,8 @@
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    mulq %rsi
 ; CHECK-NEXT:    sarq $63, %rsi
-; CHECK-NEXT:    imulq %rdi, %rsi
-; CHECK-NEXT:    addq %rsi, %rdx
+; CHECK-NEXT:    andq %rdi, %rsi
+; CHECK-NEXT:    subq %rsi, %rdx
 ; CHECK-NEXT:    retq
   %aa = zext i64 %a to i128
   %bb = sext i64 %b to i128
@@ -42,11 +42,10 @@
 ; CHECK-LABEL: i64_sext_zext_i128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    movq %rdi, %rcx
-; CHECK-NEXT:    sarq $63, %rcx
 ; CHECK-NEXT:    mulq %rsi
-; CHECK-NEXT:    imulq %rsi, %rcx
-; CHECK-NEXT:    addq %rcx, %rdx
+; CHECK-NEXT:    sarq $63, %rdi
+; CHECK-NEXT:    andq %rsi, %rdi
+; CHECK-NEXT:    subq %rdi, %rdx
 ; CHECK-NEXT:    retq
   %aa = sext i64 %a to i128
   %bb = zext i64 %b to i128