diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5083,11 +5083,25 @@ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); EVT ShSVT = ShVT.getScalarType(); unsigned EltBits = VT.getScalarSizeInBits(); + EVT MulVT; // Check to see if we can do this. // FIXME: We should be more aggressive here. - if (!isTypeLegal(VT)) - return SDValue(); + if (!isTypeLegal(VT)) { + // Limit this to simple scalars for now. + if (VT.isVector() || !VT.isSimple()) + return SDValue(); + + // If this type will be promoted to a large enough type with a legal + // multiply operation, we can go ahead and do this transform. + if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger) + return SDValue(); + + MulVT = getTypeToTransformTo(*DAG.getContext(), VT); + if (MulVT.getSizeInBits() < (2 * EltBits) || + !isOperationLegal(ISD::MUL, MulVT)) + return SDValue(); + } // If the sdiv has an 'exact' bit we can use a simpler lowering. if (N->getFlags().hasExact()) @@ -5156,15 +5170,32 @@ // Multiply the numerator (operand 0) by the magic value. // FIXME: We should support doing a MUL in a wider type. - SDValue Q; - if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization)) - Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor); - else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) { - SDValue LoHi = - DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor); - Q = SDValue(LoHi.getNode(), 1); - } else - return SDValue(); // No mulhs or equivalent. + auto GetMULHS = [&](SDValue X, SDValue Y) { + // If the type isn't legal, use a wider mul of the the type calculated + // earlier. + if (!isTypeLegal(VT)) { + X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X); + Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y); + Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y); + Y = DAG.getNode(ISD::SRL, dl, MulVT, Y, + DAG.getShiftAmountConstant(EltBits, MulVT, dl)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Y); + } + + if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization)) + return DAG.getNode(ISD::MULHS, dl, VT, X, Y); + if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) { + SDValue LoHi = + DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y); + return SDValue(LoHi.getNode(), 1); + } + return SDValue(); + }; + + SDValue Q = GetMULHS(N0, MagicFactor); + if (!Q) + return SDValue(); + Created.push_back(Q.getNode()); // (Optionally) Add/subtract the numerator using Factor. @@ -5199,11 +5230,25 @@ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); EVT ShSVT = ShVT.getScalarType(); unsigned EltBits = VT.getScalarSizeInBits(); + EVT MulVT; // Check to see if we can do this. // FIXME: We should be more aggressive here. - if (!isTypeLegal(VT)) - return SDValue(); + if (!isTypeLegal(VT)) { + // Limit this to simple scalars for now. + if (VT.isVector() || !VT.isSimple()) + return SDValue(); + + // If this type will be promoted to a large enough type with a legal + // multiply operation, we can go ahead and do this transform. 
+ if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger) + return SDValue(); + + MulVT = getTypeToTransformTo(*DAG.getContext(), VT); + if (MulVT.getSizeInBits() < (2 * EltBits) || + !isOperationLegal(ISD::MUL, MulVT)) + return SDValue(); + } bool UseNPQ = false; SmallVector PreShifts, PostShifts, MagicFactors, NPQFactors; @@ -5283,6 +5328,17 @@ // FIXME: We should support doing a MUL in a wider type. auto GetMULHU = [&](SDValue X, SDValue Y) { + // If the type isn't legal, use a wider mul of the the type calculated + // earlier. + if (!isTypeLegal(VT)) { + X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X); + Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y); + Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y); + Y = DAG.getNode(ISD::SRL, dl, MulVT, Y, + DAG.getShiftAmountConstant(EltBits, MulVT, dl)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Y); + } + if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization)) return DAG.getNode(ISD::MULHU, dl, VT, X, Y); if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) { diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h --- a/llvm/lib/Target/BPF/BPFISelLowering.h +++ b/llvm/lib/Target/BPF/BPFISelLowering.h @@ -104,6 +104,8 @@ return Op.size() >= 8 ? MVT::i64 : MVT::i32; } + bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; } + bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override { return true; diff --git a/llvm/test/CodeGen/AArch64/srem-seteq.ll b/llvm/test/CodeGen/AArch64/srem-seteq.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq.ll @@ -83,13 +83,10 @@ define i16 @test_srem_even(i16 %X) nounwind { ; CHECK-LABEL: test_srem_even: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #9363 ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: movk w9, #37449, lsl #16 -; CHECK-NEXT: smull x9, w8, w9 -; CHECK-NEXT: lsr x9, x9, #32 -; CHECK-NEXT: add w8, w9, w8 -; CHECK-NEXT: asr w9, w8, #3 +; CHECK-NEXT: mov w9, #18725 +; CHECK-NEXT: mul w8, w8, w9 +; CHECK-NEXT: asr w9, w8, #18 ; CHECK-NEXT: add w8, w9, w8, lsr #31 ; CHECK-NEXT: mov w9, #14 ; CHECK-NEXT: msub w8, w8, w9, w0 diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll @@ -195,14 +195,15 @@ define i1 @t16_3_2(i16 %X) nounwind { ; CHECK-LABEL: t16_3_2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #43691 ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: movk w9, #43690, lsl #16 -; CHECK-NEXT: mov w10, #-1431655766 -; CHECK-NEXT: madd w8, w8, w9, w10 -; CHECK-NEXT: mov w9, #1431655765 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: mul w8, w8, w9 +; CHECK-NEXT: lsr w8, w8, #17 +; CHECK-NEXT: add w8, w8, w8, lsl #1 +; CHECK-NEXT: sub w8, w0, w8 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, #2 // =2 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %urem = urem i16 %X, 3 %cmp = icmp eq i16 %urem, 2 @@ -212,14 +213,15 @@ define i1 @t8_3_2(i8 %X) nounwind { ; CHECK-LABEL: t8_3_2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #43691 ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: movk w9, #43690, lsl #16 -; CHECK-NEXT: mov w10, #-1431655766 -; CHECK-NEXT: madd w8, w8, w9, w10 -; CHECK-NEXT: mov w9, #1431655765 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: mov w9, #171 +; CHECK-NEXT: mul w8, w8, w9 +; CHECK-NEXT: lsr w8, w8, #9 +; 
CHECK-NEXT: add w8, w8, w8, lsl #1 +; CHECK-NEXT: sub w8, w0, w8 +; CHECK-NEXT: and w8, w8, #0xff +; CHECK-NEXT: cmp w8, #2 // =2 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %urem = urem i8 %X, 3 %cmp = icmp eq i8 %urem, 2 diff --git a/llvm/test/CodeGen/AArch64/urem-seteq.ll b/llvm/test/CodeGen/AArch64/urem-seteq.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq.ll @@ -78,15 +78,14 @@ define i16 @test_urem_even(i16 %X) nounwind { ; CHECK-LABEL: test_urem_even: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #28087 -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: movk w9, #46811, lsl #16 +; CHECK-NEXT: ubfx w8, w0, #1, #15 +; CHECK-NEXT: mov w9, #18725 ; CHECK-NEXT: mul w8, w8, w9 -; CHECK-NEXT: mov w9, #9362 -; CHECK-NEXT: ror w8, w8, #1 -; CHECK-NEXT: movk w9, #4681, lsl #16 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: lsr w8, w8, #17 +; CHECK-NEXT: mov w9, #14 +; CHECK-NEXT: msub w8, w8, w9, w0 +; CHECK-NEXT: tst w8, #0xffff +; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %urem = urem i16 %X, 14 %cmp = icmp ne i16 %urem, 0 diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll --- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll @@ -615,7 +615,6 @@ ; CHECK-LABEL: test_ds_cross_basic_blocks: ; CHECK: # %bb.0: ; CHECK-NEXT: cmplwi r4, 0 -; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill @@ -627,59 +626,57 @@ ; CHECK-NEXT: li r7, 1 ; CHECK-NEXT: addi r6, r3, 4009 ; CHECK-NEXT: ld r5, .LC0@toc@l(r5) -; CHECK-NEXT: iselgt r8, r4, r7 -; CHECK-NEXT: lis r4, -21846 +; CHECK-NEXT: iselgt r4, r4, r7 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: li r9, -7 -; CHECK-NEXT: li r10, -6 +; CHECK-NEXT: li r8, -7 +; CHECK-NEXT: li r9, -6 +; CHECK-NEXT: li r10, 1 ; CHECK-NEXT: li r11, 1 ; CHECK-NEXT: li r12, 1 ; CHECK-NEXT: li r30, 1 ; CHECK-NEXT: ld r5, 0(r5) -; CHECK-NEXT: mtctr r8 -; CHECK-NEXT: ori r4, r4, 43691 -; CHECK-NEXT: li r8, -9 +; CHECK-NEXT: mtctr r4 +; CHECK-NEXT: li r4, -9 ; CHECK-NEXT: li r29, 1 -; CHECK-NEXT: li r28, 1 ; CHECK-NEXT: addi r5, r5, -1 ; CHECK-NEXT: b .LBB6_4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: ldx r0, r6, r8 -; CHECK-NEXT: add r28, r0, r28 -; CHECK-NEXT: ld r0, -8(r6) +; CHECK-NEXT: ldx r0, r6, r4 ; CHECK-NEXT: add r29, r0, r29 +; CHECK-NEXT: ld r0, -8(r6) +; CHECK-NEXT: add r30, r0, r30 ; CHECK-NEXT: .LBB6_3: -; CHECK-NEXT: mulld r0, r29, r28 +; CHECK-NEXT: mulld r0, r30, r29 ; CHECK-NEXT: addi r6, r6, 1 -; CHECK-NEXT: mulld r0, r0, r30 ; CHECK-NEXT: mulld r0, r0, r12 ; CHECK-NEXT: mulld r0, r0, r11 +; CHECK-NEXT: mulld r0, r0, r10 ; CHECK-NEXT: maddld r3, r0, r7, r3 ; CHECK-NEXT: bdz .LBB6_9 ; CHECK-NEXT: .LBB6_4: ; CHECK-NEXT: lbzu r0, 1(r5) -; CHECK-NEXT: mulhwu r27, r0, r4 -; CHECK-NEXT: rlwinm r26, r27, 0, 0, 30 -; CHECK-NEXT: srwi r27, r27, 1 -; CHECK-NEXT: add r27, r27, r26 -; CHECK-NEXT: sub r0, r0, r27 +; CHECK-NEXT: mulli r28, r0, 171 +; CHECK-NEXT: rlwinm r27, r28, 24, 8, 30 +; CHECK-NEXT: srwi r28, r28, 9 +; CHECK-NEXT: add r28, r28, r27 +; CHECK-NEXT: sub r0, r0, r28 +; CHECK-NEXT: clrlwi r0, r0, 24 ; CHECK-NEXT: cmplwi r0, 1 ; CHECK-NEXT: beq cr0, .LBB6_2 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: clrlwi r0, r0, 24 ; CHECK-NEXT: cmplwi r0, 2 ; CHECK-NEXT: bne cr0, .LBB6_7 ; CHECK-NEXT: # %bb.6: -; 
CHECK-NEXT: ldx r0, r6, r9 -; CHECK-NEXT: add r30, r0, r30 -; CHECK-NEXT: ld r0, -4(r6) +; CHECK-NEXT: ldx r0, r6, r8 ; CHECK-NEXT: add r12, r0, r12 +; CHECK-NEXT: ld r0, -4(r6) +; CHECK-NEXT: add r11, r0, r11 ; CHECK-NEXT: b .LBB6_3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB6_7: -; CHECK-NEXT: ldx r0, r6, r10 -; CHECK-NEXT: add r11, r0, r11 +; CHECK-NEXT: ldx r0, r6, r9 +; CHECK-NEXT: add r10, r0, r10 ; CHECK-NEXT: ld r0, 0(r6) ; CHECK-NEXT: add r7, r0, r7 ; CHECK-NEXT: b .LBB6_3 @@ -690,7 +687,6 @@ ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr %3 = sext i32 %1 to i64 %4 = icmp eq i32 %1, 0 diff --git a/llvm/test/CodeGen/RISCV/div.ll b/llvm/test/CodeGen/RISCV/div.ll --- a/llvm/test/CodeGen/RISCV/div.ll +++ b/llvm/test/CodeGen/RISCV/div.ll @@ -79,16 +79,12 @@ ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a0, a0, 32 ; RV64IM-NEXT: srli a0, a0, 32 -; RV64IM-NEXT: lui a1, 1035469 +; RV64IM-NEXT: lui a1, 205 ; RV64IM-NEXT: addiw a1, a1, -819 ; RV64IM-NEXT: slli a1, a1, 12 ; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: mulhu a0, a0, a1 -; RV64IM-NEXT: srli a0, a0, 2 +; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: srli a0, a0, 34 ; RV64IM-NEXT: ret %1 = udiv i32 %a, 5 ret i32 %1 @@ -259,10 +255,9 @@ ; RV32IM-LABEL: udiv8_constant: ; RV32IM: # %bb.0: ; RV32IM-NEXT: andi a0, a0, 255 -; RV32IM-NEXT: lui a1, 838861 -; RV32IM-NEXT: addi a1, a1, -819 -; RV32IM-NEXT: mulhu a0, a0, a1 -; RV32IM-NEXT: srli a0, a0, 2 +; RV32IM-NEXT: addi a1, zero, 205 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: srli a0, a0, 10 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: udiv8_constant: @@ -279,16 +274,9 @@ ; RV64IM-LABEL: udiv8_constant: ; RV64IM: # %bb.0: ; RV64IM-NEXT: andi a0, a0, 255 -; RV64IM-NEXT: lui a1, 1035469 -; RV64IM-NEXT: addiw a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: mulhu a0, a0, a1 -; RV64IM-NEXT: srli a0, a0, 2 +; RV64IM-NEXT: addi a1, zero, 205 +; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: srli a0, a0, 10 ; RV64IM-NEXT: ret %1 = udiv i8 %a, 5 ret i8 %1 @@ -389,10 +377,10 @@ ; RV32IM-NEXT: lui a1, 16 ; RV32IM-NEXT: addi a1, a1, -1 ; RV32IM-NEXT: and a0, a0, a1 -; RV32IM-NEXT: lui a1, 838861 +; RV32IM-NEXT: lui a1, 13 ; RV32IM-NEXT: addi a1, a1, -819 -; RV32IM-NEXT: mulhu a0, a0, a1 -; RV32IM-NEXT: srli a0, a0, 2 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: srli a0, a0, 18 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: udiv16_constant: @@ -413,16 +401,10 @@ ; RV64IM-NEXT: lui a1, 16 ; RV64IM-NEXT: addiw a1, a1, -1 ; RV64IM-NEXT: and a0, a0, a1 -; RV64IM-NEXT: lui a1, 1035469 +; RV64IM-NEXT: lui a1, 13 ; RV64IM-NEXT: addiw a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -819 -; RV64IM-NEXT: mulhu a0, a0, a1 -; RV64IM-NEXT: srli a0, a0, 2 +; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: srli a0, a0, 18 ; RV64IM-NEXT: ret %1 = udiv i16 %a, 5 ret i16 %1 @@ -525,17 +507,11 @@ ; RV64IM-LABEL: sdiv_constant: ; RV64IM: # %bb.0: ; RV64IM-NEXT: sext.w a0, a0 -; RV64IM-NEXT: lui a1, 13107 
-; RV64IM-NEXT: addiw a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, 1639 -; RV64IM-NEXT: mulh a0, a0, a1 +; RV64IM-NEXT: lui a1, 419430 +; RV64IM-NEXT: addiw a1, a1, 1639 +; RV64IM-NEXT: mul a0, a0, a1 ; RV64IM-NEXT: srli a1, a0, 63 -; RV64IM-NEXT: srai a0, a0, 1 +; RV64IM-NEXT: srai a0, a0, 33 ; RV64IM-NEXT: add a0, a0, a1 ; RV64IM-NEXT: ret %1 = sdiv i32 %a, 5 @@ -784,12 +760,12 @@ ; RV32IM: # %bb.0: ; RV32IM-NEXT: slli a0, a0, 24 ; RV32IM-NEXT: srai a0, a0, 24 -; RV32IM-NEXT: lui a1, 419430 -; RV32IM-NEXT: addi a1, a1, 1639 -; RV32IM-NEXT: mulh a0, a0, a1 -; RV32IM-NEXT: srli a1, a0, 31 -; RV32IM-NEXT: srai a0, a0, 1 -; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: addi a1, zero, 103 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: srai a1, a0, 9 +; RV32IM-NEXT: srli a0, a0, 15 +; RV32IM-NEXT: andi a0, a0, 1 +; RV32IM-NEXT: add a0, a1, a0 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: sdiv8_constant: @@ -808,18 +784,12 @@ ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a0, a0, 56 ; RV64IM-NEXT: srai a0, a0, 56 -; RV64IM-NEXT: lui a1, 13107 -; RV64IM-NEXT: addiw a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, 1639 -; RV64IM-NEXT: mulh a0, a0, a1 -; RV64IM-NEXT: srli a1, a0, 63 -; RV64IM-NEXT: srai a0, a0, 1 -; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: addi a1, zero, 103 +; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: srai a1, a0, 9 +; RV64IM-NEXT: srli a0, a0, 15 +; RV64IM-NEXT: andi a0, a0, 1 +; RV64IM-NEXT: add a0, a1, a0 ; RV64IM-NEXT: ret %1 = sdiv i8 %a, 5 ret i8 %1 @@ -938,11 +908,11 @@ ; RV32IM: # %bb.0: ; RV32IM-NEXT: slli a0, a0, 16 ; RV32IM-NEXT: srai a0, a0, 16 -; RV32IM-NEXT: lui a1, 419430 +; RV32IM-NEXT: lui a1, 6 ; RV32IM-NEXT: addi a1, a1, 1639 -; RV32IM-NEXT: mulh a0, a0, a1 +; RV32IM-NEXT: mul a0, a0, a1 ; RV32IM-NEXT: srli a1, a0, 31 -; RV32IM-NEXT: srai a0, a0, 1 +; RV32IM-NEXT: srai a0, a0, 17 ; RV32IM-NEXT: add a0, a0, a1 ; RV32IM-NEXT: ret ; @@ -962,18 +932,13 @@ ; RV64IM: # %bb.0: ; RV64IM-NEXT: slli a0, a0, 48 ; RV64IM-NEXT: srai a0, a0, 48 -; RV64IM-NEXT: lui a1, 13107 -; RV64IM-NEXT: addiw a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 819 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, 1639 -; RV64IM-NEXT: mulh a0, a0, a1 -; RV64IM-NEXT: srli a1, a0, 63 -; RV64IM-NEXT: srai a0, a0, 1 -; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: lui a1, 6 +; RV64IM-NEXT: addiw a1, a1, 1639 +; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: srai a1, a0, 17 +; RV64IM-NEXT: srli a0, a0, 31 +; RV64IM-NEXT: andi a0, a0, 1 +; RV64IM-NEXT: add a0, a1, a0 ; RV64IM-NEXT: ret %1 = sdiv i16 %a, 5 ret i16 %1 diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll --- a/llvm/test/CodeGen/RISCV/srem-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll @@ -46,23 +46,18 @@ ; ; RV64IM-LABEL: fold_srem_positive_odd: ; RV64IM: # %bb.0: -; RV64IM-NEXT: sext.w a0, a0 -; RV64IM-NEXT: lui a1, 1045903 -; RV64IM-NEXT: addiw a1, a1, -733 -; RV64IM-NEXT: slli a1, a1, 15 -; RV64IM-NEXT: addi a1, a1, 1035 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -905 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -1767 -; RV64IM-NEXT: mulh a1, a0, a1 -; RV64IM-NEXT: add a1, a1, 
a0 -; RV64IM-NEXT: srli a2, a1, 63 -; RV64IM-NEXT: srai a1, a1, 6 +; RV64IM-NEXT: sext.w a1, a0 +; RV64IM-NEXT: lui a2, 706409 +; RV64IM-NEXT: addiw a2, a2, 389 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 32 +; RV64IM-NEXT: addw a1, a1, a0 +; RV64IM-NEXT: srliw a2, a1, 31 +; RV64IM-NEXT: srli a1, a1, 6 ; RV64IM-NEXT: add a1, a1, a2 ; RV64IM-NEXT: addi a2, zero, 95 ; RV64IM-NEXT: mul a1, a1, a2 -; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret %1 = srem i32 %x, 95 ret i32 %1 @@ -106,20 +101,16 @@ ; ; RV64IM-LABEL: fold_srem_positive_even: ; RV64IM: # %bb.0: -; RV64IM-NEXT: sext.w a0, a0 -; RV64IM-NEXT: lui a1, 506482 -; RV64IM-NEXT: addiw a1, a1, -31 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, 711 -; RV64IM-NEXT: slli a1, a1, 19 -; RV64IM-NEXT: addi a1, a1, 1979 -; RV64IM-NEXT: mulh a1, a0, a1 +; RV64IM-NEXT: sext.w a1, a0 +; RV64IM-NEXT: lui a2, 253241 +; RV64IM-NEXT: addiw a2, a2, -15 +; RV64IM-NEXT: mul a1, a1, a2 ; RV64IM-NEXT: srli a2, a1, 63 -; RV64IM-NEXT: srai a1, a1, 9 +; RV64IM-NEXT: srai a1, a1, 40 ; RV64IM-NEXT: add a1, a1, a2 ; RV64IM-NEXT: addi a2, zero, 1060 ; RV64IM-NEXT: mul a1, a1, a2 -; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret %1 = srem i32 %x, 1060 ret i32 %1 @@ -163,23 +154,16 @@ ; ; RV64IM-LABEL: fold_srem_negative_odd: ; RV64IM: # %bb.0: -; RV64IM-NEXT: sext.w a0, a0 -; RV64IM-NEXT: lui a1, 4781 -; RV64IM-NEXT: addiw a1, a1, 2045 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, 1371 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, -11 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -1355 -; RV64IM-NEXT: mulh a1, a0, a1 -; RV64IM-NEXT: sub a1, a1, a0 +; RV64IM-NEXT: sext.w a1, a0 +; RV64IM-NEXT: lui a2, 677296 +; RV64IM-NEXT: addiw a2, a2, -91 +; RV64IM-NEXT: mul a1, a1, a2 ; RV64IM-NEXT: srli a2, a1, 63 -; RV64IM-NEXT: srai a1, a1, 9 +; RV64IM-NEXT: srai a1, a1, 40 ; RV64IM-NEXT: add a1, a1, a2 ; RV64IM-NEXT: addi a2, zero, -723 ; RV64IM-NEXT: mul a1, a1, a2 -; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret %1 = srem i32 %x, -723 ret i32 %1 @@ -226,23 +210,17 @@ ; ; RV64IM-LABEL: fold_srem_negative_even: ; RV64IM: # %bb.0: -; RV64IM-NEXT: sext.w a0, a0 -; RV64IM-NEXT: lui a1, 1036895 -; RV64IM-NEXT: addiw a1, a1, 999 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 11 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -523 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -481 -; RV64IM-NEXT: mulh a1, a0, a1 +; RV64IM-NEXT: sext.w a1, a0 +; RV64IM-NEXT: lui a2, 1036895 +; RV64IM-NEXT: addiw a2, a2, 999 +; RV64IM-NEXT: mul a1, a1, a2 ; RV64IM-NEXT: srli a2, a1, 63 -; RV64IM-NEXT: srai a1, a1, 12 +; RV64IM-NEXT: srai a1, a1, 40 ; RV64IM-NEXT: add a1, a1, a2 ; RV64IM-NEXT: lui a2, 1048570 ; RV64IM-NEXT: addiw a2, a2, 1595 ; RV64IM-NEXT: mul a1, a1, a2 -; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret %1 = srem i32 %x, -22981 ret i32 %1 @@ -310,18 +288,14 @@ ; RV64IM-LABEL: combine_srem_sdiv: ; RV64IM: # %bb.0: ; RV64IM-NEXT: sext.w a1, a0 -; RV64IM-NEXT: lui a2, 1045903 -; RV64IM-NEXT: addiw a2, a2, -733 -; RV64IM-NEXT: slli a2, a2, 15 -; RV64IM-NEXT: addi a2, a2, 1035 -; RV64IM-NEXT: slli a2, a2, 12 -; RV64IM-NEXT: addi a2, a2, -905 -; RV64IM-NEXT: slli a2, a2, 12 -; RV64IM-NEXT: addi a2, a2, -1767 -; RV64IM-NEXT: mulh a2, a1, a2 -; RV64IM-NEXT: add a1, a2, a1 -; RV64IM-NEXT: srli a2, a1, 63 -; RV64IM-NEXT: srai a1, a1, 6 +; RV64IM-NEXT: 
lui a2, 706409 +; RV64IM-NEXT: addiw a2, a2, 389 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 32 +; RV64IM-NEXT: addw a2, a1, a0 +; RV64IM-NEXT: srliw a2, a2, 31 +; RV64IM-NEXT: add a1, a1, a0 +; RV64IM-NEXT: sraiw a1, a1, 6 ; RV64IM-NEXT: add a1, a1, a2 ; RV64IM-NEXT: addi a2, zero, 95 ; RV64IM-NEXT: mul a2, a1, a2 diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll --- a/llvm/test/CodeGen/RISCV/urem-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll @@ -47,24 +47,19 @@ ; ; RV64IM-LABEL: fold_urem_positive_odd: ; RV64IM: # %bb.0: -; RV64IM-NEXT: slli a0, a0, 32 -; RV64IM-NEXT: srli a0, a0, 32 -; RV64IM-NEXT: lui a1, 1423 -; RV64IM-NEXT: addiw a1, a1, -733 -; RV64IM-NEXT: slli a1, a1, 15 -; RV64IM-NEXT: addi a1, a1, 1035 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, -1811 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 561 -; RV64IM-NEXT: mulhu a1, a0, a1 +; RV64IM-NEXT: slli a1, a0, 32 +; RV64IM-NEXT: srli a1, a1, 32 +; RV64IM-NEXT: lui a2, 364242 +; RV64IM-NEXT: addiw a2, a2, 777 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 32 ; RV64IM-NEXT: sub a2, a0, a1 -; RV64IM-NEXT: srli a2, a2, 1 +; RV64IM-NEXT: srliw a2, a2, 1 ; RV64IM-NEXT: add a1, a2, a1 ; RV64IM-NEXT: srli a1, a1, 6 ; RV64IM-NEXT: addi a2, zero, 95 ; RV64IM-NEXT: mul a1, a1, a2 -; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret %1 = urem i32 %x, 95 ret i32 %1 @@ -107,21 +102,17 @@ ; ; RV64IM-LABEL: fold_urem_positive_even: ; RV64IM: # %bb.0: -; RV64IM-NEXT: slli a0, a0, 32 -; RV64IM-NEXT: srli a0, a0, 32 -; RV64IM-NEXT: lui a1, 1048020 -; RV64IM-NEXT: addiw a1, a1, -1793 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 139 -; RV64IM-NEXT: slli a1, a1, 14 -; RV64IM-NEXT: addi a1, a1, 1793 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, -139 -; RV64IM-NEXT: mulhu a1, a0, a1 -; RV64IM-NEXT: srli a1, a1, 10 +; RV64IM-NEXT: slli a1, a0, 32 +; RV64IM-NEXT: srli a1, a1, 32 +; RV64IM-NEXT: lui a2, 62 +; RV64IM-NEXT: addiw a2, a2, -711 +; RV64IM-NEXT: slli a2, a2, 14 +; RV64IM-NEXT: addi a2, a2, -61 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 42 ; RV64IM-NEXT: addi a2, zero, 1060 ; RV64IM-NEXT: mul a1, a1, a2 -; RV64IM-NEXT: sub a0, a0, a1 +; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret %1 = urem i32 %x, 1060 ret i32 %1 @@ -189,25 +180,20 @@ ; ; RV64IM-LABEL: combine_urem_udiv: ; RV64IM: # %bb.0: -; RV64IM-NEXT: slli a0, a0, 32 -; RV64IM-NEXT: srli a0, a0, 32 -; RV64IM-NEXT: lui a1, 1423 -; RV64IM-NEXT: addiw a1, a1, -733 -; RV64IM-NEXT: slli a1, a1, 15 -; RV64IM-NEXT: addi a1, a1, 1035 -; RV64IM-NEXT: slli a1, a1, 13 -; RV64IM-NEXT: addi a1, a1, -1811 -; RV64IM-NEXT: slli a1, a1, 12 -; RV64IM-NEXT: addi a1, a1, 561 -; RV64IM-NEXT: mulhu a1, a0, a1 +; RV64IM-NEXT: slli a1, a0, 32 +; RV64IM-NEXT: srli a1, a1, 32 +; RV64IM-NEXT: lui a2, 364242 +; RV64IM-NEXT: addiw a2, a2, 777 +; RV64IM-NEXT: mul a1, a1, a2 +; RV64IM-NEXT: srli a1, a1, 32 ; RV64IM-NEXT: sub a2, a0, a1 -; RV64IM-NEXT: srli a2, a2, 1 +; RV64IM-NEXT: srliw a2, a2, 1 ; RV64IM-NEXT: add a1, a2, a1 ; RV64IM-NEXT: srli a1, a1, 6 ; RV64IM-NEXT: addi a2, zero, 95 ; RV64IM-NEXT: mul a2, a1, a2 ; RV64IM-NEXT: sub a0, a0, a2 -; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: addw a0, a0, a1 ; RV64IM-NEXT: ret %1 = urem i32 %x, 95 %2 = udiv i32 %x, 95
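
For reference, a minimal standalone C++ sketch (separate from the patch itself) of what the new promoted-multiply lowering computes for the RISC-V udiv8_constant and sdiv8_constant checks above. The helper names udiv5/sdiv5 are illustrative only; the magic constants 205 = ceil(2^10 / 5) and 103 = ceil(2^9 / 5) and the shift amounts 10, 9, and 15 are taken directly from the updated CHECK lines, assuming an i8 that is promoted to a 32-bit register with a legal MUL.

// Hedged sketch, not part of the patch: i8 division by 5 via the wider
// multiply that the transform now emits for illegal (promoted) types.
#include <cassert>
#include <cstdint>

// Unsigned: zero-extend, multiply by 205, shift the product right by 10
// (the "mul" + "srli a0, a0, 10" sequence in udiv8_constant).
static uint8_t udiv5(uint8_t x) {
  uint32_t Wide = uint32_t(x) * 205u;
  return uint8_t(Wide >> 10);
}

// Signed: sign-extend, multiply by 103, arithmetic-shift right by 9, then add
// bit 15 of the product (the sign bit of the i16 product) to round the
// quotient toward zero (the "mul" + "srai 9" + "srli 15" + "andi 1" + "add"
// sequence in sdiv8_constant). The right shift of a negative int is
// arithmetic on mainstream compilers and guaranteed from C++20.
static int8_t sdiv5(int8_t x) {
  int32_t Wide = int32_t(x) * 103;
  return int8_t((Wide >> 9) + ((Wide >> 15) & 1));
}

int main() {
  for (int i = -128; i <= 255; ++i) {
    if (i >= 0)
      assert(udiv5(uint8_t(i)) == uint8_t(i) / 5);
    if (i <= 127)
      assert(sdiv5(int8_t(i)) == int8_t(i) / 5);
  }
  return 0;
}

The even-divisor cases follow the same shape with an extra pre-shift of the dividend, which is why, for example, the AArch64 test_urem_even checks above gain a ubfx of bits 1..15 before a multiply by 18725 and a shift by 17.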