Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -15743,9 +15743,32 @@ SDValue &ConstNode) { APInt Val; - // If the add only has one use, this would be OK to do. - if (AddNode.getNode()->hasOneUse()) + // If the add only has one use, do further checks on c1 and c1*c2. + if (AddNode.getNode()->hasOneUse()) { + // The transform introduces no regression when the constants are wider + // than 64 bits, as both c1 and c2 are then too large. + unsigned Bits = ConstNode.getScalarValueSizeInBits(); + if (Bits > 8 * sizeof(int64_t)) + return true; + if (auto *C1Node = dyn_cast<ConstantSDNode>(AddNode.getOperand(1))) + if (auto *C2Node = dyn_cast<ConstantSDNode>(ConstNode)) { + const APInt &C1 = C1Node->getAPIntValue(); + const APInt &C2 = C2Node->getAPIntValue(); + // Prevent the transform if c1*c2 overflows. + if ((C1 * C2).getBitWidth() > ConstNode.getScalarValueSizeInBits()) + return false; + // Do sign extension for c1*c2 according to c2's type. + int64_t C1C2 = llvm::SignExtend64((C1 * C2).getZExtValue(), Bits); + // This transform will introduce a regression if c1 is a legal add + // immediate while c1*c2 isn't. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isLegalAddImmediate(C1.getSExtValue()) && + !TLI.isLegalAddImmediate(C1C2)) + return false; + } + // It is OK to do the transform. return true; + } // Walk all the users of the constant with which we're multiplying. 
for (SDNode *Use : ConstNode->uses()) { Index: llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll +++ llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll @@ -4,10 +4,11 @@ define i1 @t32_3_1(i32 %X) nounwind { ; CHECK-LABEL: t32_3_1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 -; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: sub w8, w0, #1 // =1 +; CHECK-NEXT: movk w9, #43690, lsl #16 +; CHECK-NEXT: mul w8, w8, w9 ; CHECK-NEXT: mov w9, #1431655765 -; CHECK-NEXT: madd w8, w0, w8, w9 ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret @@ -19,10 +20,10 @@ define i1 @t32_3_2(i32 %X) nounwind { ; CHECK-LABEL: t32_3_2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 -; CHECK-NEXT: movk w8, #43690, lsl #16 -; CHECK-NEXT: mov w9, #-1431655766 -; CHECK-NEXT: madd w8, w0, w8, w9 +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: sub w8, w0, #2 // =2 +; CHECK-NEXT: movk w9, #43690, lsl #16 +; CHECK-NEXT: mul w8, w8, w9 ; CHECK-NEXT: mov w9, #1431655765 ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: cset w0, lo @@ -36,10 +37,11 @@ define i1 @t32_5_1(i32 %X) nounwind { ; CHECK-LABEL: t32_5_1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #52429 -; CHECK-NEXT: movk w8, #52428, lsl #16 +; CHECK-NEXT: mov w9, #52429 +; CHECK-NEXT: sub w8, w0, #1 // =1 +; CHECK-NEXT: movk w9, #52428, lsl #16 +; CHECK-NEXT: mul w8, w8, w9 ; CHECK-NEXT: mov w9, #858993459 -; CHECK-NEXT: madd w8, w0, w8, w9 ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: cset w0, lo ; CHECK-NEXT: ret @@ -51,10 +53,10 @@ define i1 @t32_5_2(i32 %X) nounwind { ; CHECK-LABEL: t32_5_2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #52429 -; CHECK-NEXT: movk w8, #52428, lsl #16 -; CHECK-NEXT: mov w9, #1717986918 -; CHECK-NEXT: madd w8, w0, w8, w9 +; CHECK-NEXT: mov w9, #52429 +; CHECK-NEXT: sub w8, w0, #2 // =2 +; CHECK-NEXT: movk w9, #52428, lsl #16 +; CHECK-NEXT: mul w8, w8, w9 ; 
CHECK-NEXT: mov w9, #858993459 ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: cset w0, lo @@ -67,10 +69,10 @@ define i1 @t32_5_3(i32 %X) nounwind { ; CHECK-LABEL: t32_5_3: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #52429 -; CHECK-NEXT: movk w8, #52428, lsl #16 -; CHECK-NEXT: mov w9, #-1717986919 -; CHECK-NEXT: madd w8, w0, w8, w9 +; CHECK-NEXT: mov w9, #52429 +; CHECK-NEXT: sub w8, w0, #3 // =3 +; CHECK-NEXT: movk w9, #52428, lsl #16 +; CHECK-NEXT: mul w8, w8, w9 ; CHECK-NEXT: mov w9, #858993459 ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: cset w0, lo @@ -83,10 +85,10 @@ define i1 @t32_5_4(i32 %X) nounwind { ; CHECK-LABEL: t32_5_4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #52429 -; CHECK-NEXT: movk w8, #52428, lsl #16 -; CHECK-NEXT: mov w9, #-858993460 -; CHECK-NEXT: madd w8, w0, w8, w9 +; CHECK-NEXT: mov w9, #52429 +; CHECK-NEXT: sub w8, w0, #4 // =4 +; CHECK-NEXT: movk w9, #52428, lsl #16 +; CHECK-NEXT: mul w8, w8, w9 ; CHECK-NEXT: mov w9, #858993459 ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: cset w0, lo @@ -100,10 +102,10 @@ define i1 @t32_6_1(i32 %X) nounwind { ; CHECK-LABEL: t32_6_1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 -; CHECK-NEXT: movk w8, #43690, lsl #16 -; CHECK-NEXT: mov w9, #1431655765 -; CHECK-NEXT: madd w8, w0, w8, w9 +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: sub w8, w0, #1 // =1 +; CHECK-NEXT: movk w9, #43690, lsl #16 +; CHECK-NEXT: mul w8, w8, w9 ; CHECK-NEXT: mov w9, #43691 ; CHECK-NEXT: ror w8, w8, #1 ; CHECK-NEXT: movk w9, #10922, lsl #16 @@ -118,10 +120,10 @@ define i1 @t32_6_2(i32 %X) nounwind { ; CHECK-LABEL: t32_6_2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 -; CHECK-NEXT: movk w8, #43690, lsl #16 -; CHECK-NEXT: mov w9, #-1431655766 -; CHECK-NEXT: madd w8, w0, w8, w9 +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: sub w8, w0, #2 // =2 +; CHECK-NEXT: movk w9, #43690, lsl #16 +; CHECK-NEXT: mul w8, w8, w9 ; CHECK-NEXT: mov w9, #43691 ; CHECK-NEXT: ror w8, w8, #1 ; CHECK-NEXT: movk w9, #10922, lsl #16 @@ -154,11 +156,10 @@ define i1 
@t32_6_4(i32 %X) nounwind { ; CHECK-LABEL: t32_6_4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 -; CHECK-NEXT: mov w9, #21844 -; CHECK-NEXT: movk w8, #43690, lsl #16 -; CHECK-NEXT: movk w9, #21845, lsl #16 -; CHECK-NEXT: madd w8, w0, w8, w9 +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: sub w8, w0, #4 // =4 +; CHECK-NEXT: movk w9, #43690, lsl #16 +; CHECK-NEXT: mul w8, w8, w9 ; CHECK-NEXT: mov w9, #43690 ; CHECK-NEXT: ror w8, w8, #1 ; CHECK-NEXT: movk w9, #10922, lsl #16 @@ -173,11 +174,10 @@ define i1 @t32_6_5(i32 %X) nounwind { ; CHECK-LABEL: t32_6_5: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 -; CHECK-NEXT: mov w9, #43689 -; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: sub w8, w0, #5 // =5 ; CHECK-NEXT: movk w9, #43690, lsl #16 -; CHECK-NEXT: madd w8, w0, w8, w9 +; CHECK-NEXT: mul w8, w8, w9 ; CHECK-NEXT: mov w9, #43690 ; CHECK-NEXT: ror w8, w8, #1 ; CHECK-NEXT: movk w9, #10922, lsl #16 @@ -195,11 +195,11 @@ define i1 @t16_3_2(i16 %X) nounwind { ; CHECK-LABEL: t16_3_2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #43691 ; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: mov w9, #43691 ; CHECK-NEXT: movk w9, #43690, lsl #16 -; CHECK-NEXT: mov w10, #-1431655766 -; CHECK-NEXT: madd w8, w8, w9, w10 +; CHECK-NEXT: sub w8, w8, #2 // =2 +; CHECK-NEXT: mul w8, w8, w9 ; CHECK-NEXT: mov w9, #1431655765 ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: cset w0, lo @@ -212,11 +212,11 @@ define i1 @t8_3_2(i8 %X) nounwind { ; CHECK-LABEL: t8_3_2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #43691 ; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: mov w9, #43691 ; CHECK-NEXT: movk w9, #43690, lsl #16 -; CHECK-NEXT: mov w10, #-1431655766 -; CHECK-NEXT: madd w8, w8, w9, w10 +; CHECK-NEXT: sub w8, w8, #2 // =2 +; CHECK-NEXT: mul w8, w8, w9 ; CHECK-NEXT: mov w9, #1431655765 ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: cset w0, lo @@ -229,10 +229,10 @@ define i1 @t64_3_2(i64 %X) nounwind { ; CHECK-LABEL: t64_3_2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, 
#-6148914691236517206 -; CHECK-NEXT: movk x8, #43691 ; CHECK-NEXT: mov x9, #-6148914691236517206 -; CHECK-NEXT: madd x8, x0, x8, x9 +; CHECK-NEXT: sub x8, x0, #2 // =2 +; CHECK-NEXT: movk x9, #43691 +; CHECK-NEXT: mul x8, x8, x9 ; CHECK-NEXT: mov x9, #6148914691236517205 ; CHECK-NEXT: cmp x8, x9 ; CHECK-NEXT: cset w0, lo Index: llvm/test/CodeGen/X86/urem-seteq-nonzero.ll =================================================================== --- llvm/test/CodeGen/X86/urem-seteq-nonzero.ll +++ llvm/test/CodeGen/X86/urem-seteq-nonzero.ll @@ -310,12 +310,11 @@ ; ; X64-LABEL: t64_3_2: ; X64: # %bb.0: +; X64-NEXT: addq $-2, %rdi ; X64-NEXT: movabsq $-6148914691236517205, %rax # imm = 0xAAAAAAAAAAAAAAAB ; X64-NEXT: imulq %rdi, %rax -; X64-NEXT: movabsq $-6148914691236517206, %rcx # imm = 0xAAAAAAAAAAAAAAAA -; X64-NEXT: addq %rax, %rcx -; X64-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555 -; X64-NEXT: cmpq %rax, %rcx +; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: setb %al ; X64-NEXT: retq %urem = urem i64 %X, 3