diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -772,14 +772,57 @@
   assert(Instr->getOpcode() == Instruction::URem);
   assert(!Instr->getType()->isVectorTy());
 
-  // X % Y -> X for X < Y
-  if (LVI->getConstantRange(Instr->getOperand(0), Instr)
-          .icmp(ICmpInst::ICMP_ULT,
-                LVI->getConstantRange(Instr->getOperand(1), Instr))) {
-    Instr->replaceAllUsesWith(Instr->getOperand(0));
+  Value *X = Instr->getOperand(0);
+  Value *Y = Instr->getOperand(1);
+
+  ConstantRange XCR = LVI->getConstantRange(X, Instr);
+  ConstantRange YCR = LVI->getConstantRange(Y, Instr);
+
+  // X u% Y -> X  iff X u< Y  (special case of the following transformation)
+  if (XCR.icmp(ICmpInst::ICMP_ULT, YCR)) {
+    Instr->replaceAllUsesWith(X);
     Instr->eraseFromParent();
     return true;
   }
+
+  // Given
+  //   R  = X u% Y
+  // We can represent the modulo operation as a loop/self-recursion:
+  //   urem_rec(X, Y):
+  //     Z = X - Y
+  //     if X u< Y
+  //       ret X
+  //     else
+  //       ret urem_rec(Z, Y)
+  // which isn't better, but if we only need a single iteration
+  // to compute the answer, this becomes quite good:
+  //   R  = X u< Y ? X : X - Y    iff X u< 2*Y (w/ unsigned saturation)
+  // Now, we do not care about all full multiples of Y in X, they do not change
+  // the answer, thus we could rewrite the expression as:
+  //   X* = X - (Y * |_ X / Y _|)
+  //   R  = X* % Y
+  // so we don't need the *first* iteration to return, we just need to
+  // know *which* iteration will always return, so we could also rewrite it as:
+  //   X* = X - (Y * |_ X / Y _|)
+  //   R  = X* % Y   iff X* u< 2*Y
+  // but that does not seem profitable here.
+  if (XCR.icmp(ICmpInst::ICMP_ULT, YCR.umul_sat(APInt(YCR.getBitWidth(), 2)))) {
+    IRBuilder<> B{Instr};
+    // Note: NUW only, no NSW, on the subtraction. The select picks AdjX only
+    // when X u>= Y, which justifies NUW on the selected path; but even under
+    // the X u< 2*Y guard, X - Y may overflow in the *signed* sense there
+    // (e.g. i8: X = 130, Y = 100 gives signed -126 - 100), so an NSW flag
+    // would turn the selected value into poison — a miscompile.
+    auto *AdjX = B.CreateSub(X, Y, Instr->getName() + ".urem", /*HasNUW*/ true,
+                             /*HasNSW*/ false);
+    auto *Cmp =
+        B.CreateICmp(ICmpInst::ICMP_ULT, X, Y, Instr->getName() + ".cmp");
+    auto *ExpandedURem = B.CreateSelect(Cmp, X, AdjX);
+    ExpandedURem->takeName(Instr);
+    Instr->replaceAllUsesWith(ExpandedURem);
+    Instr->eraseFromParent();
+    return true;
+  }
 
   return false;
 }
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll b/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/urem-expansion.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s
+
+declare void @llvm.assume(i1)
+
+define i8 @constant.divisor.v3(i8 %x) {
+; CHECK-LABEL: @constant.divisor.v3(
+; CHECK-NEXT:    [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], 3
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X_UPPER]])
+; CHECK-NEXT:    ret i8 [[X]]
+;
+  %cmp.x.upper = icmp ult i8 %x, 3
+  call void @llvm.assume(i1 %cmp.x.upper)
+  %rem = urem i8 %x, 3
+  ret i8 %rem
+}
+define i8 @constant.divisor.v4(i8 %x) {
+; CHECK-LABEL: @constant.divisor.v4(
+; CHECK-NEXT:    [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], 4
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X_UPPER]])
+; CHECK-NEXT:    [[REM_UREM:%.*]] = sub nuw i8 [[X]], 3
+; CHECK-NEXT:    [[REM_CMP:%.*]] = icmp ult i8 [[X]], 3
+; CHECK-NEXT:    [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
+; CHECK-NEXT:    ret i8 [[REM]]
+;
+  %cmp.x.upper = icmp ult i8 %x, 4
+  call void @llvm.assume(i1 %cmp.x.upper)
+  %rem = urem i8 %x, 3
+  ret i8 %rem
+}
+define i8 @constant.divisor.v5(i8 %x) {
+; CHECK-LABEL: @constant.divisor.v5(
+; CHECK-NEXT:    [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], 5
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X_UPPER]])
+; CHECK-NEXT:    [[REM_UREM:%.*]] = sub nuw i8 [[X]], 3
+; CHECK-NEXT:    [[REM_CMP:%.*]] = icmp ult i8 [[X]], 3
+; CHECK-NEXT:    [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
+; CHECK-NEXT:    ret i8 [[REM]]
+;
+  %cmp.x.upper = icmp ult i8 %x, 5
+  call void @llvm.assume(i1 %cmp.x.upper)
+  %rem = urem i8 %x, 3
+  ret i8 %rem
+}
+define i8 @constant.divisor.v6(i8 %x) {
+; CHECK-LABEL: @constant.divisor.v6(
+; CHECK-NEXT:    [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], 6
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X_UPPER]])
+; CHECK-NEXT:    [[REM_UREM:%.*]] = sub nuw i8 [[X]], 3
+; CHECK-NEXT:    [[REM_CMP:%.*]] = icmp ult i8 [[X]], 3
+; CHECK-NEXT:    [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
+; CHECK-NEXT:    ret i8 [[REM]]
+;
+  %cmp.x.upper = icmp ult i8 %x, 6
+  call void @llvm.assume(i1 %cmp.x.upper)
+  %rem = urem i8 %x, 3
+  ret i8 %rem
+}
+define i8 @constant.divisor.v7(i8 %x) {
+; CHECK-LABEL: @constant.divisor.v7(
+; CHECK-NEXT:    [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], 7
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X_UPPER]])
+; CHECK-NEXT:    [[REM:%.*]] = urem i8 [[X]], 3
+; CHECK-NEXT:    ret i8 [[REM]]
+;
+  %cmp.x.upper = icmp ult i8 %x, 7
+  call void @llvm.assume(i1 %cmp.x.upper)
+  %rem = urem i8 %x, 3
+  ret i8 %rem
+}
+
+define i8 @variable.v3(i8 %x, i8 %y) {
+; CHECK-LABEL: @variable.v3(
+; CHECK-NEXT:    [[CMP_X:%.*]] = icmp ult i8 [[X:%.*]], 3
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X]])
+; CHECK-NEXT:    [[CMP_Y_LOWER:%.*]] = icmp uge i8 [[Y:%.*]], 3
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_Y_LOWER]])
+; CHECK-NEXT:    [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_Y_UPPER]])
+; CHECK-NEXT:    ret i8 [[X]]
+;
+  %cmp.x = icmp ult i8 %x, 3
+  call void @llvm.assume(i1 %cmp.x)
+  %cmp.y.lower = icmp uge i8 %y, 3
+  call void @llvm.assume(i1 %cmp.y.lower)
+  %cmp.y.upper = icmp ule i8 %y, 4
+  call void @llvm.assume(i1 %cmp.y.upper)
+  %rem = urem i8 %x, %y
+  ret i8 %rem
+}
+define i8 @variable.v4(i8 %x, i8 %y) {
+; CHECK-LABEL: @variable.v4(
+; CHECK-NEXT:    [[CMP_X:%.*]] = icmp ult i8 [[X:%.*]], 4
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X]])
+; CHECK-NEXT:    [[CMP_Y_LOWER:%.*]] = icmp uge i8 [[Y:%.*]], 3
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_Y_LOWER]])
+; CHECK-NEXT:    [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_Y_UPPER]])
+; CHECK-NEXT:    [[REM_UREM:%.*]] = sub nuw i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[REM_CMP:%.*]] = icmp ult i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
+; CHECK-NEXT:    ret i8 [[REM]]
+;
+  %cmp.x = icmp ult i8 %x, 4
+  call void @llvm.assume(i1 %cmp.x)
+  %cmp.y.lower = icmp uge i8 %y, 3
+  call void @llvm.assume(i1 %cmp.y.lower)
+  %cmp.y.upper = icmp ule i8 %y, 4
+  call void @llvm.assume(i1 %cmp.y.upper)
+  %rem = urem i8 %x, %y
+  ret i8 %rem
+}
+define i8 @variable.v5(i8 %x, i8 %y) {
+; CHECK-LABEL: @variable.v5(
+; CHECK-NEXT:    [[CMP_X:%.*]] = icmp ult i8 [[X:%.*]], 5
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X]])
+; CHECK-NEXT:    [[CMP_Y_LOWER:%.*]] = icmp uge i8 [[Y:%.*]], 3
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_Y_LOWER]])
+; CHECK-NEXT:    [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_Y_UPPER]])
+; CHECK-NEXT:    [[REM_UREM:%.*]] = sub nuw i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[REM_CMP:%.*]] = icmp ult i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
+; CHECK-NEXT:    ret i8 [[REM]]
+;
+  %cmp.x = icmp ult i8 %x, 5
+  call void @llvm.assume(i1 %cmp.x)
+  %cmp.y.lower = icmp uge i8 %y, 3
+  call void @llvm.assume(i1 %cmp.y.lower)
+  %cmp.y.upper = icmp ule i8 %y, 4
+  call void @llvm.assume(i1 %cmp.y.upper)
+  %rem = urem i8 %x, %y
+  ret i8 %rem
+}
+define i8 @variable.v6(i8 %x, i8 %y) {
+; CHECK-LABEL: @variable.v6(
+; CHECK-NEXT:    [[CMP_X:%.*]] = icmp ult i8 [[X:%.*]], 6
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X]])
+; CHECK-NEXT:    [[CMP_Y_LOWER:%.*]] = icmp uge i8 [[Y:%.*]], 3
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_Y_LOWER]])
+; CHECK-NEXT:    [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_Y_UPPER]])
+; CHECK-NEXT:    [[REM_UREM:%.*]] = sub nuw i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[REM_CMP:%.*]] = icmp ult i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
+; CHECK-NEXT:    ret i8 [[REM]]
+;
+  %cmp.x = icmp ult i8 %x, 6
+  call void @llvm.assume(i1 %cmp.x)
+  %cmp.y.lower = icmp uge i8 %y, 3
+  call void @llvm.assume(i1 %cmp.y.lower)
+  %cmp.y.upper = icmp ule i8 %y, 4
+  call void @llvm.assume(i1 %cmp.y.upper)
+  %rem = urem i8 %x, %y
+  ret i8 %rem
+}
+define i8 @variable.v7(i8 %x, i8 %y) {
+; CHECK-LABEL: @variable.v7(
+; CHECK-NEXT:    [[CMP_X:%.*]] = icmp ult i8 [[X:%.*]], 7
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X]])
+; CHECK-NEXT:    [[CMP_Y_LOWER:%.*]] = icmp uge i8 [[Y:%.*]], 3
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_Y_LOWER]])
+; CHECK-NEXT:    [[CMP_Y_UPPER:%.*]] = icmp ule i8 [[Y]], 4
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_Y_UPPER]])
+; CHECK-NEXT:    [[REM:%.*]] = urem i8 [[X]], [[Y]]
+; CHECK-NEXT:    ret i8 [[REM]]
+;
+  %cmp.x = icmp ult i8 %x, 7
+  call void @llvm.assume(i1 %cmp.x)
+  %cmp.y.lower = icmp uge i8 %y, 3
+  call void @llvm.assume(i1 %cmp.y.lower)
+  %cmp.y.upper = icmp ule i8 %y, 4
+  call void @llvm.assume(i1 %cmp.y.upper)
+  %rem = urem i8 %x, %y
+  ret i8 %rem
+}
+
+define i8 @large.divisor.v0(i8 %x) {
+; CHECK-LABEL: @large.divisor.v0(
+; CHECK-NEXT:    [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], 127
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X_UPPER]])
+; CHECK-NEXT:    ret i8 [[X]]
+;
+  %cmp.x.upper = icmp ult i8 %x, 127
+  call void @llvm.assume(i1 %cmp.x.upper)
+  %rem = urem i8 %x, 127
+  ret i8 %rem
+}
+define i8 @large.divisor.v1(i8 %x) {
+; CHECK-LABEL: @large.divisor.v1(
+; CHECK-NEXT:    [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], -128
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X_UPPER]])
+; CHECK-NEXT:    [[REM_UREM:%.*]] = sub nuw i8 [[X]], 127
+; CHECK-NEXT:    [[REM_CMP:%.*]] = icmp ult i8 [[X]], 127
+; CHECK-NEXT:    [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
+; CHECK-NEXT:    ret i8 [[REM]]
+;
+  %cmp.x.upper = icmp ult i8 %x, 128
+  call void @llvm.assume(i1 %cmp.x.upper)
+  %rem = urem i8 %x, 127
+  ret i8 %rem
+}
+define i8 @large.divisor.v2.unbound.x(i8 %x) {
+; CHECK-LABEL: @large.divisor.v2.unbound.x(
+; CHECK-NEXT:    [[REM:%.*]] = urem i8 [[X:%.*]], 127
+; CHECK-NEXT:    ret i8 [[REM]]
+;
+  %rem = urem i8 %x, 127
+  ret i8 %rem
+}
+
+define i8 @large.divisor.with.overflow.v0(i8 %x) {
+; CHECK-LABEL: @large.divisor.with.overflow.v0(
+; CHECK-NEXT:    [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], -128
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X_UPPER]])
+; CHECK-NEXT:    ret i8 [[X]]
+;
+  %cmp.x.upper = icmp ult i8 %x, 128
+  call void @llvm.assume(i1 %cmp.x.upper)
+  %rem = urem i8 %x, 128
+  ret i8 %rem
+}
+define i8 @large.divisor.with.overflow.v1(i8 %x) {
+; CHECK-LABEL: @large.divisor.with.overflow.v1(
+; CHECK-NEXT:    [[CMP_X_UPPER:%.*]] = icmp ult i8 [[X:%.*]], -127
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_X_UPPER]])
+; CHECK-NEXT:    [[REM_UREM:%.*]] = sub nuw i8 [[X]], -128
+; CHECK-NEXT:    [[REM_CMP:%.*]] = icmp ult i8 [[X]], -128
+; CHECK-NEXT:    [[REM:%.*]] = select i1 [[REM_CMP]], i8 [[X]], i8 [[REM_UREM]]
+; CHECK-NEXT:    ret i8 [[REM]]
+;
+  %cmp.x.upper = icmp ult i8 %x, 129
+  call void @llvm.assume(i1 %cmp.x.upper)
+  %rem = urem i8 %x, 128
+  ret i8 %rem
+}
+define i8 @large.divisor.with.overflow.v2.unbound.x(i8 %x) {
+; CHECK-LABEL: @large.divisor.with.overflow.v2.unbound.x(
+; CHECK-NEXT:    [[REM:%.*]] = urem i8 [[X:%.*]], -128
+; CHECK-NEXT:    ret i8 [[REM]]
+;
+  %rem = urem i8 %x, 128
+  ret i8 %rem
+}
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/urem.ll b/llvm/test/Transforms/CorrelatedValuePropagation/urem.ll
--- a/llvm/test/Transforms/CorrelatedValuePropagation/urem.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/urem.ll
@@ -117,9 +117,9 @@
 define void @test5(i32 %n) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:    [[TRUNC:%.*]] = and i32 [[N:%.*]], 63
-; CHECK-NEXT:    [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[TRUNC]] to i8
-; CHECK-NEXT:    [[DIV1:%.*]] = urem i8 [[DIV_LHS_TRUNC]], 42
-; CHECK-NEXT:    [[DIV_ZEXT:%.*]] = zext i8 [[DIV1]] to i32
+; CHECK-NEXT:    [[DIV_UREM:%.*]] = sub nuw i32 [[TRUNC]], 42
+; CHECK-NEXT:    [[DIV_CMP:%.*]] = icmp ult i32 [[TRUNC]], 42
+; CHECK-NEXT:    [[DIV:%.*]] = select i1 [[DIV_CMP]], i32 [[TRUNC]], i32 [[DIV_UREM]]
 ; CHECK-NEXT:    ret void
 ;
   %trunc = and i32 %n, 63
diff --git a/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll b/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll
--- a/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll
+++ b/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll
@@ -9,8 +9,8 @@
 define i32 @PR38781(i32 noundef %a, i32 noundef %b) {
 ; CHECK-LABEL: @PR38781(
 ; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], -1
-; CHECK-NEXT:    [[AND:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT:    [[AND1:%.*]] = icmp sgt i32 [[TMP1]], -1
+; CHECK-NEXT:    [[AND:%.*]] = zext i1 [[AND1]] to i32
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %cmp = icmp sge i32 %a, 0
@@ -53,9 +53,9 @@
 define i1 @PR54692_b(i8 noundef signext %c) {
 ; CHECK-LABEL: @PR54692_b(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i8 [[C:%.*]], 32
+; CHECK-NEXT:    [[AND1:%.*]] = icmp ult i8 [[C:%.*]], 32
 ; CHECK-NEXT:    [[CMP6:%.*]] = icmp eq i8 [[C]], 127
-; CHECK-NEXT:    [[OR2:%.*]] = or i1 [[TMP0]], [[CMP6]]
+; CHECK-NEXT:    [[OR2:%.*]] = or i1 [[AND1]], [[CMP6]]
 ; CHECK-NEXT:    ret i1 [[OR2]]
 ;
 entry:
@@ -77,9 +77,9 @@
 define i1 @PR54692_c(i8 noundef signext %c) {
 ; CHECK-LABEL: @PR54692_c(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i8 [[C:%.*]], 32
+; CHECK-NEXT:    [[AND1:%.*]] = icmp ult i8 [[C:%.*]], 32
 ; CHECK-NEXT:    [[CMP6:%.*]] = icmp eq i8 [[C]], 127
-; CHECK-NEXT:    [[T0:%.*]] = or i1 [[TMP0]], [[CMP6]]
+; CHECK-NEXT:    [[T0:%.*]] = or i1 [[AND1]], [[CMP6]]
 ; CHECK-NEXT:    ret i1 [[T0]]
 ;
 entry:
@@ -115,7 +115,7 @@
 ; O1-NEXT:    [[CMP:%.*]] = icmp eq i32 [[REM]], 7
 ; O1-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; O1:       if.then:
-; O1-NEXT:    call void (...) @foo()
+; O1-NEXT:    tail call void (...) @foo()
 ; O1-NEXT:    br label [[IF_END]]
 ; O1:       if.end:
 ; O1-NEXT:    [[TMP0:%.*]] = load i32, ptr @c, align 4
@@ -123,16 +123,15 @@
 ;
 ; OZ-LABEL: @PR56119(
 ; OZ-NEXT:  entry:
-; OZ-NEXT:    [[E_COERCE_FR:%.*]] = freeze i32 [[E_COERCE:%.*]]
-; OZ-NEXT:    [[TMP0:%.*]] = and i32 [[E_COERCE_FR]], 255
-; OZ-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP0]], 7
-; OZ-NEXT:    br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; OZ-NEXT:    [[CONV2:%.*]] = and i32 [[E_COERCE:%.*]], 255
+; OZ-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[CONV2]], 7
+; OZ-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; OZ:       if.then:
 ; OZ-NEXT:    tail call void (...) @foo()
 ; OZ-NEXT:    br label [[IF_END]]
 ; OZ:       if.end:
-; OZ-NEXT:    [[TMP1:%.*]] = load i32, ptr @c, align 4
-; OZ-NEXT:    ret i32 [[TMP1]]
+; OZ-NEXT:    [[TMP0:%.*]] = load i32, ptr @c, align 4
+; OZ-NEXT:    ret i32 [[TMP0]]
 ;
 entry:
   %e = alloca %struct.a, align 4