diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -977,9 +977,18 @@ LatticeVal V2State = getValueState(I.getOperand(1)); LatticeVal &IV = ValueState[&I]; - if (isOverdefined(IV)) + if (IV.isOverdefined()) + return; + + // If something is undef, wait for it to resolve. + if (V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef()) + return; + + if (V1State.isOverdefined() && V2State.isOverdefined()) return (void)markOverdefined(&I); + // Both operands are non-integer constants or constant expressions. + // TODO: Use information from notconstant better. if (isConstant(V1State) && isConstant(V2State)) { Constant *C = ConstantExpr::get(I.getOpcode(), getConstant(V1State), getConstant(V2State)); @@ -989,50 +998,21 @@ return (void)markConstant(IV, &I, C); } - // If something is undef, wait for it to resolve. - if (V1State.isUnknownOrUndef() || V2State.isUnknownOrUndef()) - return; - - // Otherwise, one of our operands is overdefined. Try to produce something - // better than overdefined with some tricks. - // If this is 0 / Y, it doesn't matter that the second operand is - // overdefined, and we can replace it with zero. - if (I.getOpcode() == Instruction::UDiv || I.getOpcode() == Instruction::SDiv) - if (isConstant(V1State) && getConstant(V1State)->isNullValue()) - return (void)markConstant(IV, &I, getConstant(V1State)); - - // If this is: - // -> AND/MUL with 0 - // -> OR with -1 - // it doesn't matter that the other operand is overdefined. 
- if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Mul || - I.getOpcode() == Instruction::Or) { - LatticeVal *NonOverdefVal = nullptr; - if (!isOverdefined(V1State)) - NonOverdefVal = &V1State; - - else if (!isOverdefined(V2State)) - NonOverdefVal = &V2State; - if (NonOverdefVal) { - if (!isConstant(*NonOverdefVal)) - return; + // Operands are either constant ranges, notconstant, overdefined or one of the + // operands is a constant. + ConstantRange A = ConstantRange::getFull(I.getType()->getScalarSizeInBits()); + ConstantRange B = ConstantRange::getFull(I.getType()->getScalarSizeInBits()); + if (V1State.isConstantRange()) + A = V1State.getConstantRange(); + if (V2State.isConstantRange()) + B = V2State.getConstantRange(); - if (I.getOpcode() == Instruction::And || - I.getOpcode() == Instruction::Mul) { - // X and 0 = 0 - // X * 0 = 0 - if (getConstant(*NonOverdefVal)->isNullValue()) - return (void)markConstant(IV, &I, getConstant(*NonOverdefVal)); - } else { - // X or -1 = -1 - if (ConstantInt *CI = getConstantInt(*NonOverdefVal)) - if (CI->isMinusOne()) - return (void)markConstant(IV, &I, CI); - } - } - } + ConstantRange R = A.binaryOp(cast<BinaryOperator>(&I)->getOpcode(), B); + mergeInValue(&I, LatticeVal::getRange(R)); - markOverdefined(&I); + // TODO: Currently we do not exploit special values that produce something + // better than overdefined with an overdefined operand for vector or floating + // point types, like and <4 x i32> overdefined, zeroinitializer. } // Handle ICmpInst instruction. 
diff --git a/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll b/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll --- a/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll +++ b/llvm/test/Transforms/SCCP/binaryops-range-special-cases.ll @@ -7,16 +7,13 @@ ; CHECK-NEXT: [[D:%.*]] = sdiv i32 1, [[X:%.*]] ; CHECK-NEXT: [[C_0:%.*]] = icmp slt i32 0, [[D]] ; CHECK-NEXT: call void @use(i1 [[C_0]]) -; CHECK-NEXT: [[C_1:%.*]] = icmp slt i32 1, [[D]] -; CHECK-NEXT: call void @use(i1 [[C_1]]) -; CHECK-NEXT: [[C_2:%.*]] = icmp slt i32 2, [[D]] -; CHECK-NEXT: call void @use(i1 [[C_2]]) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[C_3:%.*]] = icmp eq i32 1, [[D]] ; CHECK-NEXT: call void @use(i1 [[C_3]]) ; CHECK-NEXT: [[C_4:%.*]] = icmp eq i32 0, [[D]] ; CHECK-NEXT: call void @use(i1 [[C_4]]) -; CHECK-NEXT: [[C_5:%.*]] = icmp eq i32 2, [[D]] -; CHECK-NEXT: call void @use(i1 [[C_5]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: ret void ; %d = sdiv i32 1, %x diff --git a/llvm/test/Transforms/SCCP/ip-ranges-binaryops.ll b/llvm/test/Transforms/SCCP/ip-ranges-binaryops.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SCCP/ip-ranges-binaryops.ll @@ -0,0 +1,134 @@ +; RUN: opt < %s -ipsccp -S | FileCheck %s + +; x = [10, 21), y = [100, 201) +; x + y = [110, 221) +define internal i1 @f.add(i32 %x, i32 %y) { +; CHECK-LABEL: define internal i1 @f.add(i32 %x, i32 %y) { +; CHECK-NEXT: %a.1 = add i32 %x, %y +; CHECK-NEXT: %c.2 = icmp sgt i32 %a.1, 219 +; CHECK-NEXT: %c.4 = icmp slt i32 %a.1, 111 +; CHECK-NEXT: %c.5 = icmp eq i32 %a.1, 150 +; CHECK-NEXT: %c.6 = icmp slt i32 %a.1, 150 +; CHECK-NEXT: %res.1 = add i1 false, %c.2 +; CHECK-NEXT: %res.2 = add i1 %res.1, false +; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 +; CHECK-NEXT: %res.4 = add i1 %res.3, %c.5 +; CHECK-NEXT: %res.5 = add i1 %res.4, %c.6 +; CHECK-NEXT: ret i1 %res.5 +; + %a.1 = add i32 %x, %y + %c.1 = icmp sgt i32 %a.1, 220 + %c.2 
= icmp sgt i32 %a.1, 219 + %c.3 = icmp slt i32 %a.1, 110 + %c.4 = icmp slt i32 %a.1, 111 + %c.5 = icmp eq i32 %a.1, 150 + %c.6 = icmp slt i32 %a.1, 150 + %res.1 = add i1 %c.1, %c.2 + %res.2 = add i1 %res.1, %c.3 + %res.3 = add i1 %res.2, %c.4 + %res.4 = add i1 %res.3, %c.5 + %res.5 = add i1 %res.4, %c.6 + ret i1 %res.5 +} + +define i1 @caller.add() { +; CHECK-LABEL: define i1 @caller.add() { +; CHECK-NEXT: %call.1 = tail call i1 @f.add(i32 10, i32 100) +; CHECK-NEXT: %call.2 = tail call i1 @f.add(i32 20, i32 200) +; CHECK-NEXT: %res = and i1 %call.1, %call.2 +; CHECK-NEXT: ret i1 %res +; + %call.1 = tail call i1 @f.add(i32 10, i32 100) + %call.2 = tail call i1 @f.add(i32 20, i32 200) + %res = and i1 %call.1, %call.2 + ret i1 %res +} + + +; x = [10, 21), y = [100, 201) +; x - y = [-190, -79) +define internal i1 @f.sub(i32 %x, i32 %y) { +; CHECK-LABEL: define internal i1 @f.sub(i32 %x, i32 %y) { +; CHECK-NEXT: %a.1 = sub i32 %x, %y +; CHECK-NEXT: %c.2 = icmp sgt i32 %a.1, -81 +; CHECK-NEXT: %c.4 = icmp slt i32 %a.1, -189 +; CHECK-NEXT: %c.5 = icmp eq i32 %a.1, -150 +; CHECK-NEXT: %c.6 = icmp slt i32 %a.1, -150 +; CHECK-NEXT: %res.1 = add i1 false, %c.2 +; CHECK-NEXT: %res.2 = add i1 %res.1, false +; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 +; CHECK-NEXT: %res.4 = add i1 %res.3, %c.5 +; CHECK-NEXT: %res.5 = add i1 %res.4, %c.6 +; CHECK-NEXT: ret i1 %res.5 +; + %a.1 = sub i32 %x, %y + %c.1 = icmp sgt i32 %a.1, -80 + %c.2 = icmp sgt i32 %a.1, -81 + %c.3 = icmp slt i32 %a.1, -190 + %c.4 = icmp slt i32 %a.1, -189 + %c.5 = icmp eq i32 %a.1, -150 + %c.6 = icmp slt i32 %a.1, -150 + %res.1 = add i1 %c.1, %c.2 + %res.2 = add i1 %res.1, %c.3 + %res.3 = add i1 %res.2, %c.4 + %res.4 = add i1 %res.3, %c.5 + %res.5 = add i1 %res.4, %c.6 + ret i1 %res.5 +} + +define i1 @caller.sub() { +; CHECK-LABEL: define i1 @caller.sub() { +; CHECK-NEXT: %call.1 = tail call i1 @f.sub(i32 10, i32 100) +; CHECK-NEXT: %call.2 = tail call i1 @f.sub(i32 20, i32 200) +; CHECK-NEXT: %res = and i1 
%call.1, %call.2 +; CHECK-NEXT: ret i1 %res +; + %call.1 = tail call i1 @f.sub(i32 10, i32 100) + %call.2 = tail call i1 @f.sub(i32 20, i32 200) + %res = and i1 %call.1, %call.2 + ret i1 %res +} + +; x = [10, 21), y = [100, 201) +; x * y = [1000, 4001) +define internal i1 @f.mul(i32 %x, i32 %y) { +; CHECK-LABEL: define internal i1 @f.mul(i32 %x, i32 %y) { +; CHECK-NEXT: %a.1 = mul i32 %x, %y +; CHECK-NEXT: %c.2 = icmp sgt i32 %a.1, 3999 +; CHECK-NEXT: %c.4 = icmp slt i32 %a.1, 1001 +; CHECK-NEXT: %c.5 = icmp eq i32 %a.1, 1500 +; CHECK-NEXT: %c.6 = icmp slt i32 %a.1, 1500 +; CHECK-NEXT: %res.1 = add i1 false, %c.2 +; CHECK-NEXT: %res.2 = add i1 %res.1, false +; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 +; CHECK-NEXT: %res.4 = add i1 %res.3, %c.5 +; CHECK-NEXT: %res.5 = add i1 %res.4, %c.6 +; CHECK-NEXT: ret i1 %res.5 +; + %a.1 = mul i32 %x, %y + %c.1 = icmp sgt i32 %a.1, 4000 + %c.2 = icmp sgt i32 %a.1, 3999 + %c.3 = icmp slt i32 %a.1, 1000 + %c.4 = icmp slt i32 %a.1, 1001 + %c.5 = icmp eq i32 %a.1, 1500 + %c.6 = icmp slt i32 %a.1, 1500 + %res.1 = add i1 %c.1, %c.2 + %res.2 = add i1 %res.1, %c.3 + %res.3 = add i1 %res.2, %c.4 + %res.4 = add i1 %res.3, %c.5 + %res.5 = add i1 %res.4, %c.6 + ret i1 %res.5 +} + +define i1 @caller.mul() { +; CHECK-LABEL: define i1 @caller.mul() { +; CHECK-NEXT: %call.1 = tail call i1 @f.mul(i32 10, i32 100) +; CHECK-NEXT: %call.2 = tail call i1 @f.mul(i32 20, i32 200) +; CHECK-NEXT: %res = and i1 %call.1, %call.2 +; CHECK-NEXT: ret i1 %res +; + %call.1 = tail call i1 @f.mul(i32 10, i32 100) + %call.2 = tail call i1 @f.mul(i32 20, i32 200) + %res = and i1 %call.1, %call.2 + ret i1 %res +} diff --git a/llvm/test/Transforms/SCCP/range-and.ll b/llvm/test/Transforms/SCCP/range-and.ll --- a/llvm/test/Transforms/SCCP/range-and.ll +++ b/llvm/test/Transforms/SCCP/range-and.ll @@ -8,16 +8,13 @@ ; CHECK-NEXT: [[R:%.*]] = and i64 [[A:%.*]], 255 ; CHECK-NEXT: [[C_0:%.*]] = icmp slt i64 [[R]], 15 ; CHECK-NEXT: call void @use(i1 [[C_0]]) -; CHECK-NEXT: 
[[C_1:%.*]] = icmp slt i64 [[R]], 256 -; CHECK-NEXT: call void @use(i1 [[C_1]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[C_2:%.*]] = icmp eq i64 [[R]], 100 ; CHECK-NEXT: call void @use(i1 [[C_2]]) -; CHECK-NEXT: [[C_3:%.*]] = icmp eq i64 [[R]], 300 -; CHECK-NEXT: call void @use(i1 [[C_3]]) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[C_4:%.*]] = icmp ne i64 [[R]], 100 ; CHECK-NEXT: call void @use(i1 [[C_4]]) -; CHECK-NEXT: [[C_5:%.*]] = icmp ne i64 [[R]], 300 -; CHECK-NEXT: call void @use(i1 [[C_5]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: ret void ; %r = and i64 %a, 255 diff --git a/llvm/test/Transforms/SCCP/vector-bitcast.ll b/llvm/test/Transforms/SCCP/vector-bitcast.ll --- a/llvm/test/Transforms/SCCP/vector-bitcast.ll +++ b/llvm/test/Transforms/SCCP/vector-bitcast.ll @@ -2,7 +2,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128" -; CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64>* %p +; FIXME: Add back support for handling special values of vector/fp types. +; CHECK: store volatile <2 x i64> %and.i119.i, <2 x i64>* %p ; rdar://11324230 define void @foo(<2 x i64>* %p) nounwind {