Index: lib/Analysis/LazyValueInfo.cpp
===================================================================
--- lib/Analysis/LazyValueInfo.cpp
+++ lib/Analysis/LazyValueInfo.cpp
@@ -456,6 +456,8 @@
                              PHINode *PN, BasicBlock *BB);
   bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S,
                              BasicBlock *BB);
+  bool solveBlockValueExtractValue(LVILatticeVal &BBLV,
+                                   ExtractValueInst *EV, BasicBlock *BB);
   bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, Instruction *BBI,
                                BasicBlock *BB);
   bool solveBlockValueCast(LVILatticeVal &BBLV,
@@ -679,6 +681,12 @@
     return true;
   } else if (BBI->getType()->isIntegerTy()) {
+    if (auto *EV = dyn_cast<ExtractValueInst>(BBI)) {
+      if (!solveBlockValueExtractValue(Res, EV, BB))
+        return false;
+      insertResult(Val, BB, Res);
+      return true;
+    }
     if (isa<CastInst>(BBI)) {
       if (!solveBlockValueCast(Res, BBI, BB))
         return false;
@@ -808,6 +816,110 @@
   return true;
 }
 
+static ConstantRange transfer(unsigned Opcode, const ConstantRange &LHS,
+                              const ConstantRange &RHS) {
+  // NOTE: We're currently limited by the set of operations that ConstantRange
+  // can evaluate symbolically. Enhancing that set will allow us to analyze
+  // more definitions.
+  //
+  // Instruction opcodes and the overflow Intrinsic IDs handled here occupy
+  // disjoint numeric ranges, so they can share a single switch.
+  switch (Opcode) {
+  case Instruction::Add:
+  case Intrinsic::sadd_with_overflow:
+  case Intrinsic::uadd_with_overflow:
+    return LHS.add(RHS);
+  case Instruction::Sub:
+  case Intrinsic::ssub_with_overflow:
+  case Intrinsic::usub_with_overflow:
+    return LHS.sub(RHS);
+  case Instruction::Mul:
+  case Intrinsic::smul_with_overflow:
+  case Intrinsic::umul_with_overflow:
+    return LHS.multiply(RHS);
+  case Instruction::UDiv:
+    return LHS.udiv(RHS);
+  case Instruction::Shl:
+    return LHS.shl(RHS);
+  case Instruction::LShr:
+    return LHS.lshr(RHS);
+  case Instruction::And:
+    return LHS.binaryAnd(RHS);
+  case Instruction::Or:
+    return LHS.binaryOr(RHS);
+  default:
+    llvm_unreachable("unsupported binop or intrinsic");
+  }
+}
+
+bool LazyValueInfoCache::solveBlockValueExtractValue(LVILatticeVal &BBLV,
+                                                     ExtractValueInst *EV,
+                                                     BasicBlock *BB) {
+  // The only case we handle is extracting the non-overflow result of an
+  // x.with.overflow intrinsic.
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(EV->getOperand(0));
+  if (!II) {
+    BBLV.markOverdefined();
+    return true;
+  }
+
+  // Bail early unless we're extracting the zero-indexed output (the
+  // non-overflow result).
+  if (EV->getNumIndices() != 1 || *EV->idx_begin() != 0) {
+    BBLV.markOverdefined();
+    return true;
+  }
+
+  // ... and the aggregate is produced by a supported overflow intrinsic.
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::sadd_with_overflow:
+  case Intrinsic::uadd_with_overflow:
+  case Intrinsic::ssub_with_overflow:
+  case Intrinsic::usub_with_overflow:
+  case Intrinsic::smul_with_overflow:
+  case Intrinsic::umul_with_overflow:
+    break;
+  default:
+    BBLV.markOverdefined();
+    return true;
+  }
+
+  if (!hasBlockValue(II->getOperand(0), BB))
+    if (pushBlockValue(std::make_pair(BB, II->getOperand(0))))
+      // More work to do before applying this transfer rule.
+      return false;
+  const unsigned LHSBitWidth =
+      DL.getTypeSizeInBits(II->getOperand(0)->getType());
+  ConstantRange LHSRange = ConstantRange(LHSBitWidth);
+  if (hasBlockValue(II->getOperand(0), BB)) {
+    LVILatticeVal LHSVal = getBlockValue(II->getOperand(0), BB);
+    intersectAssumeBlockValueConstantRange(II->getOperand(0), LHSVal, II);
+    if (LHSVal.isConstantRange())
+      LHSRange = LHSVal.getConstantRange();
+  }
+
+  if (!hasBlockValue(II->getOperand(1), BB))
+    if (pushBlockValue(std::make_pair(BB, II->getOperand(1))))
+      // More work to do before applying this transfer rule.
+      return false;
+  const unsigned RHSBitWidth =
+      DL.getTypeSizeInBits(II->getOperand(1)->getType());
+  ConstantRange RHSRange = ConstantRange(RHSBitWidth);
+  if (hasBlockValue(II->getOperand(1), BB)) {
+    LVILatticeVal RHSVal = getBlockValue(II->getOperand(1), BB);
+    intersectAssumeBlockValueConstantRange(II->getOperand(1), RHSVal, II);
+    if (RHSVal.isConstantRange())
+      RHSRange = RHSVal.getConstantRange();
+  }
+
+  BBLV.markConstantRange(transfer(II->getIntrinsicID(), LHSRange, RHSRange));
+  if (!BBLV.isOverdefined()) {
+    ConstantRange CR = BBLV.getConstantRange();
+    if (!CR.isFullSet())
+      DEBUG(dbgs() << "LVI got " << CR << " from an overflow: " << *II
+                   << "\n");
+  }
+  return true;
+}
+
 bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
                                                 PHINode *PN, BasicBlock *BB) {
   LVILatticeVal Result;  // Start Undefined.
@@ -1129,42 +1241,7 @@
   ConstantInt *RHS = cast<ConstantInt>(BBI->getOperand(1));
   ConstantRange RHSRange = ConstantRange(RHS->getValue());
 
-  // NOTE: We're currently limited by the set of operations that ConstantRange
-  // can evaluate symbolically. Enhancing that set will allows us to analyze
-  // more definitions.
-  LVILatticeVal Result;
-  switch (BBI->getOpcode()) {
-  case Instruction::Add:
-    Result.markConstantRange(LHSRange.add(RHSRange));
-    break;
-  case Instruction::Sub:
-    Result.markConstantRange(LHSRange.sub(RHSRange));
-    break;
-  case Instruction::Mul:
-    Result.markConstantRange(LHSRange.multiply(RHSRange));
-    break;
-  case Instruction::UDiv:
-    Result.markConstantRange(LHSRange.udiv(RHSRange));
-    break;
-  case Instruction::Shl:
-    Result.markConstantRange(LHSRange.shl(RHSRange));
-    break;
-  case Instruction::LShr:
-    Result.markConstantRange(LHSRange.lshr(RHSRange));
-    break;
-  case Instruction::And:
-    Result.markConstantRange(LHSRange.binaryAnd(RHSRange));
-    break;
-  case Instruction::Or:
-    Result.markConstantRange(LHSRange.binaryOr(RHSRange));
-    break;
-  default:
-    // Should be dead if the code above is correct
-    llvm_unreachable("inconsistent with above");
-    break;
-  }
-
-  BBLV = Result;
+  BBLV.markConstantRange(transfer(BBI->getOpcode(), LHSRange, RHSRange));
   return true;
 }
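(Aside, not part of the patch: a minimal standalone sketch of the ConstantRange
arithmetic the transfer rules above rely on. It assumes only the in-tree
ConstantRange/APInt API from llvm/IR/ConstantRange.h; the ranges mirror the
@sadd1 test below.)

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      // Both operands are known to lie in [-9, 10), as in @sadd1 below.
      ConstantRange X(APInt(32, -9, /*isSigned=*/true),
                      APInt(32, 10, /*isSigned=*/true));
      ConstantRange Y = X;
      // sadd.with.overflow shares Instruction::Add's transfer rule:
      // [-9,10) + [-9,10) = [-18,19). That is why "icmp slt i32 %add, 19"
      // folds to true in @sadd1 while the tighter bound in @sadd2 does not.
      ConstantRange Sum = X.add(Y);
      errs() << "x+y lies in " << Sum << "\n"; // prints [-18,19)
      return 0;
    }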
Index: test/Transforms/CorrelatedValuePropagation/overflow.ll
===================================================================
--- test/Transforms/CorrelatedValuePropagation/overflow.ll
+++ test/Transforms/CorrelatedValuePropagation/overflow.ll
@@ -0,0 +1,348 @@
+; RUN: opt -correlated-propagation -S %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Make sure CVP (via LVI) can propagate values through sadd.with.overflow.
+; CHECK-LABEL: @sadd1
+define i1 @sadd1(i32 %x, i32 %y) {
+entry:
+  %x.offset = add i32 %x, 9
+  %cmp1 = icmp ult i32 %x.offset, 19
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %y.offset = add i32 %y, 9
+  %cmp2 = icmp ult i32 %y.offset, 19
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  ; x = [-9,10), y = [-9,10)
+  %res = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
+  %add = extractvalue { i32, i1 } %res, 0
+  br label %cont3
+
+cont3:
+  ; add = [-18,19)
+  %cmp3 = icmp slt i32 %add, 19
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry ], [ true, %cont1 ], [ %cmp3, %cont3 ]
+; CHECK: ret i1 true
+  ret i1 %ret
+}
+
+; Negative version of the previous test: %add can be 18, so the narrower
+; bound cannot be proven and the compare must not be folded.
+; CHECK-LABEL: @sadd2
+define i1 @sadd2(i32 %x, i32 %y) {
+entry:
+  %x.offset = add i32 %x, 9
+  %cmp1 = icmp ult i32 %x.offset, 19
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %y.offset = add i32 %y, 9
+  %cmp2 = icmp ult i32 %y.offset, 19
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  ; x = [-9,10), y = [-9,10)
+  %res = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
+  %add = extractvalue { i32, i1 } %res, 0
+  br label %cont3
+
+cont3:
+  ; add = [-18,19)
+  %cmp3 = icmp slt i32 %add, 18
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry ], [ true, %cont1 ], [ %cmp3, %cont3 ]
+; CHECK-NOT: ret i1 true
+  ret i1 %ret
+}
+
+; Make sure CVP (via LVI) can propagate values through uadd.with.overflow.
+; CHECK-LABEL: @uadd1
+define i1 @uadd1(i32 %x, i32 %y) {
+entry:
+  %cmp1 = icmp ult i32 %x, 10
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %cmp2 = icmp ult i32 %y, 10
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  ; x = [0,10), y = [0,10)
+  %res = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+  %add = extractvalue { i32, i1 } %res, 0
+  br label %cont3
+
+cont3:
+  ; add = [0,19)
+  %cmp3 = icmp ult i32 %add, 19
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry ], [ true, %cont1 ], [ %cmp3, %cont3 ]
+; CHECK: ret i1 true
+  ret i1 %ret
+}
+
+; Negative version of the previous test: %add can be 18.
+; CHECK-LABEL: @uadd2
+define i1 @uadd2(i32 %x, i32 %y) {
+entry:
+  %cmp1 = icmp ult i32 %x, 10
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %cmp2 = icmp ult i32 %y, 10
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  ; x = [0,10), y = [0,10)
+  %res = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+  %add = extractvalue { i32, i1 } %res, 0
+  br label %cont3
+
+cont3:
+  %cmp3 = icmp ult i32 %add, 18
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry ], [ true, %cont1 ], [ %cmp3, %cont3 ]
+; CHECK-NOT: ret i1 true
+  ret i1 %ret
+}
+
+; Make sure CVP (via LVI) can propagate values through ssub.with.overflow.
+; CHECK-LABEL: @ssub1
+define i1 @ssub1(i32 %x, i32 %y) {
+entry:
+  %cmp1 = icmp ult i32 %x, 10
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %cmp2 = icmp ult i32 %y, 10
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  %offset = add i32 %x, 9
+  ; x = [0,10), y = [0,10), offset = [9,19)
+  %res = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %offset, i32 %y)
+  %sub = extractvalue { i32, i1 } %res, 0
+  br label %cont3
+
+cont3:
+  ; sub = [0,19)
+  %cmp3 = icmp ult i32 %sub, 19
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry ], [ true, %cont1 ], [ %cmp3, %cont3 ]
+; CHECK: ret i1 true
+  ret i1 %ret
+}
+
+; Negative version of the previous test: %offset starts one lower, so %sub
+; can be -1, which is large when compared as unsigned.
+; CHECK-LABEL: @ssub2
+define i1 @ssub2(i32 %x, i32 %y) {
+entry:
+  %cmp1 = icmp ult i32 %x, 10
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %cmp2 = icmp ult i32 %y, 10
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  %offset = add i32 %x, 8
+  ; x = [0,10), y = [0,10), offset = [8,18)
+  %res = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %offset, i32 %y)
+  %sub = extractvalue { i32, i1 } %res, 0
+  br label %cont3
+
+cont3:
+  ; sub = [-1,18)
+  %cmp3 = icmp ult i32 %sub, 19
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry ], [ true, %cont1 ], [ %cmp3, %cont3 ]
+; CHECK-NOT: ret i1 true
+  ret i1 %ret
+}
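(Aside, not part of the patch: @ssub2 fails for an unsigned-comparison reason
that is easy to miss. With %offset in [8,18) and %y in [0,10), %sub ranges over
[-1,18), and -1 viewed as unsigned i32 is 4294967295, which is not ult 19 --
so LVI must not fold %cmp3, and the CHECK-NOT verifies that it doesn't.)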
+; Make sure CVP (via LVI) can propagate values through usub.with.overflow.
+; CHECK-LABEL: @usub1
+define i1 @usub1(i32 %x, i32 %y) {
+entry:
+  %cmp1 = icmp ult i32 %x, 10
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %cmp2 = icmp ult i32 %y, 10
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  %offset = add i32 %x, 9
+  ; x = [0,10), y = [0,10), offset = [9,19)
+  %res = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %offset, i32 %y)
+  %sub = extractvalue { i32, i1 } %res, 0
+  br label %cont3
+
+cont3:
+  ; sub = [0,19)
+  %cmp3 = icmp ult i32 %sub, 19
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry ], [ true, %cont1 ], [ %cmp3, %cont3 ]
+; CHECK: ret i1 true
+  ret i1 %ret
+}
+
+; Negative version of the previous test: %sub can be 18.
+; CHECK-LABEL: @usub2
+define i1 @usub2(i32 %x, i32 %y) {
+entry:
+  %cmp1 = icmp ult i32 %x, 10
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %cmp2 = icmp ult i32 %y, 10
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  %offset = add i32 %x, 9
+  ; x = [0,10), y = [0,10), offset = [9,19)
+  %res = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %offset, i32 %y)
+  %sub = extractvalue { i32, i1 } %res, 0
+  br label %cont3
+
+cont3:
+  %cmp3 = icmp ult i32 %sub, 18
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry ], [ true, %cont1 ], [ %cmp3, %cont3 ]
+; CHECK-NOT: ret i1 true
+  ret i1 %ret
+}
+
+; Make sure CVP (via LVI) can propagate values through smul.with.overflow.
+; CHECK-LABEL: @smul1
+define i1 @smul1(i32 %x, i32 %y) {
+entry:
+  %x.offset = add i32 %x, 9
+  %cmp1 = icmp ult i32 %x.offset, 19
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %y.offset = add i32 %y, 9
+  %cmp2 = icmp ult i32 %y.offset, 19
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  ; x = [-9,10), y = [-9,10)
+  %res = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %x, i32 %y)
+  %mul = extractvalue { i32, i1 } %res, 0
+  br label %cont3
+
+cont3:
+  ; mul = [-81,82)
+  %cmp3 = icmp sle i32 %mul, 81
+  %cmp4 = icmp sge i32 %mul, -81
+  %cmp5 = and i1 %cmp3, %cmp4
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry ], [ true, %cont1 ], [ %cmp5, %cont3 ]
+; CHECK: ret i1 true
+  ret i1 %ret
+}
+
+; Negative version of the previous test: %mul can be 81.
+; CHECK-LABEL: @smul2
+define i1 @smul2(i32 %x, i32 %y) {
+entry:
+  %x.offset = add i32 %x, 9
+  %cmp1 = icmp ult i32 %x.offset, 19
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %y.offset = add i32 %y, 9
+  %cmp2 = icmp ult i32 %y.offset, 19
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  ; x = [-9,10), y = [-9,10)
+  %res = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %x, i32 %y)
+  %mul = extractvalue { i32, i1 } %res, 0
+  br label %cont3
+
+cont3:
+  %cmp3 = icmp slt i32 %mul, 81
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry ], [ true, %cont1 ], [ %cmp3, %cont3 ]
+; CHECK-NOT: ret i1 true
+  ret i1 %ret
+}
+
+; Make sure CVP (via LVI) can propagate values through umul.with.overflow.
+; CHECK-LABEL: @umul1
+define i1 @umul1(i32 %x, i32 %y) {
+entry:
+  %cmp1 = icmp ult i32 %x, 100
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %cmp2 = icmp ult i32 %y, 100
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  ; x = [0,100), y = [0,100)
+  %res = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+  %mul = extractvalue { i32, i1 } %res, 0
+  br label %cont3
+
+cont3:
+  ; mul = [0,9802)
+  %cmp3 = icmp ule i32 %mul, 9801
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry ], [ true, %cont1 ], [ %cmp3, %cont3 ]
+; CHECK: ret i1 true
+  ret i1 %ret
+}
+
+; Negative version of the previous test: %mul can be 9801.
+; CHECK-LABEL: @umul2
+define i1 @umul2(i32 %x, i32 %y) {
+entry:
+  %cmp1 = icmp ult i32 %x, 100
+  br i1 %cmp1, label %cont1, label %out
+
+cont1:
+  %cmp2 = icmp ult i32 %y, 100
+  br i1 %cmp2, label %cont2, label %out
+
+cont2:
+  ; x = [0,100), y = [0,100)
+  %res = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 %y)
+  %mul = extractvalue { i32, i1 } %res, 0
+  br label %cont3
+
+cont3:
+  %cmp3 = icmp ult i32 %mul, 9801
+  br label %out
+
+out:
+  %ret = phi i1 [ true, %entry ], [ true, %cont1 ], [ %cmp3, %cont3 ]
+; CHECK-NOT: ret i1 true
+  ret i1 %ret
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32)
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)
+declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32)
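(Aside, not part of the patch: a sketch -- not actual opt output -- of the shape
a positive test should take after -correlated-propagation. In @uadd1, LVI
proves %add lies in [0,19), so CVP folds %cmp3 to true; the phi in %out is then
all-true and simplifies away, leaving exactly what "CHECK: ret i1 true"
matches:)

    out:
      ret i1 true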