Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1157,6 +1157,22 @@ Value *FalseVal = SI.getFalseValue(); Type *SelType = SI.getType(); + // FIXME: Remove this workaround when freeze related patches are done. + // For select with undef operand which feeds into an equality comparison, + // don't simplify it so loop unswitch can know the equality comparison + // may have an undef operand. This is a workaround for PR31652 caused by + // descrepancy about branch on undef between LoopUnswitch and GVN. + if (isa(TrueVal) || isa(FalseVal)) { + if (any_of(SI.users(), [&](User *U) { + ICmpInst *CI = dyn_cast(U); + if (CI && CI->isEquality()) + return true; + return false; + })) { + return nullptr; + } + } + if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, SQ.getWithInstruction(&SI))) return replaceInstUsesWith(SI, V); Index: llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -553,6 +553,48 @@ return false; } +/// FIXME: Remove this workaround when freeze related patches are done. +/// LoopUnswitch and Equality propagation in GVN have discrepancy about +/// whether branch on undef/poison has undefine behavior. Here it is to +/// rule out some common cases that we found such discrepancy already +/// causing problems. Detail could be found in PR31652. Note if the +/// func returns true, it is unsafe. But if it is false, it doesn't mean +/// it is necessarily safe. +static bool EqualityPropUnSafe(Value &LoopCond) { + ICmpInst *CI = dyn_cast(&LoopCond); + if (!CI || !CI->isEquality()) + return false; + + Value *LHS = CI->getOperand(0); + Value *RHS = CI->getOperand(1); + if (isa(LHS) || isa(RHS)) + return true; + + auto hasUndefInPHI = [](PHINode &PN) { + for (Value *Opd : PN.incoming_values()) { + if (isa(Opd)) + return true; + } + return false; + }; + PHINode *LPHI = dyn_cast(LHS); + PHINode *RPHI = dyn_cast(RHS); + if ((LPHI && hasUndefInPHI(*LPHI)) || (RPHI && hasUndefInPHI(*RPHI))) + return true; + + auto hasUndefInSelect = [](SelectInst &SI) { + if (isa(SI.getTrueValue()) || + isa(SI.getFalseValue())) + return true; + return false; + }; + SelectInst *LSI = dyn_cast(LHS); + SelectInst *RSI = dyn_cast(RHS); + if ((LSI && hasUndefInSelect(*LSI)) || (RSI && hasUndefInSelect(*RSI))) + return true; + return false; +} + /// Do actual work and unswitch loop if possible and profitable. bool LoopUnswitch::processCurrentLoop() { bool Changed = false; @@ -666,8 +708,10 @@ // unswitch on it if we desire. Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, Changed).first; - if (LoopCond && - UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { + if (!LoopCond || EqualityPropUnSafe(*LoopCond)) + continue; + + if (UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { ++NumBranches; return true; } @@ -1035,6 +1079,9 @@ if (!LoopExitBB || isa(LoopExitBB->begin())) return false; // Can't handle this. + if (EqualityPropUnSafe(*LoopCond)) + return false; + UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB, CurrentTerm); ++NumBranches; Index: llvm/trunk/test/Transforms/LoopUnswitch/unswitch-equality-undef.ll =================================================================== --- llvm/trunk/test/Transforms/LoopUnswitch/unswitch-equality-undef.ll +++ llvm/trunk/test/Transforms/LoopUnswitch/unswitch-equality-undef.ll @@ -0,0 +1,121 @@ +; RUN: opt < %s -instcombine -licm -loop-unswitch -loop-unswitch-threshold=1000 -disable-output -stats 2>&1| FileCheck %s +; Check no loop unswitch is done because unswitching of equality expr with +; undef is unsafe before the freeze patch is committed. +; CHECK-NOT: Number of branches unswitched + +define void @ham(i64 %arg) local_unnamed_addr { +bb: + %tmp = icmp eq i64 %arg, 0 + br i1 %tmp, label %bb3, label %bb1 + +bb1: ; preds = %bb + %tmp2 = load volatile i64, i64* @global, align 8 + br label %bb3 + +bb3: ; preds = %bb1, %bb + %tmp4 = phi i64 [ %tmp2, %bb1 ], [ undef, %bb ] + %tmp5 = load i64, i64* @global.1, align 8 + br label %bb6 + +bb6: ; preds = %bb21, %bb3 + %tmp7 = phi i64 [ 3, %bb21 ], [ %tmp5, %bb3 ] + %tmp8 = phi i64 [ %tmp25, %bb21 ], [ 0, %bb3 ] + %tmp9 = icmp eq i64 %tmp7, %arg + br i1 %tmp9, label %bb10, label %bb28 + +bb10: ; preds = %bb6 + %tmp11 = icmp eq i64 %tmp7, 0 + br i1 %tmp11, label %bb21, label %bb12 + +bb12: ; preds = %bb10 + %tmp13 = load i64, i64* @global.2, align 8 + %tmp14 = add nsw i64 %tmp13, 1 + store i64 %tmp14, i64* @global.2, align 8 + %tmp15 = load i64, i64* @global.3, align 8 + %tmp16 = icmp eq i64 %tmp15, %tmp4 + br i1 %tmp16, label %bb17, label %bb21 + +bb17: ; preds = %bb12 + %tmp18 = phi i64 [ %tmp15, %bb12 ] + %tmp19 = load i64, i64* @global.4, align 8 + %tmp20 = add nsw i64 %tmp19, %tmp18 + store i64 %tmp20, i64* @global.5, align 8 + br label %bb29 + +bb21: ; preds = %bb12, %bb10 + %tmp22 = load i64, i64* @global.3, align 8 + %tmp23 = load volatile i64, i64* @global, align 8 + %tmp24 = add nsw i64 %tmp23, %tmp22 + store i64 %tmp24, i64* @global.5, align 8 + store i64 3, i64* @global.1, align 8 + %tmp25 = add nsw i64 %tmp8, 1 + %tmp26 = load i64, i64* @global.6, align 8 + %tmp27 = icmp slt i64 %tmp25, %tmp26 + br i1 %tmp27, label %bb6, label %bb28 + +bb28: ; preds = %bb21, %bb6 + br label %bb29 + +bb29: ; preds = %bb28, %bb17 + ret void +} + +define void @zot(i64 %arg, i64 %arg1) local_unnamed_addr { +bb: + %tmp = icmp eq i64 %arg, 0 + %tmp2 = select i1 %tmp, i64 %arg1, i64 undef + %tmp3 = load i64, i64* @global.1, align 8 + br label %bb4 + +bb4: ; preds = %bb19, %bb + %tmp5 = phi i64 [ 3, %bb19 ], [ %tmp3, %bb ] + %tmp6 = phi i64 [ %tmp23, %bb19 ], [ 0, %bb ] + %tmp7 = icmp eq i64 %tmp5, %arg + br i1 %tmp7, label %bb8, label %bb26 + +bb8: ; preds = %bb4 + %tmp9 = icmp eq i64 %tmp5, 0 + br i1 %tmp9, label %bb19, label %bb10 + +bb10: ; preds = %bb8 + %tmp11 = load i64, i64* @global.2, align 8 + %tmp12 = add nsw i64 %tmp11, 1 + store i64 %tmp12, i64* @global.2, align 8 + %tmp13 = load i64, i64* @global.3, align 8 + %tmp14 = icmp eq i64 %tmp13, %tmp2 + br i1 %tmp14, label %bb15, label %bb19 + +bb15: ; preds = %bb10 + %tmp16 = phi i64 [ %tmp13, %bb10 ] + %tmp17 = load i64, i64* @global.4, align 8 + %tmp18 = add nsw i64 %tmp17, %tmp16 + store i64 %tmp18, i64* @global.5, align 8 + br label %bb27 + +bb19: ; preds = %bb10, %bb8 + %tmp20 = load i64, i64* @global.3, align 8 + %tmp21 = load volatile i64, i64* @global, align 8 + %tmp22 = add nsw i64 %tmp21, %tmp20 + store i64 %tmp22, i64* @global.5, align 8 + store i64 3, i64* @global.1, align 8 + %tmp23 = add nsw i64 %tmp6, 1 + %tmp24 = load i64, i64* @global.6, align 8 + %tmp25 = icmp slt i64 %tmp23, %tmp24 + br i1 %tmp25, label %bb4, label %bb26 + +bb26: ; preds = %bb19, %bb4 + br label %bb27 + +bb27: ; preds = %bb26, %bb15 + ret void +} + +@global = common global i64 0, align 8 +@global.1 = common global i64 0, align 8 +@global.2 = common global i64 0, align 8 +@global.3 = common global i64 0, align 8 +@global.4 = common global i64 0, align 8 +@global.5 = common global i64 0, align 8 +@global.6 = common global i64 0, align 8 + +