Index: lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
===================================================================
--- lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -59,6 +59,98 @@
 };
 } // namespace
 
+/// Match a pattern for a bitwise rotate operation that partially guards
+/// against undefined behavior by branching around the rotation when the shift
+/// amount is 0.
+static bool foldGuardedRotateToFunnelShift(Instruction &I) {
+  if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
+    return false;
+
+  // As with the one-use checks below, this is not strictly necessary, but we
+  // are being cautious to avoid potential perf regressions on targets that
+  // do not actually have a rotate instruction (where the funnel shift would be
+  // expanded back into math/shift/logic ops).
+  if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
+    return false;
+
+  // Match V to funnel shift left/right and capture the source operand and
+  // shift amount in X and Y.
+  auto matchRotate = [](Value *V, Value *&X, Value *&Y) {
+    Value *L0, *L1, *R0, *R1;
+    unsigned Width = V->getType()->getScalarSizeInBits();
+    auto Sub = m_Sub(m_SpecificInt(Width), m_Value(R1));
+
+    // rotate_left(X, Y) == (X << Y) | (X >> (Width - Y))
+    auto RotL = m_OneUse(m_c_Or(m_Shl(m_Value(L0), m_Value(L1)),
+                                m_LShr(m_Value(R0), Sub)));
+    if (RotL.match(V) && L0 == R0 && L1 == R1) {
+      X = L0;
+      Y = L1;
+      return Intrinsic::fshl;
+    }
+
+    // rotate_right(X, Y) == (X >> Y) | (X << (Width - Y))
+    auto RotR = m_OneUse(m_c_Or(m_LShr(m_Value(L0), m_Value(L1)),
+                                m_Shl(m_Value(R0), Sub)));
+    if (RotR.match(V) && L0 == R0 && L1 == R1) {
+      X = L0;
+      Y = L1;
+      return Intrinsic::fshr;
+    }
+
+    return Intrinsic::not_intrinsic;
+  };
+
+  // One phi operand must be a rotate operation, and the other phi operand must
+  // be the source value of that rotate operation:
+  // phi [ rotate(RotSrc, RotAmt), RotBB ], [ RotSrc, GuardBB ]
+  PHINode &Phi = cast<PHINode>(I);
+  Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
+  Value *RotSrc, *RotAmt;
+  Intrinsic::ID IID = matchRotate(P0, RotSrc, RotAmt);
+  if (IID == Intrinsic::not_intrinsic || RotSrc != P1) {
+    IID = matchRotate(P1, RotSrc, RotAmt);
+    if (IID == Intrinsic::not_intrinsic || RotSrc != P0)
+      return false;
+    assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
+           "Pattern must match funnel shift left or right");
+  }
+
+  // The incoming block with our source operand must be the "guard" block.
+  // That must contain a cmp+branch to avoid the rotate when the shift amount
+  // is equal to 0. The other incoming block is the block with the rotate.
+  BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1);
+  BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1);
+  Instruction *TermI = GuardBB->getTerminator();
+  BasicBlock *TrueBB, *FalseBB;
+  ICmpInst::Predicate Pred;
+  if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()),
+                         TrueBB, FalseBB)))
+    return false;
+
+  if (Pred != CmpInst::ICMP_EQ || TrueBB != Phi.getParent() || FalseBB != RotBB)
+    return false;
+
+  // We matched a variation of this IR pattern:
+  // GuardBB:
+  //   %cmp = icmp eq i32 %RotAmt, 0
+  //   br i1 %cmp, label %PhiBB, label %RotBB
+  // RotBB:
+  //   %sub = sub i32 32, %RotAmt
+  //   %shr = lshr i32 %X, %sub
+  //   %shl = shl i32 %X, %RotAmt
+  //   %rot = or i32 %shr, %shl
+  //   br label %PhiBB
+  // PhiBB:
+  //   %cond = phi i32 [ %rot, %RotBB ], [ %X, %GuardBB ]
+  // -->
+  // llvm.fshl.i32(i32 %X, i32 %X, i32 %RotAmt)
+  IRBuilder<> Builder(Phi.getParent(), Phi.getParent()->getFirstInsertionPt());
+  Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
+  Phi.replaceAllUsesWith(Builder.CreateCall(F, {RotSrc, RotSrc, RotAmt}));
+  return true;
+}
+
 /// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
 /// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
 /// of 'and' ops, then we also need to capture the fact that we saw an
@@ -174,8 +266,10 @@
     // Also, we want to avoid matching partial patterns.
     // TODO: It would be more efficient if we removed dead instructions
     // iteratively in this loop rather than waiting until the end.
-    for (Instruction &I : make_range(BB.rbegin(), BB.rend()))
+    for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
       MadeChange |= foldAnyOrAllBitsSet(I);
+      MadeChange |= foldGuardedRotateToFunnelShift(I);
+    }
   }
 
   // We're done with transforms, so remove dead instructions.
Index: test/Transforms/AggressiveInstCombine/rotate.ll
===================================================================
--- test/Transforms/AggressiveInstCombine/rotate.ll
+++ test/Transforms/AggressiveInstCombine/rotate.ll
@@ -9,14 +9,10 @@
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT: br label [[END]]
 ; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[ROTBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
 ;
 entry:
   %cmp = icmp eq i32 %b, 0
@@ -40,14 +36,10 @@
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT: br label [[END]]
 ; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
 ;
 entry:
   %cmp = icmp eq i32 %b, 0
@@ -71,14 +63,10 @@
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
 ; CHECK-NEXT: br label [[END]]
 ; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
 ;
 entry:
   %cmp = icmp eq i32 %b, 0
@@ -102,14 +90,10 @@
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT: br label [[END]]
 ; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[ROTBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
 ;
 entry:
   %cmp = icmp eq i32 %b, 0
@@ -133,10 +117,205 @@
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK: rotbb:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 32, %b
+  %shl = shl i32 %a, %sub
+  %shr = lshr i32 %a, %b
+  %or = or i32 %shr, %shl
+  br label %end
+
+end:
+  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+  ret i32 %cond
+}
+
+define i32 @rotr_commute_or(i32 %a, i32 %b) {
+; CHECK-LABEL: @rotr_commute_or(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 32, %b
+  %shl = shl i32 %a, %sub
+  %shr = lshr i32 %a, %b
+  %or = or i32 %shl, %shr
+  br label %end
+
+end:
+  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+  ret i32 %cond
+}
+
+; Negative test - non-power-of-2 might require urem expansion in the backend.
+
+define i12 @could_be_rotr_weird_type(i12 %a, i12 %b) {
+; CHECK-LABEL: @could_be_rotr_weird_type(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i12 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i12 12, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i12 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i12 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i12 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i12 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i12 [[COND]]
+;
+entry:
+  %cmp = icmp eq i12 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i12 12, %b
+  %shl = shl i12 %a, %sub
+  %shr = lshr i12 %a, %b
+  %or = or i12 %shl, %shr
+  br label %end
+
+end:
+  %cond = phi i12 [ %a, %entry ], [ %or, %rotbb ]
+  ret i12 %cond
+}
+
+; Negative test - wrong phi ops.
+
+define i32 @not_rotr_1(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 32, %b
+  %shl = shl i32 %a, %sub
+  %shr = lshr i32 %a, %b
+  %or = or i32 %shl, %shr
+  br label %end
+
+end:
+  %cond = phi i32 [ %b, %entry ], [ %or, %rotbb ]
+  ret i32 %cond
+}
+
+; Negative test - too many phi ops.
+
+define i32 @not_rotr_2(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @not_rotr_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[CMP42:%.*]] = icmp ugt i32 [[OR]], 42
+; CHECK-NEXT: br i1 [[CMP42]], label [[END]], label [[BOGUS:%.*]]
+; CHECK: bogus:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ], [ [[C:%.*]], [[BOGUS]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 32, %b
+  %shl = shl i32 %a, %sub
+  %shr = lshr i32 %a, %b
+  %or = or i32 %shl, %shr
+  %cmp42 = icmp ugt i32 %or, 42
+  br i1 %cmp42, label %end, label %bogus
+
+bogus:
+  br label %end
+
+end:
+  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ], [ %c, %bogus ]
+  ret i32 %cond
+}
+
+; Negative test - wrong cmp (but this should match?).
+
+define i32 @not_rotr_3(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
 ; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
 ; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
 ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp sle i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 32, %b
+  %shl = shl i32 %a, %sub
+  %shr = lshr i32 %a, %b
+  %or = or i32 %shl, %shr
+  br label %end
+
+end:
+  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+  ret i32 %cond
+}
+
+; Negative test - wrong shift.
+
+define i32 @not_rotr_4(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
 ; CHECK-NEXT: br label [[END]]
 ; CHECK: end:
 ; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
@@ -149,8 +328,41 @@
 rotbb:
   %sub = sub i32 32, %b
   %shl = shl i32 %a, %sub
+  %shr = ashr i32 %a, %b
+  %or = or i32 %shl, %shr
+  br label %end
+
+end:
+  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+  ret i32 %cond
+}
+
+; Negative test - wrong shift.
+
+define i32 @not_rotr_5(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[B]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 32, %b
+  %shl = shl i32 %b, %sub
   %shr = lshr i32 %a, %b
-  %or = or i32 %shr, %shl
+  %or = or i32 %shl, %shr
   br label %end
 
 end:
@@ -158,8 +370,46 @@
   ret i32 %cond
 }
 
-define i32 @rotr_commute_or(i32 %a, i32 %b) {
-; CHECK-LABEL: @rotr_commute_or(
+; Negative test - wrong sub.
+
+define i32 @not_rotr_6(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 8, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+  %sub = sub i32 8, %b
+  %shl = shl i32 %a, %sub
+  %shr = lshr i32 %a, %b
+  %or = or i32 %shl, %shr
+  br label %end
+
+end:
+  %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+  ret i32 %cond
+}
+
+; Negative test - extra use. Technically, we could transform this
+; because it doesn't increase the instruction count, but we're
+; being cautious not to cause a potential perf pessimization for
+; targets that do not have a rotate instruction.
+
+define i32 @could_be_rotr(i32 %a, i32 %b, i32* %p) {
+; CHECK-LABEL: @could_be_rotr(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
@@ -168,6 +418,7 @@
 ; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
 ; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
 ; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: store i32 [[OR]], i32* [[P:%.*]]
 ; CHECK-NEXT: br label [[END]]
 ; CHECK: end:
 ; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
@@ -182,6 +433,7 @@
   %shl = shl i32 %a, %sub
   %shr = lshr i32 %a, %b
   %or = or i32 %shl, %shr
+  store i32 %or, i32* %p
   br label %end
 
 end:
Index: test/Transforms/PhaseOrdering/rotate.ll
===================================================================
--- test/Transforms/PhaseOrdering/rotate.ll
+++ test/Transforms/PhaseOrdering/rotate.ll
@@ -9,17 +9,8 @@
 define i32 @rotl(i32 %a, i32 %b) {
 ; OLDPM-LABEL: @rotl(
 ; OLDPM-NEXT: entry:
-; OLDPM-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
-; OLDPM-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
-; OLDPM: rotbb:
-; OLDPM-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; OLDPM-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; OLDPM-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; OLDPM-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
-; OLDPM-NEXT: br label [[END]]
-; OLDPM: end:
-; OLDPM-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[ROTBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; OLDPM-NEXT: ret i32 [[COND]]
+; OLDPM-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B:%.*]])
+; OLDPM-NEXT: ret i32 [[TMP0]]
 ;
 ; NEWPM-LABEL: @rotl(
 ; NEWPM-NEXT: entry:
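Note: for context, this is the common guarded-rotate source idiom the new fold is aimed at. A minimal C++ sketch (the function name rotl32 is only for illustration; a frontend will typically lower the branchy form below into the icmp/br/shl/lshr/or/phi IR that foldGuardedRotateToFunnelShift matches):

unsigned rotl32(unsigned x, unsigned n) {
  // Guard: when n == 0, the expression (x >> (32 - n)) would shift by the
  // full bit width, which is undefined behavior, so return x unchanged.
  if (n == 0)
    return x;
  return (x << n) | (x >> (32 - n));
}

With this patch, that guarded IR collapses to a single call to llvm.fshl.i32(x, x, n), which the backend can lower to a hardware rotate where one exists, or expand back into shifts otherwise.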