diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2072,7 +2072,7 @@
 }
 
 /// Transform UB-safe variants of bitwise rotate to the funnel shift intrinsic.
-static Instruction *matchRotate(Instruction &Or) {
+Instruction *InstCombinerImpl::matchRotate(BinaryOperator &Or) {
   // TODO: Can we reduce the code duplication between this and the related
   // rotate matching code under visitSelect and visitTrunc?
   unsigned Width = Or.getType()->getScalarSizeInBits();
@@ -2096,7 +2096,7 @@
 
   // Match the shift amount operands for a rotate pattern. This always matches
   // a subtraction on the R operand.
-  auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+  auto matchShiftAmount = [this, &Or](Value *L, Value *R, unsigned Width) -> Value * {
     // Check for constant shift amounts that sum to the bitwidth.
     // TODO: Support non-uniform shift amounts.
     const APInt *LC, *RC;
@@ -2104,6 +2104,12 @@
     if (LC->ult(Width) && RC->ult(Width) && (*LC + *RC) == Width)
       return ConstantInt::get(L->getType(), *LC);
 
+    // (shl ShVal, X) | (lshr ShVal, (Width - X)) iff X < Width
+    if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
+      KnownBits KnownL = computeKnownBits(L, /*Depth*/ 0, &Or);
+      return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+    }
+
     // For non-constant cases we don't support non-pow2 shift masks.
     // TODO: Is it worth matching urem as well?
     if (!isPowerOf2_32(Width))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -359,6 +359,7 @@
 
   Instruction *foldIntrinsicWithOverflowCommon(IntrinsicInst *II);
   Instruction *foldFPSignBitOps(BinaryOperator &I);
+  Instruction *matchRotate(BinaryOperator &Or);
 
 public:
   /// Inserts an instruction \p New before instruction \p Old
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -679,12 +679,8 @@
 
 define i64 @rotl_sub_mask(i64 %0, i64 %1) {
 ; CHECK-LABEL: @rotl_sub_mask(
-; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP1:%.*]], 63
-; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw i64 64, [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP0]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], [[TMP4]]
-; CHECK-NEXT:    ret i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %3 = and i64 %1, 63
   %4 = shl i64 %0, %3
@@ -698,12 +694,8 @@
 
 define i64 @rotr_sub_mask(i64 %0, i64 %1) {
 ; CHECK-LABEL: @rotr_sub_mask(
-; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP1:%.*]], 63
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i64 [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw i64 64, [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP0]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], [[TMP4]]
-; CHECK-NEXT:    ret i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %3 = and i64 %1, 63
   %4 = lshr i64 %0, %3
@@ -715,12 +707,8 @@
 
 define <2 x i64> @rotr_sub_mask_vector(<2 x i64> %0, <2 x i64> %1) {
 ; CHECK-LABEL: @rotr_sub_mask_vector(
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP1:%.*]], <i64 63, i64 63>
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr <2 x i64> [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw <2 x i64> <i64 64, i64 64>, [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shl <2 x i64> [[TMP0]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = or <2 x i64> [[TMP6]], [[TMP4]]
-; CHECK-NEXT:    ret <2 x i64> [[TMP7]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> [[TMP0:%.*]], <2 x i64> [[TMP0]], <2 x i64> [[TMP1:%.*]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %3 = and <2 x i64> %1, <i64 63, i64 63>
   %4 = lshr <2 x i64> %0, %3
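For illustration only (a sketch, not taken from the patch itself): the new matchShiftAmount clause accepts a non-constant shift amount L when computeKnownBits proves L < Width and the other shift uses a one-use (Width - L). By analogy with the rotl_sub_mask test above, a 32-bit variant of the same idiom should now fold as well; the function name and the exact folded form below are assumptions, not part of the patch's test coverage.

    define i32 @rotl_i32_masked_amount(i32 %x, i32 %amt) {
      ; The mask bounds the amount to [0, 31], so computeKnownBits proves it is < 32.
      %lowbits = and i32 %amt, 31
      %shl = shl i32 %x, %lowbits
      ; The single-use (32 - %lowbits) feeding the opposite shift matches the new pattern.
      %sub = sub nuw nsw i32 32, %lowbits
      %lshr = lshr i32 %x, %sub
      %rot = or i32 %shl, %lshr
      ret i32 %rot
    }

    ; Expected (by analogy with the i64 tests) to simplify to roughly:
    ;   %rot = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %amt)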