diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2053,7 +2053,7 @@
 }
 
 /// Match UB-safe variants of the funnel shift intrinsic.
-static Instruction *matchFunnelShift(Instruction &Or) {
+static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
   // TODO: Can we reduce the code duplication between this and the related
   // rotate matching code under visitSelect and visitTrunc?
   unsigned Width = Or.getType()->getScalarSizeInBits();
@@ -2094,6 +2094,16 @@
       return L;
     }
 
+    // (shl ShVal, X) | (lshr ShVal, (Width - X)) iff X < Width.
+    // We limit this to X < Width in case the backend re-expands the intrinsic,
+    // and has to reintroduce a shift modulo operation (InstCombine might remove
+    // it after this fold). This still doesn't guarantee that the final codegen
+    // will match this original pattern.
+    if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
+      KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
+      return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+    }
+
     // For non-constant cases, the following patterns currently only work for
     // rotation patterns.
     // TODO: Add general funnel-shift compatible patterns.
@@ -2590,7 +2600,7 @@
   if (Instruction *BSwap = matchBSwap(I))
     return BSwap;
 
-  if (Instruction *Funnel = matchFunnelShift(I))
+  if (Instruction *Funnel = matchFunnelShift(I, *this))
     return Funnel;
 
   if (Instruction *Concat = matchOrConcat(I, Builder))
diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll
--- a/llvm/test/Transforms/InstCombine/funnel.ll
+++ b/llvm/test/Transforms/InstCombine/funnel.ll
@@ -168,11 +168,7 @@
 
 define i64 @fshl_sub_mask(i64 %x, i64 %y, i64 %a) {
 ; CHECK-LABEL: @fshl_sub_mask(
-; CHECK-NEXT:    [[MASK:%.*]] = and i64 [[A:%.*]], 63
-; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[X:%.*]], [[MASK]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i64 64, [[MASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[Y:%.*]], [[SUB]]
-; CHECK-NEXT:    [[R:%.*]] = or i64 [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshl.i64(i64 [[X:%.*]], i64 [[Y:%.*]], i64 [[A:%.*]])
 ; CHECK-NEXT:    ret i64 [[R]]
 ;
   %mask = and i64 %a, 63
@@ -187,11 +183,7 @@
 
 define i64 @fshr_sub_mask(i64 %x, i64 %y, i64 %a) {
 ; CHECK-LABEL: @fshr_sub_mask(
-; CHECK-NEXT:    [[MASK:%.*]] = and i64 [[A:%.*]], 63
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[X:%.*]], [[MASK]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i64 64, [[MASK]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[Y:%.*]], [[SUB]]
-; CHECK-NEXT:    [[R:%.*]] = or i64 [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[Y:%.*]], i64 [[A:%.*]])
 ; CHECK-NEXT:    ret i64 [[R]]
 ;
   %mask = and i64 %a, 63
@@ -204,11 +196,7 @@
 
 define <2 x i64> @fshr_sub_mask_vector(<2 x i64> %x, <2 x i64> %y, <2 x i64> %a) {
 ; CHECK-LABEL: @fshr_sub_mask_vector(
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 63>
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i64> [[X:%.*]], [[MASK]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw <2 x i64> <i64 64, i64 64>, [[MASK]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i64> [[Y:%.*]], [[SUB]]
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i64> [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[A:%.*]])
 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 ;
   %mask = and <2 x i64> %a, <i64 63, i64 63>
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -676,12 +676,8 @@
 
 define i64 @rotl_sub_mask(i64 %0, i64 %1) {
 ; CHECK-LABEL: @rotl_sub_mask(
-; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP1:%.*]], 63
-; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw i64 64, [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP0]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], [[TMP4]]
-; CHECK-NEXT:    ret i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %3 = and i64 %1, 63
   %4 = shl i64 %0, %3
@@ -695,12 +691,8 @@
 
 define i64 @rotr_sub_mask(i64 %0, i64 %1) {
 ; CHECK-LABEL: @rotr_sub_mask(
-; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP1:%.*]], 63
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i64 [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw i64 64, [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP0]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], [[TMP4]]
-; CHECK-NEXT:    ret i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %3 = and i64 %1, 63
   %4 = lshr i64 %0, %3
@@ -712,12 +704,8 @@
 
 define <2 x i64> @rotr_sub_mask_vector(<2 x i64> %0, <2 x i64> %1) {
 ; CHECK-LABEL: @rotr_sub_mask_vector(
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP1:%.*]], <i64 63, i64 63>
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr <2 x i64> [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw <2 x i64> <i64 64, i64 64>, [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shl <2 x i64> [[TMP0]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = or <2 x i64> [[TMP6]], [[TMP4]]
-; CHECK-NEXT:    ret <2 x i64> [[TMP7]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> [[TMP0:%.*]], <2 x i64> [[TMP0]], <2 x i64> [[TMP1:%.*]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %3 = and <2 x i64> %1, <i64 63, i64 63>
   %4 = lshr <2 x i64> %0, %3
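
Note: for reference, a standalone reproducer of the scalar pattern handled by the new fold, mirroring the updated fshl_sub_mask test above (the function name and the expected-output comment are illustrative, not part of the patch). The and with 63 is what lets computeKnownBits prove the shift amount is below the 64-bit width, so running InstCombine over it (e.g. opt -instcombine -S) should collapse the or of opposing shifts into a single @llvm.fshl.i64 call:

; Shift amount is masked to [0, 63], so known bits prove it is < 64 and the
; shl/lshr(sub)/or sequence qualifies as a UB-safe funnel shift.
define i64 @fshl_sub_mask_repro(i64 %x, i64 %y, i64 %a) {
  %mask = and i64 %a, 63
  %shl = shl i64 %x, %mask
  %sub = sub nuw nsw i64 64, %mask
  %shr = lshr i64 %y, %sub
  %r = or i64 %shl, %shr
  ret i64 %r
}
; Expected after InstCombine (per the CHECK lines above):
;   %r = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %a)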