Index: lib/Analysis/InstructionSimplify.cpp =================================================================== --- lib/Analysis/InstructionSimplify.cpp +++ lib/Analysis/InstructionSimplify.cpp @@ -1283,6 +1283,20 @@ if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1)))) return X; + // ((X << A) | Y) >> A -> X if effective width of Y is not larger than A. + // We can return X as we do in the above case since OR alters no bits in X. + Value *Y; + const APInt *ShAmt; + if (match(Op1, m_APInt(ShAmt)) && + match(Op0, m_c_Or(m_NUWShl(m_Value(X), m_Specific(Op1)), m_Value(Y)))) { + const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + const unsigned ShiftCnt = ShAmt->getZExtValue(); + const unsigned Width = Op0->getType()->getScalarSizeInBits(); + const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros(); + if (EffWidthY <= ShiftCnt) + return X; + } + return nullptr; } @@ -1804,6 +1818,38 @@ MaxRecurse)) return V; + // Assuming the effective width of Y is not larger than A, i.e. all bits + // from X and Y are disjoint in (X << A) | Y, + // if the mask of this AND op covers all bits of X or Y, while it covers + // no bits from the other, we can bypass this AND op. E.g., + // ((X << A) | Y) & Mask -> Y, + // if Mask = ((1 << effective_width_of(Y)) - 1) + // ((X << A) | Y) & Mask -> X << A, + // if Mask = ((1 << effective_width_of(X)) - 1) << A + Value *Y, *XShifted; + if (match(Op1, m_APInt(Mask)) && + match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)), + m_Value(XShifted)), + m_Value(Y)))) { + const unsigned ShiftCnt = ShAmt->getZExtValue(); + const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + const unsigned Width = Op0->getType()->getScalarSizeInBits(); + const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros(); + if (EffWidthY <= ShiftCnt) { + const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI, + Q.DT); + const unsigned EffWidthX = Width - XKnown.countMinLeadingZeros(); + const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY); + const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShiftCnt; + // If the mask is extracting all bits from X or Y as is, we can skip + // this AND op. + if (EffBitsY.isSubsetOf(*Mask) && !EffBitsX.intersects(*Mask)) + return Y; + if (EffBitsX.isSubsetOf(*Mask) && !EffBitsY.intersects(*Mask)) + return XShifted; + } + } + return nullptr; } Index: test/Transforms/InstSimplify/AndOrXor.ll =================================================================== --- test/Transforms/InstSimplify/AndOrXor.ll +++ test/Transforms/InstSimplify/AndOrXor.ll @@ -967,12 +967,8 @@ define i64 @shl_or_and1(i32 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and1( -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[B:%.*]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1 -; CHECK-NEXT: ret i64 [[TMP5]] +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[B:%.*]] to i64 +; CHECK-NEXT: ret i64 [[TMP1]] ; %tmp1 = zext i32 %a to i64 %tmp2 = zext i1 %b to i64 @@ -985,11 +981,8 @@ define i64 @shl_or_and2(i32 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and2( ; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[B:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4294967296 -; CHECK-NEXT: ret i64 [[TMP5]] +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 32 +; CHECK-NEXT: ret i64 [[TMP2]] ; %tmp1 = zext i1 %b to i64 %tmp2 = zext i32 %a to i64 @@ -999,35 +992,26 @@ ret i64 %tmp5 } -define i32 @shl_or_and3(i32 %a, i32 %b) { +define i64 @shl_or_and3(i32 %a, i32 %b) { ; concatinate two 32-bit integers and extract lower 32-bit ; CHECK-LABEL: @shl_or_and3( -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4294967295 -; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 %b to i64 +; CHECK-NEXT: ret i64 [[TMP1]] ; %tmp1 = zext i32 %a to i64 %tmp2 = zext i32 %b to i64 %tmp3 = shl nuw i64 %tmp1, 32 %tmp4 = or i64 %tmp2, %tmp3 %tmp5 = and i64 %tmp4, 4294967295 - %tmp6 = trunc i64 %tmp5 to i32 - ret i32 %tmp6 + ret i64 %tmp5 } define i32 @shl_or_and4(i16 %a, i16 %b) { ; concatinate two 16-bit integers and extract higher 16-bit ; CHECK-LABEL: @shl_or_and4( ; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], -65536 -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 16 +; CHECK-NEXT: ret i32 [[TMP2]] ; %tmp1 = zext i16 %a to i32 %tmp2 = zext i16 %b to i32 @@ -1037,23 +1021,17 @@ ret i32 %tmp5 } -define i64 @shl_or_and5(i64 %a, i1 %b) { +define i128 @shl_or_and5(i64 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and5( -; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[A:%.*]] to i128 -; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[B:%.*]] to i128 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i128 [[TMP1]], 64 -; CHECK-NEXT: [[TMP4:%.*]] = or i128 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = and i128 [[TMP4]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 -; CHECK-NEXT: ret i64 [[TMP6]] +; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[B:%.*]] to i128 +; CHECK-NEXT: ret i128 [[TMP1]] ; %tmp1 = zext i64 %a to i128 %tmp2 = zext i1 %b to i128 %tmp3 = shl nuw i128 %tmp1, 64 %tmp4 = or i128 %tmp2, %tmp3 %tmp5 = and i128 %tmp4, 1 - %tmp6 = trunc i128 %tmp5 to i64 - ret i64 %tmp6 + ret i128 %tmp5 } define i32 @shl_or_and6(i16 %a, i16 %b) { Index: test/Transforms/InstSimplify/shift.ll =================================================================== --- test/Transforms/InstSimplify/shift.ll +++ test/Transforms/InstSimplify/shift.ll @@ -175,26 +175,20 @@ ret <2 x i8> %r } -define i32 @shl_or_shr(i32 %a, i32 %b) { +define i64 @shl_or_shr(i32 %a, i32 %b) { ; CHECK-LABEL: @shl_or_shr( -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32 -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 32 -; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 %a to i64 +; CHECK-NEXT: ret i64 [[TMP1]] ; %tmp1 = zext i32 %a to i64 %tmp2 = zext i32 %b to i64 %tmp3 = shl nuw i64 %tmp1, 32 %tmp4 = or i64 %tmp2, %tmp3 %tmp5 = lshr i64 %tmp4, 32 - %tmp6 = trunc i64 %tmp5 to i32 - ret i32 %tmp6 + ret i64 %tmp5 } -define i32 @shl_or_shr2(i32 %a, i32 %b) { +define i64 @shl_or_shr2(i32 %a, i32 %b) { ; Since shift count of shl is smaller than the size of %b, OR cannot be eliminated. ; CHECK-LABEL: @shl_or_shr2( ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64 @@ -202,14 +196,12 @@ ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 31 ; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 31 -; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: ret i64 [[TMP5]] ; %tmp1 = zext i32 %a to i64 %tmp2 = zext i32 %b to i64 %tmp3 = shl nuw i64 %tmp1, 31 %tmp4 = or i64 %tmp2, %tmp3 %tmp5 = lshr i64 %tmp4, 31 - %tmp6 = trunc i64 %tmp5 to i32 - ret i32 %tmp6 + ret i64 %tmp5 } Index: test/Transforms/NewGVN/pair_jumpthread.ll =================================================================== --- test/Transforms/NewGVN/pair_jumpthread.ll +++ test/Transforms/NewGVN/pair_jumpthread.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -newgvn -S | FileCheck %s ; RUN: opt < %s -newgvn -jump-threading -S | FileCheck --check-prefix=CHECK-JT %s -; This test is expected to fail until the transformation is committed. -; XFAIL: * define signext i32 @testBI(i32 signext %v) { ; Test with std::pair