diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -912,9 +912,26 @@
     APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
     APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
 
-    if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) ||
-        SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
-      return I;
+    if (I->getOperand(0) != I->getOperand(1)) {
+      if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown,
+                               Depth + 1) ||
+          SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
+        return I;
+    } else { // fshl is a rotate
+      // Avoid converting rotate into funnel shift.
+      // Only simplify if one operand is constant.
+      // NOTE: assign to the enclosing LHSKnown/RHSKnown (do not declare new
+      // locals) so the Known computation below still sees these bits.
+      LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, I);
+      if (DemandedMaskLHS.isSubsetOf(LHSKnown.Zero | LHSKnown.One)) {
+        replaceOperand(*I, 0, Constant::getIntegerValue(VTy, LHSKnown.One));
+        return I;
+      }
+
+      RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, I);
+      if (DemandedMaskRHS.isSubsetOf(RHSKnown.Zero | RHSKnown.One)) {
+        replaceOperand(*I, 1, Constant::getIntegerValue(VTy, RHSKnown.One));
+        return I;
+      }
+    }
 
     Known.Zero = LHSKnown.Zero.shl(ShiftAmt) |
                  RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -440,12 +440,10 @@
   ret <2 x i32> %r
 }
 
-; TODO: Don't let SimplifyDemandedBits split up a rotate - keep the same operand.
-
 define i32 @rotl_common_demanded(i32 %a0) {
 ; CHECK-LABEL: @rotl_common_demanded(
 ; CHECK-NEXT:    [[X:%.*]] = xor i32 [[A0:%.*]], 2
-; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[A0]], i32 8)
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 8)
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %x = xor i32 %a0, 2
@@ -456,7 +454,7 @@
 define i33 @rotr_common_demanded(i33 %a0) {
 ; CHECK-LABEL: @rotr_common_demanded(
 ; CHECK-NEXT:    [[X:%.*]] = xor i33 [[A0:%.*]], 2
-; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X]], i33 [[A0]], i33 25)
+; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X]], i33 [[X]], i33 25)
 ; CHECK-NEXT:    ret i33 [[R]]
 ;
   %x = xor i33 %a0, 2
@@ -704,6 +702,26 @@
   ret i32 %t3
 }
 
+define i32 @fsh_andconst_rotate(i32 %a) {
+; CHECK-LABEL: @fsh_andconst_rotate(
+; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[A:%.*]], 16
+; CHECK-NEXT:    ret i32 [[T2]]
+;
+  %t1 = and i32 %a, 4294901760 ; 0xffff0000
+  %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 16)
+  ret i32 %t2
+}
+
+define i32 @fsh_orconst_rotate(i32 %a) {
+; CHECK-LABEL: @fsh_orconst_rotate(
+; CHECK-NEXT:    [[T2:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 -268435456, i32 4)
+; CHECK-NEXT:    ret i32 [[T2]]
+;
+  %t1 = or i32 %a, 4026531840 ; 0xf0000000
+  %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 4)
+  ret i32 %t2
+}
+
 define <2 x i31> @fshr_mask_args_same_vector(<2 x i31> %a) {
 ; CHECK-LABEL: @fshr_mask_args_same_vector(
 ; CHECK-NEXT:    [[T3:%.*]] = shl <2 x i31> [[A:%.*]],