Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -690,6 +690,30 @@ // TODO: Could compute known zero/one bits based on the input. break; } + case Intrinsic::fshr: + case Intrinsic::fshl: { + const APInt *SA; + if (!match(I->getOperand(2), m_APInt(SA))) + break; + + // Normalize to funnel shift left. APInt shifts of BitWidth are well- + // defined, so no need to special-case zero shifts here. + uint64_t ShiftAmt = SA->urem(BitWidth); + if (II->getIntrinsicID() == Intrinsic::fshr) + ShiftAmt = BitWidth - ShiftAmt; + + APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt)); + APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt)); + if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) || + SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1)) + return I; + + Known.Zero = LHSKnown.Zero.shl(ShiftAmt) | + RHSKnown.Zero.lshr(BitWidth - ShiftAmt); + Known.One = LHSKnown.One.shl(ShiftAmt) | + RHSKnown.One.lshr(BitWidth - ShiftAmt); + break; + } case Intrinsic::x86_mmx_pmovmskb: case Intrinsic::x86_sse_movmsk_ps: case Intrinsic::x86_sse2_movmsk_pd: Index: llvm/trunk/test/Transforms/InstCombine/fsh.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/fsh.ll +++ llvm/trunk/test/Transforms/InstCombine/fsh.ll @@ -255,7 +255,7 @@ define i32 @fshl_only_op0_demanded(i32 %x, i32 %y) { ; CHECK-LABEL: @fshl_only_op0_demanded( -; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7) +; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X:%.*]], 7 ; CHECK-NEXT: [[R:%.*]] = and i32 [[Z]], 128 ; CHECK-NEXT: ret i32 [[R]] ; @@ -266,7 +266,7 @@ define i32 @fshl_only_op1_demanded(i32 %x, i32 %y) { ; CHECK-LABEL: @fshl_only_op1_demanded( -; 
CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7) +; CHECK-NEXT: [[Z:%.*]] = lshr i32 [[Y:%.*]], 25 ; CHECK-NEXT: [[R:%.*]] = and i32 [[Z]], 63 ; CHECK-NEXT: ret i32 [[R]] ; @@ -275,9 +275,9 @@ ret i32 %r } -define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) { -; CHECK-LABEL: @fshr_only_op0_demanded( -; CHECK-NEXT: [[Z:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 7) +define i33 @fshr_only_op1_demanded(i33 %x, i33 %y) { +; CHECK-LABEL: @fshr_only_op1_demanded( +; CHECK-NEXT: [[Z:%.*]] = lshr i33 [[Y:%.*]], 7 ; CHECK-NEXT: [[R:%.*]] = and i33 [[Z]], 12392 ; CHECK-NEXT: ret i33 [[R]] ; @@ -286,10 +286,10 @@ ret i33 %r } -define i33 @fshr_only_op1_demanded(i33 %x, i33 %y) { -; CHECK-LABEL: @fshr_only_op1_demanded( -; CHECK-NEXT: [[Z:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 7) -; CHECK-NEXT: [[R:%.*]] = lshr i33 [[Z]], 30 +define i33 @fshr_only_op0_demanded(i33 %x, i33 %y) { +; CHECK-LABEL: @fshr_only_op0_demanded( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i33 [[X:%.*]], 4 +; CHECK-NEXT: [[R:%.*]] = and i33 [[TMP1]], 7 ; CHECK-NEXT: ret i33 [[R]] ; %z = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 7) @@ -297,6 +297,29 @@ ret i33 %r } +define <2 x i31> @fshl_only_op1_demanded_vec_splat(<2 x i31> %x, <2 x i31> %y) { +; CHECK-LABEL: @fshl_only_op1_demanded_vec_splat( +; CHECK-NEXT: [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], +; CHECK-NEXT: [[R:%.*]] = and <2 x i31> [[Z]], +; CHECK-NEXT: ret <2 x i31> [[R]] +; + %z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> ) + %r = and <2 x i31> %z, + ret <2 x i31> %r +} + +; The shift modulo bitwidth is the same for all vector elements, but this is not simplified yet. 
+define <2 x i31> @fshl_only_op1_demanded_vec_nonsplat(<2 x i31> %x, <2 x i31> %y) { +; CHECK-LABEL: @fshl_only_op1_demanded_vec_nonsplat( +; CHECK-NEXT: [[Z:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> ) +; CHECK-NEXT: [[R:%.*]] = and <2 x i31> [[Z]], +; CHECK-NEXT: ret <2 x i31> [[R]] +; + %z = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> ) + %r = and <2 x i31> %z, + ret <2 x i31> %r +} + ; Demand bits from both operands -- cannot simplify. define i32 @fshl_both_ops_demanded(i32 %x, i32 %y) { @@ -325,11 +348,7 @@ define i32 @fshl_known_bits(i32 %x, i32 %y) { ; CHECK-LABEL: @fshl_known_bits( -; CHECK-NEXT: [[X2:%.*]] = or i32 [[X:%.*]], 1 -; CHECK-NEXT: [[Y2:%.*]] = lshr i32 [[Y:%.*]], 1 -; CHECK-NEXT: [[Z:%.*]] = call i32 @llvm.fshl.i32(i32 [[X2]], i32 [[Y2]], i32 7) -; CHECK-NEXT: [[R:%.*]] = and i32 [[Z]], 192 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: ret i32 128 ; %x2 = or i32 %x, 1 ; lo bit set %y2 = lshr i32 %y, 1 ; hi bit clear @@ -340,11 +359,7 @@ define i33 @fshr_known_bits(i33 %x, i33 %y) { ; CHECK-LABEL: @fshr_known_bits( -; CHECK-NEXT: [[X2:%.*]] = or i33 [[X:%.*]], 1 -; CHECK-NEXT: [[Y2:%.*]] = lshr i33 [[Y:%.*]], 1 -; CHECK-NEXT: [[Z:%.*]] = call i33 @llvm.fshr.i33(i33 [[X2]], i33 [[Y2]], i33 26) -; CHECK-NEXT: [[R:%.*]] = and i33 [[Z]], 192 -; CHECK-NEXT: ret i33 [[R]] +; CHECK-NEXT: ret i33 128 ; %x2 = or i33 %x, 1 ; lo bit set %y2 = lshr i33 %y, 1 ; hi bit clear