Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1992,6 +1992,29 @@ case Intrinsic::fshl: case Intrinsic::fshr: { + const APInt *SA; + if (match(II->getArgOperand(2), m_APInt(SA))) { + Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1); + unsigned BitWidth = SA->getBitWidth(); + uint64_t ShiftAmt = SA->urem(BitWidth); + assert(ShiftAmt != 0 && "SimplifyCall should have handled zero shift"); + // Normalize to funnel shift left. + if (II->getIntrinsicID() == Intrinsic::fshr) + ShiftAmt = BitWidth - ShiftAmt; + + // fshl(X, 0, C) -> shl X, C + // fshl(X, undef, C) -> shl X, C + if (match(Op1, m_Zero()) || match(Op1, m_Undef())) + return BinaryOperator::CreateShl( + Op0, ConstantInt::get(II->getType(), ShiftAmt)); + + // fshl(0, X, C) -> lshr X, (BW-C) + // fshl(undef, X, C) -> lshr X, (BW-C) + if (match(Op0, m_Zero()) || match(Op0, m_Undef())) + return BinaryOperator::CreateLShr( + Op1, ConstantInt::get(II->getType(), BitWidth - ShiftAmt)); + } + // The shift amount (operand 2) of a funnel shift is modulo the bitwidth, // so only the low bits of the shift amount are demanded if the bitwidth is // a power-of-2. Index: llvm/trunk/test/Transforms/InstCombine/fsh.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/fsh.ll +++ llvm/trunk/test/Transforms/InstCombine/fsh.ll @@ -141,11 +141,11 @@ ret <2 x i31> %r } -; Simplify one undef operand and constant shift amount. +; Simplify one undef or zero operand and constant shift amount. 
define i32 @fshl_op0_undef(i32 %x) { ; CHECK-LABEL: @fshl_op0_undef( -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 undef, i32 [[X:%.*]], i32 7) +; CHECK-NEXT: [[R:%.*]] = lshr i32 [[X:%.*]], 25 ; CHECK-NEXT: ret i32 [[R]] ; %r = call i32 @llvm.fshl.i32(i32 undef, i32 %x, i32 7) @@ -154,7 +154,7 @@ define i32 @fshl_op0_zero(i32 %x) { ; CHECK-LABEL: @fshl_op0_zero( -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 0, i32 [[X:%.*]], i32 7) +; CHECK-NEXT: [[R:%.*]] = lshr i32 [[X:%.*]], 25 ; CHECK-NEXT: ret i32 [[R]] ; %r = call i32 @llvm.fshl.i32(i32 0, i32 %x, i32 7) @@ -163,7 +163,7 @@ define i33 @fshr_op0_undef(i33 %x) { ; CHECK-LABEL: @fshr_op0_undef( -; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 undef, i33 [[X:%.*]], i33 7) +; CHECK-NEXT: [[R:%.*]] = lshr i33 [[X:%.*]], 7 ; CHECK-NEXT: ret i33 [[R]] ; %r = call i33 @llvm.fshr.i33(i33 undef, i33 %x, i33 7) @@ -172,7 +172,7 @@ define i33 @fshr_op0_zero(i33 %x) { ; CHECK-LABEL: @fshr_op0_zero( -; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 0, i33 [[X:%.*]], i33 7) +; CHECK-NEXT: [[R:%.*]] = lshr i33 [[X:%.*]], 7 ; CHECK-NEXT: ret i33 [[R]] ; %r = call i33 @llvm.fshr.i33(i33 0, i33 %x, i33 7) @@ -181,7 +181,7 @@ define i32 @fshl_op1_undef(i32 %x) { ; CHECK-LABEL: @fshl_op1_undef( -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 undef, i32 7) +; CHECK-NEXT: [[R:%.*]] = shl i32 [[X:%.*]], 7 ; CHECK-NEXT: ret i32 [[R]] ; %r = call i32 @llvm.fshl.i32(i32 %x, i32 undef, i32 7) @@ -190,7 +190,7 @@ define i32 @fshl_op1_zero(i32 %x) { ; CHECK-LABEL: @fshl_op1_zero( -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 0, i32 7) +; CHECK-NEXT: [[R:%.*]] = shl i32 [[X:%.*]], 7 ; CHECK-NEXT: ret i32 [[R]] ; %r = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 7) @@ -199,7 +199,7 @@ define i33 @fshr_op1_undef(i33 %x) { ; CHECK-LABEL: @fshr_op1_undef( -; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 undef, i33 7) +; CHECK-NEXT: [[R:%.*]] = shl i33 
[[X:%.*]], 26 ; CHECK-NEXT: ret i33 [[R]] ; %r = call i33 @llvm.fshr.i33(i33 %x, i33 undef, i33 7) @@ -208,13 +208,49 @@ define i33 @fshr_op1_zero(i33 %x) { ; CHECK-LABEL: @fshr_op1_zero( -; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 0, i33 7) +; CHECK-NEXT: [[R:%.*]] = shl i33 [[X:%.*]], 26 ; CHECK-NEXT: ret i33 [[R]] ; %r = call i33 @llvm.fshr.i33(i33 %x, i33 0, i33 7) ret i33 %r } +define <2 x i31> @fshl_op0_zero_vec(<2 x i31> %x) { +; CHECK-LABEL: @fshl_op0_zero_vec( +; CHECK-NEXT: [[R:%.*]] = lshr <2 x i31> [[X:%.*]], <i31 24, i31 24> +; CHECK-NEXT: ret <2 x i31> [[R]] +; + %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> zeroinitializer, <2 x i31> %x, <2 x i31> <i31 7, i31 7>) + ret <2 x i31> %r +} + +define <2 x i31> @fshl_op1_undef_vec(<2 x i31> %x) { +; CHECK-LABEL: @fshl_op1_undef_vec( +; CHECK-NEXT: [[R:%.*]] = shl <2 x i31> [[X:%.*]], <i31 7, i31 7> +; CHECK-NEXT: ret <2 x i31> [[R]] +; + %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> undef, <2 x i31> <i31 7, i31 7>) + ret <2 x i31> %r +} + +define <2 x i32> @fshr_op0_undef_vec(<2 x i32> %x) { +; CHECK-LABEL: @fshr_op0_undef_vec( +; CHECK-NEXT: [[R:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 7, i32 7> +; CHECK-NEXT: ret <2 x i32> [[R]] +; + %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> <i32 7, i32 7>) + ret <2 x i32> %r +} + +define <2 x i32> @fshr_op1_zero_vec(<2 x i32> %x) { +; CHECK-LABEL: @fshr_op1_zero_vec( +; CHECK-NEXT: [[R:%.*]] = shl <2 x i32> [[X:%.*]], <i32 25, i32 25> +; CHECK-NEXT: ret <2 x i32> [[R]] +; + %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> <i32 7, i32 7>) + ret <2 x i32> %r +} + ; Only demand bits from one of the operands. define i32 @fshl_only_op0_demanded(i32 %x, i32 %y) {