Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1992,6 +1992,30 @@ case Intrinsic::fshl: case Intrinsic::fshr: { + const APInt *SA; + if (match(II->getArgOperand(2), m_APInt(SA))) { + unsigned BitWidth = SA->getBitWidth(); + uint64_t ShiftAmt = SA->urem(BitWidth); + // Zero shift is already handled in simplification. + if (ShiftAmt != 0) { + Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1); + // Normalize to funnel shift left. + if (II->getIntrinsicID() == Intrinsic::fshr) + ShiftAmt = BitWidth - ShiftAmt; + + // fshl(X, 0, C) -> shl X, C + // fshl(X, undef, C) -> shl X, C + if (match(Op1, m_Zero()) || match(Op1, m_Undef())) + return replaceInstUsesWith(*II, Builder.CreateShl(Op0, ShiftAmt)); + + // fshl(0, X, C) -> lshr X, (BW-C) + // fshl(undef, X, C) -> lshr X, (BW-C) + if (match(Op0, m_Zero()) || match(Op0, m_Undef())) + return replaceInstUsesWith(*II, + Builder.CreateLShr(Op1, BitWidth - ShiftAmt)); + } + } + // The shift amount (operand 2) of a funnel shift is modulo the bitwidth, // so only the low bits of the shift amount are demanded if the bitwidth is // a power-of-2. Index: test/Transforms/InstCombine/fsh.ll =================================================================== --- test/Transforms/InstCombine/fsh.ll +++ test/Transforms/InstCombine/fsh.ll @@ -145,8 +145,8 @@ define i32 @fshl_op0_undef(i32 %x) { ; CHECK-LABEL: @fshl_op0_undef( -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 undef, i32 [[X:%.*]], i32 7) -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 25 +; CHECK-NEXT: ret i32 [[TMP1]] ; %r = call i32 @llvm.fshl.i32(i32 undef, i32 %x, i32 7) ret i32 %r @@ -154,8 +154,8 @@ define i32 @fshl_op0_zero(i32 %x) { ; CHECK-LABEL: @fshl_op0_zero( -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 0, i32 [[X:%.*]], i32 7) -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 25 +; CHECK-NEXT: ret i32 [[TMP1]] ; %r = call i32 @llvm.fshl.i32(i32 0, i32 %x, i32 7) ret i32 %r @@ -163,8 +163,8 @@ define i33 @fshr_op0_undef(i33 %x) { ; CHECK-LABEL: @fshr_op0_undef( -; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 undef, i33 [[X:%.*]], i33 7) -; CHECK-NEXT: ret i33 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i33 [[X:%.*]], 7 +; CHECK-NEXT: ret i33 [[TMP1]] ; %r = call i33 @llvm.fshr.i33(i33 undef, i33 %x, i33 7) ret i33 %r @@ -172,8 +172,8 @@ define i33 @fshr_op0_zero(i33 %x) { ; CHECK-LABEL: @fshr_op0_zero( -; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 0, i33 [[X:%.*]], i33 7) -; CHECK-NEXT: ret i33 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i33 [[X:%.*]], 7 +; CHECK-NEXT: ret i33 [[TMP1]] ; %r = call i33 @llvm.fshr.i33(i33 0, i33 %x, i33 7) ret i33 %r @@ -181,8 +181,8 @@ define i32 @fshl_op1_undef(i32 %x) { ; CHECK-LABEL: @fshl_op1_undef( -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 undef, i32 7) -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 7 +; CHECK-NEXT: ret i32 [[TMP1]] ; %r = call i32 @llvm.fshl.i32(i32 %x, i32 undef, i32 7) ret i32 %r @@ -190,8 +190,8 @@ define i32 @fshl_op1_zero(i32 %x) { ; CHECK-LABEL: @fshl_op1_zero( -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 0, i32 7) -; CHECK-NEXT: ret i32 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 7 +; CHECK-NEXT: ret i32 [[TMP1]] ; %r = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 7) ret i32 %r @@ -199,8 +199,8 @@ define i33 @fshr_op1_undef(i33 %x) { ; CHECK-LABEL: @fshr_op1_undef( -; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 undef, i33 7) -; CHECK-NEXT: ret i33 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i33 [[X:%.*]], 26 +; CHECK-NEXT: ret i33 [[TMP1]] ; %r = call i33 @llvm.fshr.i33(i33 %x, i33 undef, i33 7) ret i33 %r @@ -208,8 +208,8 @@ define i33 @fshr_op1_zero(i33 %x) { ; CHECK-LABEL: @fshr_op1_zero( -; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 0, i33 7) -; CHECK-NEXT: ret i33 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = shl i33 [[X:%.*]], 26 +; CHECK-NEXT: ret i33 [[TMP1]] ; %r = call i33 @llvm.fshr.i33(i33 %x, i33 0, i33 7) ret i33 %r