Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3406,6 +3406,36 @@
       return II;
     break;
   }
+  case Intrinsic::arm_mve_asrl:
+  case Intrinsic::arm_mve_lsll: {
+    // Shift by a constant can become a standard shift.
+    if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
+      int64_t ShiftAmt = C->getSExtValue();
+      if (std::abs(ShiftAmt) < 64) {
+        // Recreate the 64bit value
+        Value *Bot =
+            Builder.CreateZExt(II->getArgOperand(0), Builder.getInt64Ty());
+        Value *Top = Builder.CreateShl(
+            Builder.CreateZExt(II->getArgOperand(1), Builder.getInt64Ty()), 32);
+        Value *Or = Builder.CreateOr(Bot, Top);
+        // Create the shift
+        Value *NewShift = (IID == Intrinsic::arm_mve_asrl && ShiftAmt > 0)
+                              ? Builder.CreateAShr(Or, ShiftAmt)
+                          : (IID == Intrinsic::arm_mve_lsll && ShiftAmt < 0)
+                              ? Builder.CreateLShr(Or, -ShiftAmt)
+                              : Builder.CreateShl(Or, std::abs(ShiftAmt));
+        // Break apart the value again, into a pair of i32s
+        Value *Merge = UndefValue::get(II->getType());
+        Bot = Builder.CreateTrunc(NewShift, Builder.getInt32Ty());
+        Merge = Builder.CreateInsertValue(Merge, Bot, 0);
+        Top = Builder.CreateTrunc(Builder.CreateLShr(NewShift, 32),
+                                  Builder.getInt32Ty());
+        Merge = Builder.CreateInsertValue(Merge, Top, 1);
+        return replaceInstUsesWith(CI, Merge);
+      }
+    }
+    break;
+  }
   case Intrinsic::amdgcn_rcp: {
     Value *Src = II->getArgOperand(0);

Index: llvm/test/Transforms/InstCombine/ARM/longshift.ll
===================================================================
--- llvm/test/Transforms/InstCombine/ARM/longshift.ll
+++ llvm/test/Transforms/InstCombine/ARM/longshift.ll
@@ -9,17 +9,7 @@
 define i64 @asrl_0(i64 %X) {
 ; CHECK-LABEL: @asrl_0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[X:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.asrl(i32 [[TMP2]], i32 [[TMP1]], i32 0)
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 32
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-; CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    ret i64 [[TMP9]]
+; CHECK-NEXT:    ret i64 [[X:%.*]]
 ;
 entry:
   %0 = lshr i64 %X, 32
@@ -38,17 +28,8 @@
 define i64 @asrl_23(i64 %X) {
 ; CHECK-LABEL: @asrl_23(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[X:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.asrl(i32 [[TMP2]], i32 [[TMP1]], i32 23)
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 32
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-; CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    ret i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr i64 [[X:%.*]], 23
+; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
   %0 = lshr i64 %X, 32
@@ -67,17 +48,8 @@
 define i64 @asrl_63(i64 %X) {
 ; CHECK-LABEL: @asrl_63(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[X:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.asrl(i32 [[TMP2]], i32 [[TMP1]], i32 63)
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 32
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-; CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    ret i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr i64 [[X:%.*]], 63
+; CHECK-NEXT:    ret i64 [[TMP0]]
 ;
 entry:
   %0 = lshr i64 %X, 32
@@ -125,17 +97,13 @@
 define i64 @asrl_m2(i64 %X) {
 ; CHECK-LABEL: @asrl_m2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[X:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.asrl(i32 [[TMP2]], i32 [[TMP1]], i32 -2)
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 32
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-; CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    ret i64 [[TMP9]]
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X_TR]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[X]], 30
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
 entry:
   %0 = lshr i64 %X, 32
@@ -186,17 +154,7 @@
 define i64 @lsll_0(i64 %X) {
 ; CHECK-LABEL: @lsll_0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[X:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.lsll(i32 [[TMP2]], i32 [[TMP1]], i32 0)
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 32
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-; CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    ret i64 [[TMP9]]
+; CHECK-NEXT:    ret i64 [[X:%.*]]
 ;
 entry:
   %0 = lshr i64 %X, 32
@@ -215,17 +173,13 @@
 define i64 @lsll_23(i64 %X) {
 ; CHECK-LABEL: @lsll_23(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[X:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.lsll(i32 [[TMP2]], i32 [[TMP1]], i32 23)
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 32
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-; CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    ret i64 [[TMP9]]
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X_TR]], 23
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[X]], 9
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP1]], 32
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
 entry:
   %0 = lshr i64 %X, 32
@@ -244,17 +198,11 @@
 define i64 @lsll_63(i64 %X) {
 ; CHECK-LABEL: @lsll_63(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[X:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.lsll(i32 [[TMP2]], i32 [[TMP1]], i32 63)
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 32
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-; CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    ret i64 [[TMP9]]
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X_TR]], 31
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 32
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
 entry:
   %0 = lshr i64 %X, 32
@@ -302,17 +250,12 @@
 define i64 @lsll_m2(i64 %X) {
 ; CHECK-LABEL: @lsll_m2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[X:%.*]], 32
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.lsll(i32 [[TMP2]], i32 [[TMP1]], i32 -2)
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 32
-; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-; CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    ret i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[X]], 34
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 32
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP0]], 4294967295
+; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
 entry:
   %0 = lshr i64 %X, 32