Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -752,6 +752,33 @@
   return nullptr;
 }
 
+static Value *simplifyX86addcarry(const IntrinsicInst &II,
+                                  InstCombiner::BuilderTy &Builder) {
+  Value *CarryIn = II.getArgOperand(0);
+  Value *Op1 = II.getArgOperand(1);
+  Value *Op2 = II.getArgOperand(2);
+  Type *RetTy = II.getType();
+  Type *OpTy = Op1->getType();
+  assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
+         RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
+         "Unexpected types for x86 addcarry");
+
+  // If carry-in is zero, this is just an unsigned add with overflow.
+  if (match(CarryIn, m_ZeroInt())) {
+    Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
+                                          { Op1, Op2 });
+    // The types have to be adjusted to match the x86 call types.
+    Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
+    Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
+                                       Builder.getInt8Ty());
+    Value *Res = UndefValue::get(II.getType());
+    Res = Builder.CreateInsertValue(Res, UAddOV, 0);
+    return Builder.CreateInsertValue(Res, UAddResult, 1);
+  }
+
+  return nullptr;
+}
+
 static Value *simplifyX86insertps(const IntrinsicInst &II,
                                   InstCombiner::BuilderTy &Builder) {
   auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
@@ -3128,6 +3155,12 @@
       return nullptr;
     break;
 
+  case Intrinsic::x86_addcarry_32:
+  case Intrinsic::x86_addcarry_64:
+    if (Value *V = simplifyX86addcarry(*II, Builder))
+      return replaceInstUsesWith(*II, V);
+    break;
+
   case Intrinsic::ppc_altivec_vperm:
     // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
     // Note that ppc_altivec_vperm has a big-endian bias, so when creating
Index: test/Transforms/InstCombine/X86/addcarry.ll
===================================================================
--- test/Transforms/InstCombine/X86/addcarry.ll
+++ test/Transforms/InstCombine/X86/addcarry.ll
@@ -6,11 +6,12 @@
 
 define i32 @no_carryin_i32(i32 %x, i32 %y, i8* %p) {
 ; CHECK-LABEL: @no_carryin_i32(
-; CHECK-NEXT:    [[S:%.*]] = call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 [[X:%.*]], i32 [[Y:%.*]])
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i32 } [[S]], 0
-; CHECK-NEXT:    store i8 [[OV]], i8* [[P:%.*]], align 1
-; CHECK-NEXT:    [[R:%.*]] = extractvalue { i8, i32 } [[S]], 1
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i8
+; CHECK-NEXT:    store i8 [[TMP4]], i8* [[P:%.*]], align 1
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %s = call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %x, i32 %y)
   %ov = extractvalue { i8, i32 } %s, 0
@@ -21,11 +22,12 @@
 
 define i64 @no_carryin_i64(i64 %x, i64 %y, i8* %p) {
 ; CHECK-LABEL: @no_carryin_i64(
-; CHECK-NEXT:    [[S:%.*]] = call { i8, i64 } @llvm.x86.addcarry.64(i8 0, i64 [[X:%.*]], i64 [[Y:%.*]])
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i64 } [[S]], 0
-; CHECK-NEXT:    store i8 [[OV]], i8* [[P:%.*]], align 1
-; CHECK-NEXT:    [[R:%.*]] = extractvalue { i8, i64 } [[S]], 1
-; CHECK-NEXT:    ret i64 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i8
+; CHECK-NEXT:    store i8 [[TMP4]], i8* [[P:%.*]], align 1
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %s = call { i8, i64 } @llvm.x86.addcarry.64(i8 0, i64 %x, i64 %y)
   %ov = extractvalue { i8, i64 } %s, 0