Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td
+++ include/llvm/IR/IntrinsicsX86.td
@@ -2726,24 +2726,24 @@
 // ADX
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_addcarryx_u32: GCCBuiltin<"__builtin_ia32_addcarryx_u32">,
-          Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty,
-                     llvm_ptr_ty], [IntrArgMemOnly]>;
-  def int_x86_addcarryx_u64: GCCBuiltin<"__builtin_ia32_addcarryx_u64">,
-          Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty,
-                     llvm_ptr_ty], [IntrArgMemOnly]>;
-  def int_x86_addcarry_u32: GCCBuiltin<"__builtin_ia32_addcarry_u32">,
-          Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty,
-                     llvm_ptr_ty], [IntrArgMemOnly]>;
-  def int_x86_addcarry_u64: GCCBuiltin<"__builtin_ia32_addcarry_u64">,
-          Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty,
-                     llvm_ptr_ty], [IntrArgMemOnly]>;
-  def int_x86_subborrow_u32: GCCBuiltin<"__builtin_ia32_subborrow_u32">,
-          Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty,
-                     llvm_ptr_ty], [IntrArgMemOnly]>;
-  def int_x86_subborrow_u64: GCCBuiltin<"__builtin_ia32_subborrow_u64">,
-          Intrinsic<[llvm_i8_ty], [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty,
-                     llvm_ptr_ty], [IntrArgMemOnly]>;
+  def int_x86_addcarryx_u32:
+      Intrinsic<[llvm_i8_ty, llvm_i32_ty],
+                [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_addcarryx_u64:
+      Intrinsic<[llvm_i8_ty, llvm_i64_ty],
+                [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+  def int_x86_addcarry_u32:
+      Intrinsic<[llvm_i8_ty, llvm_i32_ty],
+                [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_addcarry_u64:
+      Intrinsic<[llvm_i8_ty, llvm_i64_ty],
+                [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+  def int_x86_subborrow_u32:
+      Intrinsic<[llvm_i8_ty, llvm_i32_ty],
+                [llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_subborrow_u64:
+      Intrinsic<[llvm_i8_ty, llvm_i64_ty],
+                [llvm_i8_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
 }
 
 //===----------------------------------------------------------------------===//
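For illustration: under the new definitions the carry-out is element 0 of the returned struct and the arithmetic result is element 1, so a caller that previously passed an output pointer now stores the result itself. A minimal sketch of the new IR form (the function name add_with_carry and all value names are illustrative, not part of this patch):

  declare { i8, i32 } @llvm.x86.addcarry.u32(i8, i32, i32)

  define i8 @add_with_carry(i8 %c, i32 %a, i32 %b, i32* %out) {
    %res = call { i8, i32 } @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b)
    %sum = extractvalue { i8, i32 } %res, 1
    store i32 %sum, i32* %out
    %cf = extractvalue { i8, i32 } %res, 0
    ret i8 %cf
  }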
Index: lib/IR/AutoUpgrade.cpp
===================================================================
--- lib/IR/AutoUpgrade.cpp
+++ lib/IR/AutoUpgrade.cpp
@@ -65,6 +65,17 @@
   return true;
 }
 
+static bool UpgradeADCSBBIntrinsic(Function *F, Intrinsic::ID IID,
+                                   Function *&NewFn) {
+  // If this intrinsic has 3 operands, it's the new version.
+  if (F->getFunctionType()->getNumParams() == 3)
+    return false;
+
+  rename(F);
+  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
+  return true;
+}
+
 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
   // All of the intrinsics matches below should be marked with which llvm
   // version started autoupgrading them. At some point in the future we would
@@ -371,6 +382,19 @@
     return true;
   }
 
+  if (Name == "addcarryx.u32")
+    return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarryx_u32, NewFn);
+  if (Name == "addcarryx.u64")
+    return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarryx_u64, NewFn);
+  if (Name == "addcarry.u32")
+    return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarry_u32, NewFn);
+  if (Name == "addcarry.u64")
+    return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_addcarry_u64, NewFn);
+  if (Name == "subborrow.u32")
+    return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_subborrow_u32, NewFn);
+  if (Name == "subborrow.u64")
+    return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_subborrow_u64, NewFn);
+
   // SSE4.1 ptest functions may have an old signature.
   if (Name.startswith("sse41.ptest")) { // Added in 3.2
     if (Name.substr(11) == "c")
@@ -3417,6 +3441,40 @@
     break;
   }
 
+  case Intrinsic::x86_addcarryx_u32:
+  case Intrinsic::x86_addcarryx_u64:
+  case Intrinsic::x86_addcarry_u32:
+  case Intrinsic::x86_addcarry_u64:
+  case Intrinsic::x86_subborrow_u32:
+  case Intrinsic::x86_subborrow_u64: {
+    // This used to take 4 arguments. If we only have 3 arguments, it's
+    // already upgraded.
+    if (CI->getNumArgOperands() == 3)
+      return;
+
+    // Make a call with 3 operands.
+    NewCall = Builder.CreateCall(NewFn, { CI->getArgOperand(0),
+                                          CI->getArgOperand(1),
+                                          CI->getArgOperand(2)});
+    // Extract the second result and store it.
+    Value *Data = Builder.CreateExtractValue(NewCall, 1);
+    // Cast the pointer to the right type.
+    Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
+                        llvm::PointerType::getUnqual(Data->getType()));
+    Builder.CreateAlignedStore(Data, Ptr, 1);
+    // Replace the original call result with the first result of the new call.
+    Value *CF = Builder.CreateExtractValue(NewCall, 0);
+
+    std::string Name = CI->getName();
+    if (!Name.empty()) {
+      CI->setName(Name + ".old");
+      NewCall->setName(Name);
+    }
+    CI->replaceAllUsesWith(CF);
+    CI->eraseFromParent();
+    return;
+  }
+
   case Intrinsic::x86_sse41_insertps:
   case Intrinsic::x86_sse41_dppd:
   case Intrinsic::x86_sse41_dpps:
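Put together, the autoupgrade rewrites an old-style call into the new call plus an explicit store, matching the CreateExtractValue/CreateBitCast/CreateAlignedStore sequence above. A sketch of the before/after IR (value names are illustrative):

  ; Old form, as found in older bitcode:
  %ret = call i8 @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b, i8* %ptr)

  ; Upgraded form emitted by UpgradeIntrinsicCall:
  %ret = call { i8, i32 } @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b)
  %data = extractvalue { i8, i32 } %ret, 1
  %cast = bitcast i8* %ptr to i32*
  store i32 %data, i32* %cast, align 1
  %cf = extractvalue { i8, i32 } %ret, 0
  ; all uses of the old i8 result are then replaced with %cf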
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -21286,6 +21286,18 @@
     return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2), RoundingMode);
   }
+  // ADC/ADCX/SBB
+  case ADX: {
+    SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
+    SDVTList VTs = DAG.getVTList(Op.getOperand(2).getValueType(), MVT::i32);
+    SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1),
+                                DAG.getConstant(-1, dl, MVT::i8));
+    SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2),
+                              Op.getOperand(3), GenCF.getValue(1));
+    SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
+    SDValue Results[] = { SetCC, Res };
+    return DAG.getMergeValues(Results, dl);
+  }
   default:
     break;
   }
@@ -21990,20 +22002,6 @@
     return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Ret,
                        SDValue(InTrans.getNode(), 1));
   }
-  // ADC/ADCX/SBB
-  case ADX: {
-    SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
-    SDVTList VTs = DAG.getVTList(Op.getOperand(3).getValueType(), MVT::i32);
-    SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
-                                DAG.getConstant(-1, dl, MVT::i8));
-    SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
-                              Op.getOperand(4), GenCF.getValue(1));
-    SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0),
-                                 Op.getOperand(5), MachinePointerInfo());
-    SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
-    SDValue Results[] = { SetCC, Store };
-    return DAG.getMergeValues(Results, dl);
-  }
   case TRUNCATE_TO_MEM_VI8:
   case TRUNCATE_TO_MEM_VI16:
   case TRUNCATE_TO_MEM_VI32: {
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h
+++ lib/Target/X86/X86IntrinsicsInfo.h
@@ -64,11 +64,6 @@
 * the alphabetical order.
 */
static const IntrinsicData IntrinsicsWithChain[] = {
-  X86_INTRINSIC_DATA(addcarry_u32, ADX, X86ISD::ADC, 0),
-  X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, 0),
-  X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
-  X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
-
  X86_INTRINSIC_DATA(avx2_gather_d_d, GATHER_AVX2, X86::VPGATHERDDrm, 0),
  X86_INTRINSIC_DATA(avx2_gather_d_d_256, GATHER_AVX2, X86::VPGATHERDDYrm, 0),
  X86_INTRINSIC_DATA(avx2_gather_d_pd, GATHER_AVX2, X86::VGATHERDPDrm, 0),
@@ -270,9 +265,6 @@
  X86_INTRINSIC_DATA(rdseed_64, RDSEED, X86ISD::RDSEED, 0),
  X86_INTRINSIC_DATA(rdtsc, RDTSC, X86ISD::RDTSC_DAG, 0),
  X86_INTRINSIC_DATA(rdtscp, RDTSC, X86ISD::RDTSCP_DAG, 0),
-
-  X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, 0),
-  X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, 0),
  X86_INTRINSIC_DATA(xgetbv, XGETBV, X86::XGETBV, 0),
  X86_INTRINSIC_DATA(xtest, XTEST, X86ISD::XTEST, 0),
};
@@ -294,6 +286,10 @@
 * the alphabetical order.
 */
static const IntrinsicData IntrinsicsWithoutChain[] = {
+  X86_INTRINSIC_DATA(addcarry_u32, ADX, X86ISD::ADC, 0),
+  X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, 0),
+  X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
+  X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
  X86_INTRINSIC_DATA(avx_addsub_pd_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
  X86_INTRINSIC_DATA(avx_addsub_ps_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
  X86_INTRINSIC_DATA(avx_cmp_pd_256, INTR_TYPE_3OP, X86ISD::CMPP, 0),
@@ -1225,6 +1221,8 @@
  X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
  X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
  X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
+  X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, 0),
+  X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, 0),
  X86_INTRINSIC_DATA(tbm_bextri_u32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
  X86_INTRINSIC_DATA(tbm_bextri_u64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
  X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
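The ADX case now lowers without touching memory: it regenerates EFLAGS.CF from the i8 carry-in by adding -1 to it (the add sets CF exactly when the byte is nonzero), feeds that flag into ADC/ADCX/SBB, and returns SETB of the carry-out as result 0 with the sum as result 1. With no store in the lowering, the intrinsics can move to IntrinsicsWithoutChain. A sketch of the end-to-end behavior, roughly matching the check lines in the tests below:

  %res = call { i8, i32 } @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b)
  ; selects to approximately:
  ;   addb $-1, %dil     ## materialize CF from the carry-in byte
  ;   adcl %edx, %esi    ## result = a + b + CF
  ;   setb %al           ## capture the carry-out as the i8 result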
Index: test/CodeGen/X86/adx-intrinsics-upgrade.ll
===================================================================
--- test/CodeGen/X86/adx-intrinsics-upgrade.ll
+++ test/CodeGen/X86/adx-intrinsics-upgrade.ll
@@ -9,16 +9,16 @@
 ; NOADX: ## %bb.0:
 ; NOADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; NOADX-NEXT: adcl %edx, %esi ## encoding: [0x11,0xd6]
-; NOADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; NOADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; NOADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; NOADX-NEXT: retq ## encoding: [0xc3]
 ;
 ; ADX-LABEL: test_addcarryx_u32:
 ; ADX: ## %bb.0:
 ; ADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; ADX-NEXT: adcxl %edx, %esi ## encoding: [0x66,0x0f,0x38,0xf6,0xf2]
-; ADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; ADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; ADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; ADX-NEXT: retq ## encoding: [0xc3]
   %ret = tail call i8 @llvm.x86.addcarryx.u32(i8 %c, i32 %a, i32 %b, i8* %ptr)
   ret i8 %ret;
@@ -31,16 +31,16 @@
 ; NOADX: ## %bb.0:
 ; NOADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; NOADX-NEXT: adcq %rdx, %rsi ## encoding: [0x48,0x11,0xd6]
-; NOADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; NOADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; NOADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; NOADX-NEXT: retq ## encoding: [0xc3]
 ;
 ; ADX-LABEL: test_addcarryx_u64:
 ; ADX: ## %bb.0:
 ; ADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; ADX-NEXT: adcxq %rdx, %rsi ## encoding: [0x66,0x48,0x0f,0x38,0xf6,0xf2]
-; ADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; ADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; ADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; ADX-NEXT: retq ## encoding: [0xc3]
   %ret = tail call i8 @llvm.x86.addcarryx.u64(i8 %c, i64 %a, i64 %b, i8* %ptr)
   ret i8 %ret;
@@ -53,16 +53,16 @@
 ; NOADX: ## %bb.0:
 ; NOADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; NOADX-NEXT: adcl %edx, %esi ## encoding: [0x11,0xd6]
-; NOADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; NOADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; NOADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; NOADX-NEXT: retq ## encoding: [0xc3]
 ;
 ; ADX-LABEL: test_addcarry_u32:
 ; ADX: ## %bb.0:
 ; ADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; ADX-NEXT: adcxl %edx, %esi ## encoding: [0x66,0x0f,0x38,0xf6,0xf2]
-; ADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; ADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; ADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; ADX-NEXT: retq ## encoding: [0xc3]
   %ret = tail call i8 @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b, i8* %ptr)
   ret i8 %ret;
@@ -75,16 +75,16 @@
 ; NOADX: ## %bb.0:
 ; NOADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; NOADX-NEXT: adcq %rdx, %rsi ## encoding: [0x48,0x11,0xd6]
-; NOADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; NOADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; NOADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; NOADX-NEXT: retq ## encoding: [0xc3]
 ;
 ; ADX-LABEL: test_addcarry_u64:
 ; ADX: ## %bb.0:
 ; ADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; ADX-NEXT: adcxq %rdx, %rsi ## encoding: [0x66,0x48,0x0f,0x38,0xf6,0xf2]
-; ADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; ADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; ADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; ADX-NEXT: retq ## encoding: [0xc3]
   %ret = tail call i8 @llvm.x86.addcarry.u64(i8 %c, i64 %a, i64 %b, i8* %ptr)
   ret i8 %ret;
@@ -97,8 +97,8 @@
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; CHECK-NEXT: sbbl %edx, %esi ## encoding: [0x19,0xd6]
-; CHECK-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
   %ret = tail call i8 @llvm.x86.subborrow.u32(i8 %c, i32 %a, i32 %b, i8* %ptr)
   ret i8 %ret;
@@ -111,8 +111,8 @@
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; CHECK-NEXT: sbbq %rdx, %rsi ## encoding: [0x48,0x19,0xd6]
-; CHECK-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
   %ret = tail call i8 @llvm.x86.subborrow.u64(i8 %c, i64 %a, i64 %b, i8* %ptr)
   ret i8 %ret;
@@ -126,9 +126,9 @@
 ; NOADX-NEXT: xorl %ecx, %ecx ## encoding: [0x31,0xc9]
 ; NOADX-NEXT: addb $-1, %cl ## encoding: [0x80,0xc1,0xff]
 ; NOADX-NEXT: adcq (%rsi), %rax ## encoding: [0x48,0x13,0x06]
+; NOADX-NEXT: setb %cl ## encoding: [0x0f,0x92,0xc1]
 ; NOADX-NEXT: movq %rax, (%rdx) ## encoding: [0x48,0x89,0x02]
-; NOADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
-; NOADX-NEXT: movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; NOADX-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
 ; NOADX-NEXT: retq ## encoding: [0xc3]
 ;
 ; ADX-LABEL: load_crash:
@@ -137,9 +137,9 @@
 ; ADX-NEXT: xorl %ecx, %ecx ## encoding: [0x31,0xc9]
 ; ADX-NEXT: addb $-1, %cl ## encoding: [0x80,0xc1,0xff]
 ; ADX-NEXT: adcxq (%rsi), %rax ## encoding: [0x66,0x48,0x0f,0x38,0xf6,0x06]
+; ADX-NEXT: setb %cl ## encoding: [0x0f,0x92,0xc1]
 ; ADX-NEXT: movq %rax, (%rdx) ## encoding: [0x48,0x89,0x02]
-; ADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
-; ADX-NEXT: movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; ADX-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
 ; ADX-NEXT: retq ## encoding: [0xc3]
   %1 = load i64, i64* %a, align 8
   %2 = load i64, i64* %b, align 8
Index: test/CodeGen/X86/adx-intrinsics.ll
===================================================================
--- test/CodeGen/X86/adx-intrinsics.ll
+++ test/CodeGen/X86/adx-intrinsics.ll
@@ -2,120 +2,144 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 --show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=NOADX
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell --show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=ADX
 
-declare i8 @llvm.x86.addcarryx.u32(i8, i32, i32, i8*)
+declare { i8, i32 } @llvm.x86.addcarryx.u32(i8, i32, i32)
 
 define i8 @test_addcarryx_u32(i8 %c, i32 %a, i32 %b, i8* %ptr) {
 ; NOADX-LABEL: test_addcarryx_u32:
 ; NOADX: ## %bb.0:
 ; NOADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; NOADX-NEXT: adcl %edx, %esi ## encoding: [0x11,0xd6]
-; NOADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; NOADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; NOADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; NOADX-NEXT: retq ## encoding: [0xc3]
 ;
 ; ADX-LABEL: test_addcarryx_u32:
 ; ADX: ## %bb.0:
 ; ADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; ADX-NEXT: adcxl %edx, %esi ## encoding: [0x66,0x0f,0x38,0xf6,0xf2]
-; ADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; ADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; ADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; ADX-NEXT: retq ## encoding: [0xc3]
-  %ret = tail call i8 @llvm.x86.addcarryx.u32(i8 %c, i32 %a, i32 %b, i8* %ptr)
-  ret i8 %ret;
+  %ret = call { i8, i32 } @llvm.x86.addcarryx.u32(i8 %c, i32 %a, i32 %b)
+  %1 = extractvalue { i8, i32 } %ret, 1
+  %2 = bitcast i8* %ptr to i32*
+  store i32 %1, i32* %2, align 1
+  %3 = extractvalue { i8, i32 } %ret, 0
+  ret i8 %3
 }
 
-declare i8 @llvm.x86.addcarryx.u64(i8, i64, i64, i8*)
+declare { i8, i64 } @llvm.x86.addcarryx.u64(i8, i64, i64)
 
 define i8 @test_addcarryx_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) {
 ; NOADX-LABEL: test_addcarryx_u64:
 ; NOADX: ## %bb.0:
 ; NOADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; NOADX-NEXT: adcq %rdx, %rsi ## encoding: [0x48,0x11,0xd6]
-; NOADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; NOADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; NOADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; NOADX-NEXT: retq ## encoding: [0xc3]
 ;
 ; ADX-LABEL: test_addcarryx_u64:
 ; ADX: ## %bb.0:
 ; ADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; ADX-NEXT: adcxq %rdx, %rsi ## encoding: [0x66,0x48,0x0f,0x38,0xf6,0xf2]
-; ADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; ADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; ADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; ADX-NEXT: retq ## encoding: [0xc3]
-  %ret = tail call i8 @llvm.x86.addcarryx.u64(i8 %c, i64 %a, i64 %b, i8* %ptr)
-  ret i8 %ret;
+  %ret = call { i8, i64 } @llvm.x86.addcarryx.u64(i8 %c, i64 %a, i64 %b)
+  %1 = extractvalue { i8, i64 } %ret, 1
+  %2 = bitcast i8* %ptr to i64*
+  store i64 %1, i64* %2, align 1
+  %3 = extractvalue { i8, i64 } %ret, 0
+  ret i8 %3
 }
 
-declare i8 @llvm.x86.addcarry.u32(i8, i32, i32, i8*)
+declare { i8, i32 } @llvm.x86.addcarry.u32(i8, i32, i32)
 
 define i8 @test_addcarry_u32(i8 %c, i32 %a, i32 %b, i8* %ptr) {
 ; NOADX-LABEL: test_addcarry_u32:
 ; NOADX: ## %bb.0:
 ; NOADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; NOADX-NEXT: adcl %edx, %esi ## encoding: [0x11,0xd6]
-; NOADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; NOADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; NOADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; NOADX-NEXT: retq ## encoding: [0xc3]
 ;
 ; ADX-LABEL: test_addcarry_u32:
 ; ADX: ## %bb.0:
 ; ADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; ADX-NEXT: adcxl %edx, %esi ## encoding: [0x66,0x0f,0x38,0xf6,0xf2]
-; ADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; ADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; ADX-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; ADX-NEXT: retq ## encoding: [0xc3]
-  %ret = tail call i8 @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b, i8* %ptr)
-  ret i8 %ret;
+  %ret = call { i8, i32 } @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b)
+  %1 = extractvalue { i8, i32 } %ret, 1
+  %2 = bitcast i8* %ptr to i32*
+  store i32 %1, i32* %2, align 1
+  %3 = extractvalue { i8, i32 } %ret, 0
+  ret i8 %3
 }
 
-declare i8 @llvm.x86.addcarry.u64(i8, i64, i64, i8*)
+declare { i8, i64 } @llvm.x86.addcarry.u64(i8, i64, i64)
 
 define i8 @test_addcarry_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) {
 ; NOADX-LABEL: test_addcarry_u64:
 ; NOADX: ## %bb.0:
 ; NOADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; NOADX-NEXT: adcq %rdx, %rsi ## encoding: [0x48,0x11,0xd6]
-; NOADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; NOADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; NOADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; NOADX-NEXT: retq ## encoding: [0xc3]
 ;
 ; ADX-LABEL: test_addcarry_u64:
 ; ADX: ## %bb.0:
 ; ADX-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; ADX-NEXT: adcxq %rdx, %rsi ## encoding: [0x66,0x48,0x0f,0x38,0xf6,0xf2]
-; ADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; ADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; ADX-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; ADX-NEXT: retq ## encoding: [0xc3]
-  %ret = tail call i8 @llvm.x86.addcarry.u64(i8 %c, i64 %a, i64 %b, i8* %ptr)
-  ret i8 %ret;
+  %ret = call { i8, i64 } @llvm.x86.addcarry.u64(i8 %c, i64 %a, i64 %b)
+  %1 = extractvalue { i8, i64 } %ret, 1
+  %2 = bitcast i8* %ptr to i64*
+  store i64 %1, i64* %2, align 1
+  %3 = extractvalue { i8, i64 } %ret, 0
+  ret i8 %3
 }
 
-declare i8 @llvm.x86.subborrow.u32(i8, i32, i32, i8*)
+declare { i8, i32 } @llvm.x86.subborrow.u32(i8, i32, i32)
 
 define i8 @test_subborrow_u32(i8 %c, i32 %a, i32 %b, i8* %ptr) {
 ; CHECK-LABEL: test_subborrow_u32:
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; CHECK-NEXT: sbbl %edx, %esi ## encoding: [0x19,0xd6]
-; CHECK-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT: movl %esi, (%rcx) ## encoding: [0x89,0x31]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
-  %ret = tail call i8 @llvm.x86.subborrow.u32(i8 %c, i32 %a, i32 %b, i8* %ptr)
-  ret i8 %ret;
+  %ret = call { i8, i32 } @llvm.x86.subborrow.u32(i8 %c, i32 %a, i32 %b)
+  %1 = extractvalue { i8, i32 } %ret, 1
+  %2 = bitcast i8* %ptr to i32*
+  store i32 %1, i32* %2, align 1
+  %3 = extractvalue { i8, i32 } %ret, 0
+  ret i8 %3
 }
 
-declare i8 @llvm.x86.subborrow.u64(i8, i64, i64, i8*)
+declare { i8, i64 } @llvm.x86.subborrow.u64(i8, i64, i64)
 
 define i8 @test_subborrow_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) {
 ; CHECK-LABEL: test_subborrow_u64:
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: addb $-1, %dil ## encoding: [0x40,0x80,0xc7,0xff]
 ; CHECK-NEXT: sbbq %rdx, %rsi ## encoding: [0x48,0x19,0xd6]
-; CHECK-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT: movq %rsi, (%rcx) ## encoding: [0x48,0x89,0x31]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
-  %ret = tail call i8 @llvm.x86.subborrow.u64(i8 %c, i64 %a, i64 %b, i8* %ptr)
-  ret i8 %ret;
+  %ret = call { i8, i64 } @llvm.x86.subborrow.u64(i8 %c, i64 %a, i64 %b)
+  %1 = extractvalue { i8, i64 } %ret, 1
+  %2 = bitcast i8* %ptr to i64*
+  store i64 %1, i64* %2, align 1
+  %3 = extractvalue { i8, i64 } %ret, 0
+  ret i8 %3
 }
 
 ; Try a version with loads. Previously we crashed on this.
@@ -126,9 +150,9 @@
 ; NOADX-NEXT: xorl %ecx, %ecx ## encoding: [0x31,0xc9]
 ; NOADX-NEXT: addb $-1, %cl ## encoding: [0x80,0xc1,0xff]
 ; NOADX-NEXT: adcq (%rsi), %rax ## encoding: [0x48,0x13,0x06]
+; NOADX-NEXT: setb %cl ## encoding: [0x0f,0x92,0xc1]
 ; NOADX-NEXT: movq %rax, (%rdx) ## encoding: [0x48,0x89,0x02]
-; NOADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
-; NOADX-NEXT: movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; NOADX-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
 ; NOADX-NEXT: retq ## encoding: [0xc3]
 ;
 ; ADX-LABEL: load_crash:
@@ -137,15 +161,19 @@
 ; ADX-NEXT: xorl %ecx, %ecx ## encoding: [0x31,0xc9]
 ; ADX-NEXT: addb $-1, %cl ## encoding: [0x80,0xc1,0xff]
 ; ADX-NEXT: adcxq (%rsi), %rax ## encoding: [0x66,0x48,0x0f,0x38,0xf6,0x06]
+; ADX-NEXT: setb %cl ## encoding: [0x0f,0x92,0xc1]
 ; ADX-NEXT: movq %rax, (%rdx) ## encoding: [0x48,0x89,0x02]
-; ADX-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
-; ADX-NEXT: movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; ADX-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
 ; ADX-NEXT: retq ## encoding: [0xc3]
   %1 = load i64, i64* %a, align 8
   %2 = load i64, i64* %b, align 8
   %3 = bitcast i64* %res to i8*
-  %4 = tail call i8 @llvm.x86.addcarryx.u64(i8 0, i64 %1, i64 %2, i8* %3)
-  %conv = zext i8 %4 to i32
+  %4 = call { i8, i64 } @llvm.x86.addcarryx.u64(i8 0, i64 %1, i64 %2)
+  %5 = extractvalue { i8, i64 } %4, 1
+  %6 = bitcast i8* %3 to i64*
+  store i64 %5, i64* %6, align 1
+  %7 = extractvalue { i8, i64 } %4, 0
+  %conv = zext i8 %7 to i32
   ret i32 %conv
 }
 
@@ -160,6 +188,9 @@
 ; CHECK-NEXT: movq %rax, 0 ## encoding: [0x48,0x89,0x04,0x25,0x00,0x00,0x00,0x00]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 entry:
-  %0 = tail call i8 @llvm.x86.addcarryx.u64(i8 0, i64 0, i64 0, i8* null)
+  %0 = call { i8, i64 } @llvm.x86.addcarryx.u64(i8 0, i64 0, i64 0)
+  %1 = extractvalue { i8, i64 } %0, 1
+  store i64 %1, i64* null, align 1
+  %2 = extractvalue { i8, i64 } %0, 0
   ret void
 }
Index: test/CodeGen/X86/stack-folding-adx-x86_64.ll
===================================================================
--- test/CodeGen/X86/stack-folding-adx-x86_64.ll
+++ test/CodeGen/X86/stack-folding-adx-x86_64.ll
@@ -10,7 +10,7 @@
 
 define i8 @stack_fold_addcarry_u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) {
   ;CHECK-LABEL: stack_fold_addcarry_u32
-  ;CHECK: adcxl {{-?[0-9]*}}(%rsp), %ecx {{.*#+}} 4-byte Folded Reload
+  ;CHECK: adcxl {{-?[0-9]*}}(%rsp), %{{.*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
   %2 = tail call i8 @llvm.x86.addcarry.u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3)
   ret i8 %2;
@@ -19,7 +19,7 @@
 
 define i8 @stack_fold_addcarry_u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) {
   ;CHECK-LABEL: stack_fold_addcarry_u64
-  ;CHECK: adcxq {{-?[0-9]*}}(%rsp), %rcx {{.*#+}} 8-byte Folded Reload
+  ;CHECK: adcxq {{-?[0-9]*}}(%rsp), %{{.*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
   %2 = tail call i8 @llvm.x86.addcarry.u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3)
   ret i8 %2;
@@ -28,7 +28,7 @@
 
 define i8 @stack_fold_addcarryx_u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) {
   ;CHECK-LABEL: stack_fold_addcarryx_u32
-  ;CHECK: adcxl {{-?[0-9]*}}(%rsp), %ecx {{.*#+}} 4-byte Folded Reload
+  ;CHECK: adcxl {{-?[0-9]*}}(%rsp), %{{.*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
   %2 = tail call i8 @llvm.x86.addcarryx.u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3)
   ret i8 %2;
@@ -37,7 +37,7 @@
 
 define i8 @stack_fold_addcarryx_u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) {
   ;CHECK-LABEL: stack_fold_addcarryx_u64
-  ;CHECK: adcxq {{-?[0-9]*}}(%rsp), %rcx {{.*#+}} 8-byte Folded Reload
+  ;CHECK: adcxq {{-?[0-9]*}}(%rsp), %{{.*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
   %2 = tail call i8 @llvm.x86.addcarryx.u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3)
   ret i8 %2;
@@ -46,7 +46,7 @@
 
 define i8 @stack_fold_subborrow_u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) {
   ;CHECK-LABEL: stack_fold_subborrow_u32
-  ;CHECK: sbbl {{-?[0-9]*}}(%rsp), %ecx {{.*#+}} 4-byte Folded Reload
+  ;CHECK: sbbl {{-?[0-9]*}}(%rsp), %{{.*}} {{.*#+}} 4-byte Folded Reload
   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
   %2 = tail call i8 @llvm.x86.subborrow.u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3)
   ret i8 %2;
@@ -55,7 +55,7 @@
 
 define i8 @stack_fold_subborrow_u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) {
   ;CHECK-LABEL: stack_fold_subborrow_u64
-  ;CHECK: sbbq {{-?[0-9]*}}(%rsp), %rcx {{.*#+}} 8-byte Folded Reload
+  ;CHECK: sbbq {{-?[0-9]*}}(%rsp), %{{.*}} {{.*#+}} 8-byte Folded Reload
   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
   %2 = tail call i8 @llvm.x86.subborrow.u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3)
   ret i8 %2;