Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td
+++ include/llvm/IR/IntrinsicsX86.td
@@ -53,8 +53,8 @@
 let TargetPrefix = "x86" in {
   def int_x86_rdtsc : GCCBuiltin<"__builtin_ia32_rdtsc">,
               Intrinsic<[llvm_i64_ty], [], []>;
-  def int_x86_rdtscp : GCCBuiltin<"__builtin_ia32_rdtscp">,
-              Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], [IntrArgMemOnly]>;
+  def int_x86_rdtscp :
+              Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
 }
 
 // Read Performance-Monitoring Counter.
Index: lib/IR/AutoUpgrade.cpp
===================================================================
--- lib/IR/AutoUpgrade.cpp
+++ lib/IR/AutoUpgrade.cpp
@@ -395,6 +395,17 @@
   if (Name == "subborrow.u64")
     return UpgradeADCSBBIntrinsic(F, Intrinsic::x86_subborrow_u64, NewFn);
 
+  if (Name == "rdtscp") {
+    // If this intrinsic has 0 operands, it's the new version.
+    if (F->getFunctionType()->getNumParams() == 0)
+      return false;
+
+    rename(F);
+    NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                      Intrinsic::x86_rdtscp);
+    return true;
+  }
+
   // SSE4.1 ptest functions may have an old signature.
   if (Name.startswith("sse41.ptest")) { // Added in 3.2
     if (Name.substr(11) == "c")
@@ -3441,6 +3452,33 @@
     break;
   }
 
+  case Intrinsic::x86_rdtscp: {
+    // This used to take one argument. If the call has no arguments, it is
+    // already upgraded. Count call arguments only: getNumOperands() also
+    // counts the callee, so it is never 0 for a call.
+    if (CI->getNumArgOperands() == 0)
+      return;
+
+    NewCall = Builder.CreateCall(NewFn);
+    // Extract the second result and store it.
+    Value *Data = Builder.CreateExtractValue(NewCall, 1);
+    // Cast the pointer to the right type.
+    Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
+                                 llvm::PointerType::getUnqual(Data->getType()));
+    Builder.CreateAlignedStore(Data, Ptr, 1);
+    // Replace the original call result with the first result of the new call.
+    Value *TSC = Builder.CreateExtractValue(NewCall, 0);
+
+    std::string Name = CI->getName();
+    if (!Name.empty()) {
+      CI->setName(Name + ".old");
+      NewCall->setName(Name);
+    }
+    CI->replaceAllUsesWith(TSC);
+    CI->eraseFromParent();
+    return;
+  }
+
   case Intrinsic::x86_addcarryx_u32:
   case Intrinsic::x86_addcarryx_u64:
   case Intrinsic::x86_addcarry_u32:
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -21746,39 +21746,39 @@
   }
   SDValue Chain = HI.getValue(1);
 
+  SDValue TSC;
+  if (Subtarget.is64Bit()) {
+    // The EDX register is loaded with the high-order 32 bits of the MSR, and
+    // the EAX register is loaded with the low-order 32 bits.
+    TSC = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
+                      DAG.getConstant(32, DL, MVT::i8));
+    TSC = DAG.getNode(ISD::OR, DL, MVT::i64, LO, TSC);
+  } else {
+    // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+    TSC = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, { LO, HI });
+  }
+
   if (Opcode == X86ISD::RDTSCP_DAG) {
-    assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+    assert(N->getNumOperands() == 2 && "Unexpected number of operands!");
 
     // Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into
     // the ECX register. Add 'ecx' explicitly to the chain.
     SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
                                      HI.getValue(2));
-    // Explicitly store the content of ECX at the location passed in input
-    // to the 'rdtscp' intrinsic.
-    Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2),
-                         MachinePointerInfo());
-  }
 
-  if (Subtarget.is64Bit()) {
-    // The EDX register is loaded with the high-order 32 bits of the MSR, and
-    // the EAX register is loaded with the low-order 32 bits.
-    SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
-                              DAG.getConstant(32, DL, MVT::i8));
-    Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
-    Results.push_back(Chain);
+    Results.push_back(TSC);
+    Results.push_back(ecx);
+    Results.push_back(ecx.getValue(1));
     return;
   }
 
-  // Use a buildpair to merge the two 32-bit values into a 64-bit one.
-  SDValue Ops[] = { LO, HI };
-  SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
-  Results.push_back(Pair);
+  Results.push_back(TSC);
   Results.push_back(Chain);
 }
 
 static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG) {
-  SmallVector<SDValue, 2> Results;
+  SmallVector<SDValue, 3> Results;
   SDLoc DL(Op);
   getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
                           Results);
Index: test/CodeGen/X86/rdtsc-upgrade.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/rdtsc-upgrade.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=generic | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=generic | FileCheck %s --check-prefix=X64
+
+; Verify upgrading of the old form of the rdtscp intrinsic.
+
+define i64 @test_builtin_rdtscp(i8* %A) {
+; X86-LABEL: test_builtin_rdtscp:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    rdtscp
+; X86-NEXT:    movl %ecx, (%esi)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_builtin_rdtscp:
+; X64:       # %bb.0:
+; X64-NEXT:    rdtscp
+; X64-NEXT:    shlq $32, %rdx
+; X64-NEXT:    orq %rdx, %rax
+; X64-NEXT:    movl %ecx, (%rdi)
+; X64-NEXT:    retq
+  %1 = tail call i64 @llvm.x86.rdtscp(i8* %A)
+  ret i64 %1
+}
+
+declare i64 @llvm.x86.rdtscp(i8*)
Index: test/CodeGen/X86/rdtsc.ll
===================================================================
--- test/CodeGen/X86/rdtsc.ll
+++ test/CodeGen/X86/rdtsc.ll
@@ -56,15 +56,19 @@
 ; X64-LABEL: test_builtin_rdtscp:
 ; X64:       # %bb.0:
 ; X64-NEXT:    rdtscp
-; X64-NEXT:    movl %ecx, (%rdi)
 ; X64-NEXT:    shlq $32, %rdx
 ; X64-NEXT:    orq %rdx, %rax
+; X64-NEXT:    movl %ecx, (%rdi)
 ; X64-NEXT:    retq
-  %1 = tail call i64 @llvm.x86.rdtscp(i8* %A)
-  ret i64 %1
+  %1 = call { i64, i32 } @llvm.x86.rdtscp()
+  %2 = extractvalue { i64, i32 } %1, 1
+  %3 = bitcast i8* %A to i32*
+  store i32 %2, i32* %3, align 1
+  %4 = extractvalue { i64, i32 } %1, 0
+  ret i64 %4
 }
 
 declare i64 @llvm.readcyclecounter()
-declare i64 @llvm.x86.rdtscp(i8*)
+declare { i64, i32 } @llvm.x86.rdtscp()
 declare i64 @llvm.x86.rdtsc()
 