diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49214,7 +49214,13 @@
       if (VConstraint && Subtarget.hasVLX())
         return std::make_pair(0U, &X86::FR64XRegClass);
       return std::make_pair(0U, &X86::FR64RegClass);
-      // TODO: Handle i128 in FR128RegClass after it is tested well.
+    case MVT::i128:
+      if (Subtarget.is64Bit()) {
+        if (VConstraint && Subtarget.hasVLX())
+          return std::make_pair(0U, &X86::VR128XRegClass);
+        return std::make_pair(0U, &X86::VR128RegClass);
+      }
+      break;
     // Vector types and fp128.
     case MVT::f128:
     case MVT::v16i8:
diff --git a/llvm/test/CodeGen/X86/inline-asm-x-i128.ll b/llvm/test/CodeGen/X86/inline-asm-x-i128.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-asm-x-i128.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnux32 | FileCheck %s
+; RUN: not llc < %s -mtriple=i386-unknown-linux-gnu 2>&1 | FileCheck %s --check-prefix=ERROR
+
+; For 32-bit we still error since __int128 isn't supported in the frontend.
+; ERROR: error: couldn't allocate output register for constraint 'x'
+
+define { i64, i64 } @foo(i64 %0, i64 %1) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rsi, %xmm0
+; CHECK-NEXT:    movq %rdi, %xmm1
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movq %xmm0, %rax
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT:    movq %xmm0, %rdx
+; CHECK-NEXT:    retq
+  %3 = zext i64 %1 to i128
+  %4 = shl nuw i128 %3, 64
+  %5 = zext i64 %0 to i128
+  %6 = or i128 %4, %5
+  %7 = tail call i128 asm sideeffect "movdqa $1, $0", "=x,x,~{dirflag},~{fpsr},~{flags}"(i128 %6)
+  %8 = trunc i128 %7 to i64
+  %9 = lshr i128 %7, 64
+  %10 = trunc i128 %9 to i64
+  %11 = insertvalue { i64, i64 } undef, i64 %8, 0
+  %12 = insertvalue { i64, i64 } %11, i64 %10, 1
+  ret { i64, i64 } %12
+}