Index: clang/lib/Basic/Targets/OSTargets.h =================================================================== --- clang/lib/Basic/Targets/OSTargets.h +++ clang/lib/Basic/Targets/OSTargets.h @@ -789,7 +789,7 @@ "i64:64-n8:16:32-S128"); } else if (Triple.getArch() == llvm::Triple::x86_64) { this->resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-" - "i64:64-n8:16:32:64-S128"); + "i64:64-i128:128-n8:16:32:64-S128"); } else if (Triple.getArch() == llvm::Triple::mipsel) { // Handled on mips' setDataLayout. } else { Index: clang/lib/Basic/Targets/X86.h =================================================================== --- clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -652,11 +652,11 @@ // Pointers are 32-bit in x32. resetDataLayout(IsX32 ? "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-" - "i64:64-f80:128-n8:16:32:64-S128" + "i64:64-i128:128-f80:128-n8:16:32:64-S128" : IsWinCOFF ? "e-m:w-p270:32:32-p271:32:32-p272:64:" - "64-i64:64-f80:128-n8:16:32:64-S128" + "64-i64:64-i128:128-f80:128-n8:16:32:64-S128" : "e-m:e-p270:32:32-p271:32:32-p272:64:" - "64-i64:64-f80:128-n8:16:32:64-S128"); + "64-i64:64-i128:128-f80:128-n8:16:32:64-S128"); // Use fpret only for long double. 
RealTypeUsesObjCFPRet = (1 << TargetInfo::LongDouble); @@ -848,7 +848,7 @@ llvm::Triple T = llvm::Triple(Triple); if (T.isiOS()) UseSignedCharForObjCBool = false; - resetDataLayout("e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:" + resetDataLayout("e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:" "16:32:64-S128"); } Index: clang/test/CodeGen/target-data.c =================================================================== --- clang/test/CodeGen/target-data.c +++ clang/test/CodeGen/target-data.c @@ -20,7 +20,7 @@ // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \ // RUN: FileCheck --check-prefix=X86_64 %s -// X86_64: target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +// X86_64: target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" // RUN: %clang_cc1 -triple xcore-unknown-unknown -emit-llvm -o - %s | \ // RUN: FileCheck --check-prefix=XCORE %s @@ -96,7 +96,7 @@ // RUN: %clang_cc1 -triple x86_64-nacl -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=X86_64-NACL -// X86_64-NACL: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-n8:16:32:64-S128" +// X86_64-NACL: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n8:16:32:64-S128" // RUN: %clang_cc1 -triple arm-nacl -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=ARM-NACL Index: llvm/include/llvm/IR/AutoUpgrade.h =================================================================== --- llvm/include/llvm/IR/AutoUpgrade.h +++ llvm/include/llvm/IR/AutoUpgrade.h @@ -93,7 +93,7 @@ /// Upgrade the datalayout string by adding a section for address space /// pointers. - std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple); + std::string UpgradeDataLayoutString(std::string DL, StringRef Triple); /// Upgrade attributes that changed format or kind. 
void UpgradeAttributes(AttrBuilder &B); Index: llvm/lib/IR/AutoUpgrade.cpp =================================================================== --- llvm/lib/IR/AutoUpgrade.cpp +++ llvm/lib/IR/AutoUpgrade.cpp @@ -4307,20 +4307,39 @@ return MDTuple::get(T->getContext(), Ops); } -std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { - StringRef AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64"; +std::string llvm::UpgradeDataLayoutString(std::string DLStr, + StringRef TripleStr) { + + Triple TT(TripleStr); + + // We only have upgrades for X86. + if (!TT.isX86()) + return DLStr; - // If X86, and the datalayout matches the expected format, add pointer size - // address spaces to the datalayout. - if (!Triple(TT).isX86() || DL.contains(AddrSpaces)) - return std::string(DL); + StringRef DL = DLStr; - SmallVector<StringRef, 4> Groups; - Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)"); - if (!R.match(DL, &Groups)) - return std::string(DL); + // We have two cases to handle. Missing address spaces and missing i128 + // alignment. We'll handle them separately. + if (TT.isArch64Bit() && !DL.contains("-i128:128")) { + auto I = DL.find("-i64:64-"); + if (I != StringRef::npos) { + // Insert just before the - at the end of the string we matched. 
+ DLStr = (DL.take_front(I + 7) + "-i128:128" + DL.drop_front(I + 7)).str(); + DL = DLStr; + } + } + + StringRef AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64"; + if (!DL.contains(AddrSpaces)) { + SmallVector<StringRef, 4> Groups; + Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)"); + if (R.match(DL, &Groups)) { + DLStr = (Groups[1] + AddrSpaces + Groups[3]).str(); + DL = DLStr; + } + } - return (Groups[1] + AddrSpaces + Groups[3]).str(); + return DLStr; } void llvm::UpgradeAttributes(AttrBuilder &B) { Index: llvm/lib/Target/X86/X86TargetMachine.cpp =================================================================== --- llvm/lib/Target/X86/X86TargetMachine.cpp +++ llvm/lib/Target/X86/X86TargetMachine.cpp @@ -119,6 +119,11 @@ else Ret += "-f64:32:64"; + // 128 bit integers are always aligned to 128 bits, but only 64-bit matters, + // because __int128 is only supported on 64-bit targets. + if (TT.isArch64Bit()) + Ret += "-i128:128"; + // Some ABIs align long double to 128 bits, others to 32. if (TT.isOSNaCl() || TT.isOSIAMCU()) ; // No f80 Index: llvm/test/Bitcode/upgrade-datalayout.ll =================================================================== --- llvm/test/Bitcode/upgrade-datalayout.ll +++ llvm/test/Bitcode/upgrade-datalayout.ll @@ -2,8 +2,8 @@ ; ; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s -target datalayout = "e-m:e-p:32:32-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-m:e-p:32:32-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; CHECK: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +; CHECK: target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" Index: llvm/test/Bitcode/upgrade-datalayout3.ll =================================================================== --- llvm/test/Bitcode/upgrade-datalayout3.ll +++ llvm/test/Bitcode/upgrade-datalayout3.ll @@ -2,7 +2,7 @@ ; ; RUN: llvm-as %s -o - | llvm-dis - | 
FileCheck %s -target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32" +target datalayout = "e-m:w-p:32:32-i64:64-i128:128-f80:32-n8:16:32-S32" target triple = "i686-pc-windows-msvc" -; CHECK: target datalayout = "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-S32" +; CHECK: target datalayout = "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:32-n8:16:32-S32" Index: llvm/test/CodeGen/X86/atomic-unordered.ll =================================================================== --- llvm/test/CodeGen/X86/atomic-unordered.ll +++ llvm/test/CodeGen/X86/atomic-unordered.ll @@ -323,7 +323,7 @@ ; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq %rcx, %rdi ; CHECK-O0-NEXT: movl %r8d, %ecx -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movq %rax, (%rsp) # 8-byte Spill ; CHECK-O0-NEXT: callq __atomic_load ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rax ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx @@ -334,7 +334,7 @@ ; CHECK-O0-NEXT: movq %rdx, 16(%rdi) ; CHECK-O0-NEXT: movq %rcx, 8(%rdi) ; CHECK-O0-NEXT: movq %rax, (%rdi) -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; CHECK-O0-NEXT: movq (%rsp), %rax # 8-byte Reload ; CHECK-O0-NEXT: addq $56, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -367,8 +367,8 @@ define void @store_i256(i256* %ptr, i256 %v) { ; CHECK-O0-LABEL: store_i256: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: subq $40, %rsp -; CHECK-O0-NEXT: .cfi_def_cfa_offset 48 +; CHECK-O0-NEXT: subq $56, %rsp +; CHECK-O0-NEXT: .cfi_def_cfa_offset 64 ; CHECK-O0-NEXT: xorl %eax, %eax ; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %r9 ; CHECK-O0-NEXT: movq %rsi, {{[0-9]+}}(%rsp) @@ -376,13 +376,13 @@ ; CHECK-O0-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ; CHECK-O0-NEXT: movq %r8, {{[0-9]+}}(%rsp) ; CHECK-O0-NEXT: movl $32, %ecx -; CHECK-O0-NEXT: movq %rdi, (%rsp) # 8-byte Spill +; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) 
# 8-byte Spill ; CHECK-O0-NEXT: movq %rcx, %rdi -; CHECK-O0-NEXT: movq (%rsp), %rsi # 8-byte Reload +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; CHECK-O0-NEXT: movq %r9, %rdx ; CHECK-O0-NEXT: movl %eax, %ecx ; CHECK-O0-NEXT: callq __atomic_store -; CHECK-O0-NEXT: addq $40, %rsp +; CHECK-O0-NEXT: addq $56, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq ; @@ -394,8 +394,8 @@ ; CHECK-O3-NEXT: movq %r8, {{[0-9]+}}(%rsp) ; CHECK-O3-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ; CHECK-O3-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; CHECK-O3-NEXT: movq %rsi, {{[0-9]+}}(%rsp) -; CHECK-O3-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; CHECK-O3-NEXT: movq %rsi, (%rsp) +; CHECK-O3-NEXT: movq %rsp, %rdx ; CHECK-O3-NEXT: movl $32, %edi ; CHECK-O3-NEXT: movq %rax, %rsi ; CHECK-O3-NEXT: xorl %ecx, %ecx Index: llvm/test/CodeGen/X86/bitcast-i256.ll =================================================================== --- llvm/test/CodeGen/X86/bitcast-i256.ll +++ llvm/test/CodeGen/X86/bitcast-i256.ll @@ -14,7 +14,7 @@ ; SLOW: # %bb.0: ; SLOW-NEXT: movq %rdi, %rax ; SLOW-NEXT: vextractf128 $1, %ymm0, 16(%rdi) -; SLOW-NEXT: vmovups %xmm0, (%rdi) +; SLOW-NEXT: vmovaps %xmm0, (%rdi) ; SLOW-NEXT: vzeroupper ; SLOW-NEXT: retq %r = bitcast <8 x i32> %a to i256 Index: llvm/test/CodeGen/X86/catchpad-dynamic-alloca.ll =================================================================== --- llvm/test/CodeGen/X86/catchpad-dynamic-alloca.ll +++ llvm/test/CodeGen/X86/catchpad-dynamic-alloca.ll @@ -62,4 +62,4 @@ ; CHECK-LABEL: $handlerMap$0$test2: ; CHECK: .long 0 ; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 8 +; CHECK-NEXT: .long 16 Index: llvm/test/CodeGen/X86/implicit-null-check.ll =================================================================== --- llvm/test/CodeGen/X86/implicit-null-check.ll +++ llvm/test/CodeGen/X86/implicit-null-check.ll @@ -127,19 +127,15 @@ ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: Ltmp3: -; CHECK-NEXT: movq (%rsi), %rcx ## 
on-fault: LBB5_1 +; CHECK-NEXT: movaps (%rsi), %xmm0 ## on-fault: LBB5_1 ; CHECK-NEXT: ## %bb.2: ## %not_null -; CHECK-NEXT: movq 8(%rsi), %rdx -; CHECK-NEXT: movq 16(%rsi), %rdi -; CHECK-NEXT: movq 24(%rsi), %rsi -; CHECK-NEXT: movq %rsi, 24(%rax) -; CHECK-NEXT: movq %rdi, 16(%rax) -; CHECK-NEXT: movq %rdx, 8(%rax) -; CHECK-NEXT: movq %rcx, (%rax) +; CHECK-NEXT: movaps 16(%rsi), %xmm1 +; CHECK-NEXT: movaps %xmm1, 16(%rax) +; CHECK-NEXT: movaps %xmm0, (%rax) ; CHECK-NEXT: retq ; CHECK-NEXT: LBB5_1: ## %is_null -; CHECK-NEXT: movq $0, 24(%rax) -; CHECK-NEXT: movq $0, 16(%rax) +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: movaps %xmm0, 16(%rax) ; CHECK-NEXT: movq $0, 8(%rax) ; CHECK-NEXT: movq $42, (%rax) ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/legalize-shl-vec.ll =================================================================== --- llvm/test/CodeGen/X86/legalize-shl-vec.ll +++ llvm/test/CodeGen/X86/legalize-shl-vec.ll @@ -48,13 +48,13 @@ ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi -; X64-NEXT: shldq $2, %rcx, %rdx -; X64-NEXT: shldq $2, %rdi, %rcx +; X64-NEXT: shldq $2, %rdx, %rcx +; X64-NEXT: shldq $2, %rdi, %rdx ; X64-NEXT: shldq $2, %r9, %rdi ; X64-NEXT: shlq $63, %rsi ; X64-NEXT: shlq $2, %r9 -; X64-NEXT: movq %rdx, 56(%rax) -; X64-NEXT: movq %rcx, 48(%rax) +; X64-NEXT: movq %rcx, 56(%rax) +; X64-NEXT: movq %rdx, 48(%rax) ; X64-NEXT: movq %rdi, 40(%rax) ; X64-NEXT: movq %r9, 32(%rax) ; X64-NEXT: movq %rsi, 24(%rax) @@ -143,12 +143,12 @@ ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; X64-NEXT: shrdq $4, %rsi, %r9 -; X64-NEXT: shrdq $4, %rcx, %rsi +; X64-NEXT: shrdq $4, %rdx, %rsi +; X64-NEXT: shrdq $4, %rcx, %rdx ; X64-NEXT: shrq $63, %r8 -; X64-NEXT: shrdq $4, %rdx, %rcx -; X64-NEXT: shrq $4, %rdx -; X64-NEXT: movq %rdx, 56(%rdi) -; X64-NEXT: movq %rcx, 48(%rdi) +; X64-NEXT: shrq $4, %rcx +; X64-NEXT: movq %rcx, 56(%rdi) +; X64-NEXT: movq 
%rdx, 48(%rdi) ; X64-NEXT: movq %rsi, 40(%rdi) ; X64-NEXT: movq %r9, 32(%rdi) ; X64-NEXT: movq %r8, (%rdi) @@ -237,12 +237,12 @@ ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; X64-NEXT: shrdq $6, %rsi, %r9 -; X64-NEXT: shrdq $6, %rcx, %rsi +; X64-NEXT: shrdq $6, %rdx, %rsi +; X64-NEXT: shrdq $6, %rcx, %rdx ; X64-NEXT: sarq $63, %r8 -; X64-NEXT: shrdq $6, %rdx, %rcx -; X64-NEXT: sarq $6, %rdx -; X64-NEXT: movq %rdx, 56(%rdi) -; X64-NEXT: movq %rcx, 48(%rdi) +; X64-NEXT: sarq $6, %rcx +; X64-NEXT: movq %rcx, 56(%rdi) +; X64-NEXT: movq %rdx, 48(%rdi) ; X64-NEXT: movq %rsi, 40(%rdi) ; X64-NEXT: movq %r9, 32(%rdi) ; X64-NEXT: movq %r8, 24(%rdi) Index: llvm/test/CodeGen/X86/osx-private-labels.ll =================================================================== --- llvm/test/CodeGen/X86/osx-private-labels.ll +++ llvm/test/CodeGen/X86/osx-private-labels.ll @@ -36,7 +36,7 @@ @private6 = private unnamed_addr constant i128 42 ; CHECK: .section __TEXT,__literal16,16byte_literals -; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: L_private6: %struct._objc_class = type { i8* } Index: llvm/test/CodeGen/X86/scheduler-backtracking.ll =================================================================== --- llvm/test/CodeGen/X86/scheduler-backtracking.ll +++ llvm/test/CodeGen/X86/scheduler-backtracking.ll @@ -252,26 +252,26 @@ ; ILP-LABEL: test2: ; ILP: # %bb.0: ; ILP-NEXT: movq %rdi, %rax -; ILP-NEXT: xorl %edi, %edi +; ILP-NEXT: xorps %xmm0, %xmm0 +; ILP-NEXT: movaps %xmm0, 16(%rdi) +; ILP-NEXT: xorl %r9d, %r9d ; ILP-NEXT: movq %rsi, %r11 ; ILP-NEXT: negq %r11 ; ILP-NEXT: movl $0, %r10d ; ILP-NEXT: sbbq %rdx, %r10 -; ILP-NEXT: movl $0, %r9d -; ILP-NEXT: sbbq %rcx, %r9 -; ILP-NEXT: sbbq %r8, %rdi -; ILP-NEXT: andq %rcx, %r9 -; ILP-NEXT: bsrq %r9, %rcx -; ILP-NEXT: xorq $63, %rcx -; ILP-NEXT: andq %r8, %rdi -; ILP-NEXT: bsrq %rdi, %r8 +; ILP-NEXT: movl $0, %edi +; ILP-NEXT: sbbq %rcx, %rdi +; ILP-NEXT: sbbq %r8, %r9 +; ILP-NEXT: andq 
%r8, %r9 +; ILP-NEXT: bsrq %r9, %r8 ; ILP-NEXT: andq %rdx, %r10 ; ILP-NEXT: bsrq %r10, %rdx ; ILP-NEXT: xorq $63, %r8 +; ILP-NEXT: andq %rcx, %rdi +; ILP-NEXT: bsrq %rdi, %rcx +; ILP-NEXT: xorq $63, %rcx ; ILP-NEXT: addq $64, %rcx -; ILP-NEXT: testq %rdi, %rdi -; ILP-NEXT: movq $0, 24(%rax) -; ILP-NEXT: movq $0, 16(%rax) +; ILP-NEXT: testq %r9, %r9 ; ILP-NEXT: movq $0, 8(%rax) ; ILP-NEXT: cmovneq %r8, %rcx ; ILP-NEXT: xorq $63, %rdx @@ -292,6 +292,8 @@ ; HYBRID-LABEL: test2: ; HYBRID: # %bb.0: ; HYBRID-NEXT: movq %rdi, %rax +; HYBRID-NEXT: xorps %xmm0, %xmm0 +; HYBRID-NEXT: movaps %xmm0, 16(%rdi) ; HYBRID-NEXT: xorl %r9d, %r9d ; HYBRID-NEXT: movq %rsi, %r11 ; HYBRID-NEXT: negq %r11 @@ -324,14 +326,14 @@ ; HYBRID-NEXT: orq %r9, %rdi ; HYBRID-NEXT: cmovneq %rcx, %rsi ; HYBRID-NEXT: movq %rsi, (%rax) -; HYBRID-NEXT: movq $0, 24(%rax) -; HYBRID-NEXT: movq $0, 16(%rax) ; HYBRID-NEXT: movq $0, 8(%rax) ; HYBRID-NEXT: retq ; ; BURR-LABEL: test2: ; BURR: # %bb.0: ; BURR-NEXT: movq %rdi, %rax +; BURR-NEXT: xorps %xmm0, %xmm0 +; BURR-NEXT: movaps %xmm0, 16(%rdi) ; BURR-NEXT: xorl %r9d, %r9d ; BURR-NEXT: movq %rsi, %r11 ; BURR-NEXT: negq %r11 @@ -364,8 +366,6 @@ ; BURR-NEXT: orq %r9, %rdi ; BURR-NEXT: cmovneq %rcx, %rsi ; BURR-NEXT: movq %rsi, (%rax) -; BURR-NEXT: movq $0, 24(%rax) -; BURR-NEXT: movq $0, 16(%rax) ; BURR-NEXT: movq $0, 8(%rax) ; BURR-NEXT: retq ; @@ -403,15 +403,17 @@ ; SRC-NEXT: subq $-128, %rsi ; SRC-NEXT: orq %r9, %rdi ; SRC-NEXT: cmovneq %rdx, %rsi +; SRC-NEXT: xorps %xmm0, %xmm0 +; SRC-NEXT: movaps %xmm0, 16(%rax) ; SRC-NEXT: movq %rsi, (%rax) -; SRC-NEXT: movq $0, 24(%rax) -; SRC-NEXT: movq $0, 16(%rax) ; SRC-NEXT: movq $0, 8(%rax) ; SRC-NEXT: retq ; ; LIN-LABEL: test2: ; LIN: # %bb.0: ; LIN-NEXT: movq %rdi, %rax +; LIN-NEXT: xorps %xmm0, %xmm0 +; LIN-NEXT: movaps %xmm0, 16(%rdi) ; LIN-NEXT: movq %rsi, %rdi ; LIN-NEXT: negq %rdi ; LIN-NEXT: andq %rsi, %rdi @@ -445,8 +447,6 @@ ; LIN-NEXT: cmoveq %rdx, %rdi ; LIN-NEXT: movq %rdi, (%rax) ; LIN-NEXT: movq $0, 
8(%rax) -; LIN-NEXT: movq $0, 16(%rax) -; LIN-NEXT: movq $0, 24(%rax) ; LIN-NEXT: retq %b = sub i256 0, %a %c = and i256 %b, %a @@ -457,52 +457,56 @@ define i256 @test3(i256 %n) nounwind { ; ILP-LABEL: test3: ; ILP: # %bb.0: +; ILP-NEXT: pushq %rbx ; ILP-NEXT: movq %rdi, %rax -; ILP-NEXT: xorl %r10d, %r10d +; ILP-NEXT: xorps %xmm0, %xmm0 +; ILP-NEXT: movaps %xmm0, 16(%rdi) +; ILP-NEXT: xorl %edi, %edi ; ILP-NEXT: movq %rsi, %r9 ; ILP-NEXT: negq %r9 +; ILP-NEXT: movl $0, %r10d +; ILP-NEXT: sbbq %rdx, %r10 ; ILP-NEXT: movl $0, %r11d -; ILP-NEXT: sbbq %rdx, %r11 -; ILP-NEXT: movl $0, %edi -; ILP-NEXT: sbbq %rcx, %rdi -; ILP-NEXT: sbbq %r8, %r10 +; ILP-NEXT: sbbq %rcx, %r11 +; ILP-NEXT: sbbq %r8, %rdi +; ILP-NEXT: notq %r8 +; ILP-NEXT: andq %rdi, %r8 +; ILP-NEXT: bsrq %r8, %rbx +; ILP-NEXT: notq %rdx +; ILP-NEXT: andq %r10, %rdx ; ILP-NEXT: notq %rcx -; ILP-NEXT: andq %rdi, %rcx +; ILP-NEXT: andq %r11, %rcx +; ILP-NEXT: bsrq %rdx, %r10 +; ILP-NEXT: xorq $63, %rbx ; ILP-NEXT: bsrq %rcx, %rdi -; ILP-NEXT: notq %rdx -; ILP-NEXT: andq %r11, %rdx ; ILP-NEXT: xorq $63, %rdi -; ILP-NEXT: notq %r8 -; ILP-NEXT: andq %r10, %r8 -; ILP-NEXT: bsrq %r8, %r10 -; ILP-NEXT: xorq $63, %r10 ; ILP-NEXT: addq $64, %rdi -; ILP-NEXT: bsrq %rdx, %r11 ; ILP-NEXT: notq %rsi ; ILP-NEXT: testq %r8, %r8 -; ILP-NEXT: movq $0, 24(%rax) -; ILP-NEXT: movq $0, 16(%rax) ; ILP-NEXT: movq $0, 8(%rax) -; ILP-NEXT: cmovneq %r10, %rdi -; ILP-NEXT: xorq $63, %r11 +; ILP-NEXT: cmovneq %rbx, %rdi +; ILP-NEXT: xorq $63, %r10 ; ILP-NEXT: andq %r9, %rsi -; ILP-NEXT: movl $127, %r9d +; ILP-NEXT: movl $127, %ebx ; ILP-NEXT: bsrq %rsi, %rsi -; ILP-NEXT: cmoveq %r9, %rsi +; ILP-NEXT: cmoveq %rbx, %rsi ; ILP-NEXT: xorq $63, %rsi ; ILP-NEXT: addq $64, %rsi ; ILP-NEXT: testq %rdx, %rdx -; ILP-NEXT: cmovneq %r11, %rsi +; ILP-NEXT: cmovneq %r10, %rsi ; ILP-NEXT: subq $-128, %rsi -; ILP-NEXT: orq %rcx, %r8 +; ILP-NEXT: orq %r8, %rcx ; ILP-NEXT: cmovneq %rdi, %rsi ; ILP-NEXT: movq %rsi, (%rax) +; ILP-NEXT: popq %rbx ; 
ILP-NEXT: retq ; ; HYBRID-LABEL: test3: ; HYBRID: # %bb.0: ; HYBRID-NEXT: pushq %rbx ; HYBRID-NEXT: movq %rdi, %rax +; HYBRID-NEXT: xorps %xmm0, %xmm0 +; HYBRID-NEXT: movaps %xmm0, 16(%rdi) ; HYBRID-NEXT: xorl %edi, %edi ; HYBRID-NEXT: movq %rsi, %r9 ; HYBRID-NEXT: negq %r9 @@ -539,8 +543,6 @@ ; HYBRID-NEXT: orq %r8, %rcx ; HYBRID-NEXT: cmovneq %rdi, %rsi ; HYBRID-NEXT: movq %rsi, (%rax) -; HYBRID-NEXT: movq $0, 24(%rax) -; HYBRID-NEXT: movq $0, 16(%rax) ; HYBRID-NEXT: movq $0, 8(%rax) ; HYBRID-NEXT: popq %rbx ; HYBRID-NEXT: retq @@ -549,6 +551,8 @@ ; BURR: # %bb.0: ; BURR-NEXT: pushq %rbx ; BURR-NEXT: movq %rdi, %rax +; BURR-NEXT: xorps %xmm0, %xmm0 +; BURR-NEXT: movaps %xmm0, 16(%rdi) ; BURR-NEXT: xorl %edi, %edi ; BURR-NEXT: movq %rsi, %r9 ; BURR-NEXT: negq %r9 @@ -585,8 +589,6 @@ ; BURR-NEXT: orq %r8, %rcx ; BURR-NEXT: cmovneq %rdi, %rsi ; BURR-NEXT: movq %rsi, (%rax) -; BURR-NEXT: movq $0, 24(%rax) -; BURR-NEXT: movq $0, 16(%rax) ; BURR-NEXT: movq $0, 8(%rax) ; BURR-NEXT: popq %rbx ; BURR-NEXT: retq @@ -629,15 +631,17 @@ ; SRC-NEXT: subq $-128, %rsi ; SRC-NEXT: orq %rcx, %r8 ; SRC-NEXT: cmovneq %rdi, %rsi +; SRC-NEXT: xorps %xmm0, %xmm0 +; SRC-NEXT: movaps %xmm0, 16(%rax) ; SRC-NEXT: movq %rsi, (%rax) -; SRC-NEXT: movq $0, 24(%rax) -; SRC-NEXT: movq $0, 16(%rax) ; SRC-NEXT: movq $0, 8(%rax) ; SRC-NEXT: retq ; ; LIN-LABEL: test3: ; LIN: # %bb.0: ; LIN-NEXT: movq %rdi, %rax +; LIN-NEXT: xorps %xmm0, %xmm0 +; LIN-NEXT: movaps %xmm0, 16(%rdi) ; LIN-NEXT: movq %rsi, %rdi ; LIN-NEXT: negq %rdi ; LIN-NEXT: notq %rsi @@ -675,8 +679,6 @@ ; LIN-NEXT: cmoveq %rsi, %rdi ; LIN-NEXT: movq %rdi, (%rax) ; LIN-NEXT: movq $0, 8(%rax) -; LIN-NEXT: movq $0, 16(%rax) -; LIN-NEXT: movq $0, 24(%rax) ; LIN-NEXT: retq %m = sub i256 -1, %n %x = sub i256 0, %n Index: llvm/test/CodeGen/X86/setcc-wide-types.ll =================================================================== --- llvm/test/CodeGen/X86/setcc-wide-types.ll +++ llvm/test/CodeGen/X86/setcc-wide-types.ll @@ -620,13 +620,11 @@ 
define i32 @ne_i128_pair(i128* %a, i128* %b) { ; SSE2-LABEL: ne_i128_pair: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqu (%rdi), %xmm0 -; SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; SSE2-NEXT: movdqu (%rsi), %xmm2 -; SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; SSE2-NEXT: movdqu 16(%rsi), %xmm0 -; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: movdqa 16(%rdi), %xmm1 +; SSE2-NEXT: pcmpeqb 16(%rsi), %xmm1 +; SSE2-NEXT: pcmpeqb (%rsi), %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %ecx ; SSE2-NEXT: xorl %eax, %eax ; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF @@ -635,13 +633,11 @@ ; ; SSE41-LABEL: ne_i128_pair: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqu (%rdi), %xmm0 -; SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; SSE41-NEXT: movdqu (%rsi), %xmm2 -; SSE41-NEXT: pxor %xmm0, %xmm2 -; SSE41-NEXT: movdqu 16(%rsi), %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: por %xmm2, %xmm0 +; SSE41-NEXT: movdqa (%rdi), %xmm0 +; SSE41-NEXT: movdqa 16(%rdi), %xmm1 +; SSE41-NEXT: pxor 16(%rsi), %xmm1 +; SSE41-NEXT: pxor (%rsi), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: xorl %eax, %eax ; SSE41-NEXT: ptest %xmm0, %xmm0 ; SSE41-NEXT: setne %al @@ -649,8 +645,8 @@ ; ; AVXANY-LABEL: ne_i128_pair: ; AVXANY: # %bb.0: -; AVXANY-NEXT: vmovdqu (%rdi), %xmm0 -; AVXANY-NEXT: vmovdqu 16(%rdi), %xmm1 +; AVXANY-NEXT: vmovdqa (%rdi), %xmm0 +; AVXANY-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVXANY-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 ; AVXANY-NEXT: vpxor (%rsi), %xmm0, %xmm0 ; AVXANY-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -678,13 +674,11 @@ define i32 @eq_i128_pair(i128* %a, i128* %b) { ; SSE2-LABEL: eq_i128_pair: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqu (%rdi), %xmm0 -; SSE2-NEXT: movdqu 16(%rdi), %xmm1 -; SSE2-NEXT: movdqu (%rsi), %xmm2 -; SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; SSE2-NEXT: movdqu 16(%rsi), %xmm0 -; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: movdqa 16(%rdi), %xmm1 +; 
SSE2-NEXT: pcmpeqb 16(%rsi), %xmm1 +; SSE2-NEXT: pcmpeqb (%rsi), %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %ecx ; SSE2-NEXT: xorl %eax, %eax ; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF @@ -693,13 +687,11 @@ ; ; SSE41-LABEL: eq_i128_pair: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqu (%rdi), %xmm0 -; SSE41-NEXT: movdqu 16(%rdi), %xmm1 -; SSE41-NEXT: movdqu (%rsi), %xmm2 -; SSE41-NEXT: pxor %xmm0, %xmm2 -; SSE41-NEXT: movdqu 16(%rsi), %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: por %xmm2, %xmm0 +; SSE41-NEXT: movdqa (%rdi), %xmm0 +; SSE41-NEXT: movdqa 16(%rdi), %xmm1 +; SSE41-NEXT: pxor 16(%rsi), %xmm1 +; SSE41-NEXT: pxor (%rsi), %xmm0 +; SSE41-NEXT: por %xmm1, %xmm0 ; SSE41-NEXT: xorl %eax, %eax ; SSE41-NEXT: ptest %xmm0, %xmm0 ; SSE41-NEXT: sete %al @@ -707,8 +699,8 @@ ; ; AVXANY-LABEL: eq_i128_pair: ; AVXANY: # %bb.0: -; AVXANY-NEXT: vmovdqu (%rdi), %xmm0 -; AVXANY-NEXT: vmovdqu 16(%rdi), %xmm1 +; AVXANY-NEXT: vmovdqa (%rdi), %xmm0 +; AVXANY-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVXANY-NEXT: vpxor 16(%rsi), %xmm1, %xmm1 ; AVXANY-NEXT: vpxor (%rsi), %xmm0, %xmm0 ; AVXANY-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -1240,8 +1232,8 @@ define i1 @eq_i512_op(i512 %a, i512 %b) { ; ANY-LABEL: eq_i512_op: ; ANY: # %bb.0: -; ANY-NEXT: movq {{[0-9]+}}(%rsp), %r10 ; ANY-NEXT: movq {{[0-9]+}}(%rsp), %rax +; ANY-NEXT: movq {{[0-9]+}}(%rsp), %r10 ; ANY-NEXT: addq $1, %rdi ; ANY-NEXT: adcq $0, %rsi ; ANY-NEXT: adcq $0, %rdx Index: llvm/test/CodeGen/X86/sret-implicit.ll =================================================================== --- llvm/test/CodeGen/X86/sret-implicit.ll +++ llvm/test/CodeGen/X86/sret-implicit.ll @@ -25,7 +25,9 @@ ; X64-LABEL: sret_demoted ; X64-DAG: movq %rdi, %rax -; X64-DAG: movq $0, (%rdi) +; X64-DAG: xorps %xmm0, %xmm0 +; X64-DAG: movaps %xmm0, 16(%rdi) +; X64-DAG: movaps %xmm0, (%rdi) ; X64: retq ; X86-LABEL: sret_demoted Index: llvm/test/CodeGen/X86/statepoint-vector.ll 
=================================================================== --- llvm/test/CodeGen/X86/statepoint-vector.ll +++ llvm/test/CodeGen/X86/statepoint-vector.ll @@ -122,9 +122,9 @@ ; CHECK-NEXT: subq $40, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq $-1, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq $-1, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq $-1, (%rsp) ; CHECK-NEXT: callq do_safepoint ; CHECK-NEXT: .Ltmp4: ; CHECK-NEXT: addq $40, %rsp Index: llvm/test/tools/llvm-lto2/X86/pipeline.ll =================================================================== --- llvm/test/tools/llvm-lto2/X86/pipeline.ll +++ llvm/test/tools/llvm-lto2/X86/pipeline.ll @@ -15,7 +15,7 @@ ; is accepted). ; RUN: llvm-lto2 run %t1.bc -o %t.o -use-new-pm -r %t1.bc,patatino,px -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define void @patatino() { Index: llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll =================================================================== --- llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll +++ llvm/test/tools/llvm-lto2/X86/slp-vectorize-pm.ll @@ -26,7 +26,7 @@ ; CHECK-O2-LPV: = !{!"llvm.loop.isvectorized", i32 1} ; CHECK-O3-LPV: = !{!"llvm.loop.isvectorized", i32 1} -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-m:e-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define i32 @foo(i32* %a) { Index: llvm/test/tools/llvm-lto2/X86/stats-file-option.ll =================================================================== --- llvm/test/tools/llvm-lto2/X86/stats-file-option.ll +++ llvm/test/tools/llvm-lto2/X86/stats-file-option.ll @@ -6,7 +6,7 @@ ; RUN: llvm-lto2 run %t1.bc -o %t.o 
-r %t1.bc,patatino,px -stats-file=%t2.stats ; RUN: FileCheck --input-file=%t2.stats %s -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define void @patatino() { Index: llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp =================================================================== --- llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp +++ llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp @@ -15,23 +15,23 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) { std::string DL1 = - UpgradeDataLayoutString("e-m:e-p:32:32-i64:64-f80:128-n8:16:32:64-S128", + UpgradeDataLayoutString("e-m:e-p:32:32-i64:64-i128:128-f80:128-n8:16:32:64-S128", "x86_64-unknown-linux-gnu"); std::string DL2 = UpgradeDataLayoutString( - "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32", "i686-pc-windows-msvc"); + "e-m:w-p:32:32-i64:64-i128:128-f80:32-n8:16:32-S32", "i686-pc-windows-msvc"); std::string DL3 = UpgradeDataLayoutString("e-m:o-i64:64-i128:128-n32:64-S128", "x86_64-apple-macosx"); EXPECT_EQ(DL1, "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64" - "-f80:128-n8:16:32:64-S128"); + "-i128:128-f80:128-n8:16:32:64-S128"); EXPECT_EQ(DL2, "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64" - "-f80:32-n8:16:32-S32"); + "-i128:128-f80:32-n8:16:32-S32"); EXPECT_EQ(DL3, "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128" "-n32:64-S128"); } TEST(DataLayoutUpgradeTest, NoDataLayoutUpgrade) { std::string DL1 = UpgradeDataLayoutString( - "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32" + "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32" "-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" "-n8:16:32:64-S128", "x86_64-unknown-linux-gnu"); @@ -40,7 +40,7 @@ "powerpc64le-unknown-linux-gnu"); std::string DL4 = 
UpgradeDataLayoutString("e-m:o-i64:64-i128:128-n32:64-S128", "aarch64--"); - EXPECT_EQ(DL1, "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64" + EXPECT_EQ(DL1, "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128" "-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" "-f80:128:128-n8:16:32:64-S128"); EXPECT_EQ(DL2, "e-p:32:32"); @@ -51,9 +51,9 @@ TEST(DataLayoutUpgradeTest, EmptyDataLayout) { std::string DL1 = UpgradeDataLayoutString("", "x86_64-unknown-linux-gnu"); std::string DL2 = UpgradeDataLayoutString( - "e-m:e-p:32:32-i64:64-f80:128-n8:16:32:64-S128", ""); + "e-m:e-p:32:32-i64:64-i128:128-f80:128-n8:16:32:64-S128", ""); EXPECT_EQ(DL1, ""); - EXPECT_EQ(DL2, "e-m:e-p:32:32-i64:64-f80:128-n8:16:32:64-S128"); + EXPECT_EQ(DL2, "e-m:e-p:32:32-i64:64-i128:128-f80:128-n8:16:32:64-S128"); } } // end namespace