Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -88,12 +88,12 @@ [Feature3DNow]>; // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied // feature, because SSE2 can be disabled (e.g. for compiling OS kernels) -// without disabling 64-bit mode. +// without disabling 64-bit mode. Nothing should imply this feature bit. It +// is used to enforce that only 64-bit capable CPUs are used in 64-bit mode. def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions">; def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true", - "64-bit with cmpxchg16b", - [Feature64Bit]>; + "64-bit with cmpxchg16b">; def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true", @@ -520,6 +520,7 @@ FeatureSSE3, FeatureFXSR, FeatureNOPL, + Feature64Bit, FeatureCMPXCHG16B ]>; @@ -532,6 +533,7 @@ FeatureSSSE3, FeatureFXSR, FeatureNOPL, + Feature64Bit, FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureMacroFusion @@ -544,6 +546,7 @@ FeatureSSE41, FeatureFXSR, FeatureNOPL, + Feature64Bit, FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureMacroFusion @@ -559,6 +562,7 @@ FeatureSSSE3, FeatureFXSR, FeatureNOPL, + Feature64Bit, FeatureCMPXCHG16B, FeatureMOVBE, FeatureLEAForSP, @@ -580,6 +584,7 @@ FeatureSSE42, FeatureFXSR, FeatureNOPL, + Feature64Bit, FeatureCMPXCHG16B, FeatureMOVBE, FeaturePOPCNT, @@ -615,6 +620,7 @@ FeatureSSE42, FeatureFXSR, FeatureNOPL, + Feature64Bit, FeatureCMPXCHG16B, FeatureMOVBE, FeaturePOPCNT, @@ -675,6 +681,7 @@ FeatureSSE42, FeatureFXSR, FeatureNOPL, + Feature64Bit, FeatureCMPXCHG16B, FeaturePOPCNT, FeatureLAHFSAHF, @@ -692,6 +699,7 @@ FeatureSSE42, FeatureFXSR, FeatureNOPL, + Feature64Bit, FeatureCMPXCHG16B, FeaturePOPCNT, FeatureAES, @@ -710,6 +718,7 @@ FeatureAVX, FeatureFXSR, FeatureNOPL, + 
Feature64Bit, FeatureCMPXCHG16B, FeaturePOPCNT, FeatureAES, @@ -927,13 +936,13 @@ foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { def : Proc; + FeatureCMOV, Feature64Bit]>; } foreach P = ["amdfam10", "barcelona"] in { def : Proc; + FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV, Feature64Bit]>; } // Bobcat @@ -945,6 +954,7 @@ FeatureSSE4A, FeatureFXSR, FeatureNOPL, + Feature64Bit, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureLZCNT, @@ -963,6 +973,7 @@ FeatureFXSR, FeatureNOPL, FeatureSSE4A, + Feature64Bit, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureAES, @@ -987,6 +998,7 @@ FeatureCMOV, FeatureXOP, FeatureFMA4, + Feature64Bit, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, @@ -1011,6 +1023,7 @@ FeatureCMOV, FeatureXOP, FeatureFMA4, + Feature64Bit, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, @@ -1040,6 +1053,7 @@ FeatureCMOV, FeatureXOP, FeatureFMA4, + Feature64Bit, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, @@ -1075,6 +1089,7 @@ FeatureNOPL, FeatureXOP, FeatureFMA4, + Feature64Bit, FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, @@ -1107,6 +1122,7 @@ FeatureCLFLUSHOPT, FeatureCLZERO, FeatureCMOV, + Feature64Bit, FeatureCMPXCHG16B, FeatureF16C, FeatureFMA, Index: lib/Target/X86/X86Subtarget.cpp =================================================================== --- lib/Target/X86/X86Subtarget.cpp +++ lib/Target/X86/X86Subtarget.cpp @@ -230,14 +230,22 @@ if (CPUName.empty()) CPUName = "generic"; - // Make sure 64-bit features are available in 64-bit mode. (But make sure - // SSE2 can be turned off explicitly.) std::string FullFS = FS; if (In64BitMode) { + // SSE2 should default to enabled in 64-bit mode, but can be turned off + // explicitly. if (!FullFS.empty()) - FullFS = "+64bit,+sse2," + FullFS; + FullFS = "+sse2," + FullFS; else - FullFS = "+64bit,+sse2"; + FullFS = "+sse2"; + + // If no CPU was specified, enable 64bit feature to satisfy later check. 
+ if (CPUName == "generic") { + if (!FullFS.empty()) + FullFS = "+64bit," + FullFS; + else + FullFS = "+64bit"; + } } // LAHF/SAHF are always supported in non-64-bit mode. @@ -272,8 +280,9 @@ LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel << ", 3DNowLevel " << X863DNowLevel << ", 64bit " << HasX86_64 << "\n"); - assert((!In64BitMode || HasX86_64) && - "64-bit code requested on a subtarget that doesn't support it!"); + if (In64BitMode && !HasX86_64) + report_fatal_error("64-bit code requested on a subtarget that doesn't " + "support it!"); // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both // 32 and 64 bit) and for all 64-bit targets. Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2923,15 +2923,21 @@ Value *LHS, *RHS; SelectPatternFlavor SPF = matchSelectPattern(Op0, LHS, RHS).Flavor; if (SelectPatternResult::isMinOrMax(SPF)) { - Value *X; - if (match(RHS, m_Not(m_Value(X)))) - std::swap(RHS, LHS); - - if (match(LHS, m_Not(m_Value(X)))) { + // It's possible we get here before the not has been simplified, so make + // sure the input to the not isn't freely invertible. + if (match(LHS, m_Not(m_Value(X))) && !IsFreeToInvert(X, X->hasOneUse())) { Value *NotY = Builder.CreateNot(RHS); return SelectInst::Create( Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY); } + + // It's possible we get here before the not has been simplified, so make + // sure the input to the not isn't freely invertible. 
+ if (match(RHS, m_Not(m_Value(Y))) && !IsFreeToInvert(Y, Y->hasOneUse())) { + Value *NotX = Builder.CreateNot(LHS); + return SelectInst::Create( + Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y); + } } } Index: lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSelect.cpp +++ lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1821,14 +1821,25 @@ // MAX(~a, ~b) -> ~MIN(a, b) // MIN(~a, ~b) -> ~MAX(a, b) Value *A, *B; - if (match(LHS, m_Not(m_Value(A))) && match(RHS, m_Not(m_Value(B))) && - (!LHS->hasNUsesOrMore(3) || !RHS->hasNUsesOrMore(3))) { - CmpInst::Predicate InvertedPred = getInverseMinMaxPred(SPF); - Value *InvertedCmp = Builder.CreateICmp(InvertedPred, A, B); - Value *NewSel = Builder.CreateSelect(InvertedCmp, A, B); - return BinaryOperator::CreateNot(NewSel); + if (match(LHS, m_Not(m_Value(A))) && !LHS->hasNUsesOrMore(3) && + // Passing false to only consider m_Not and constants. + IsFreeToInvert(RHS, false)) { + B = Builder.CreateNot(RHS); + Value *NewMinMax = createMinMax(Builder, getInverseMinMaxFlavor(SPF), + A, B); + return BinaryOperator::CreateNot(NewMinMax); } + if (match(RHS, m_Not(m_Value(B))) && !RHS->hasNUsesOrMore(3) && + // Passing false to only consider m_Not and constants. + IsFreeToInvert(LHS, false)) { + A = Builder.CreateNot(LHS); + Value *NewMinMax = createMinMax(Builder, getInverseMinMaxFlavor(SPF), + A, B); + return BinaryOperator::CreateNot(NewMinMax); + } + + if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder)) return I; } Index: test/CodeGen/X86/cpus.ll =================================================================== --- test/CodeGen/X86/cpus.ll +++ test/CodeGen/X86/cpus.ll @@ -2,7 +2,7 @@ ; ; First ensure the error message matches what we expect. 
; CHECK-ERROR: not a recognized processor for this target -; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=foobar 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=foobar 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR ; ; Now ensure the error message doesn't occur for valid CPUs. ; CHECK-NO-ERROR-NOT: not a recognized processor for this target @@ -27,8 +27,9 @@ ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lakemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty -; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=penryn 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty @@ -55,7 +56,6 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=goldmont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=goldmont-plus 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: 
llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=tremont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty -; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=lakemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knm 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty @@ -98,3 +98,40 @@ ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=winchip2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=c3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=c3-2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty + + +; Check that we reject 64-bit mode on 32-bit only CPUs. +; CHECK-ERROR64: LLVM ERROR: 64-bit code requested on a subtarget that doesn't support it! 
+; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=i386 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=i486 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=i586 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pentium 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pentium-mmx 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=i686 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pentiumpro 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pentium2 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null 
-mtriple=x86_64-unknown-unknown -mcpu=lakemont 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=k6 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=k6-2 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=k6-3 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=athlon 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=athlon-tbird 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=athlon-4 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=athlon-xp 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=athlon-mp 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=geode 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=winchip-c6 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=winchip2 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=c3 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 +; RUN: not llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=c3-2 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR64 + +define void @foo() { + ret void +} Index: test/CodeGen/X86/early-ifcvt-crash.ll =================================================================== --- test/CodeGen/X86/early-ifcvt-crash.ll +++ 
test/CodeGen/X86/early-ifcvt-crash.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -x86-early-ifcvt -verify-machineinstrs ; RUN: llc < %s -x86-early-ifcvt -stress-early-ifcvt -verify-machineinstrs ; CPU without a scheduling model: -; RUN: llc < %s -x86-early-ifcvt -mcpu=pentium3 -verify-machineinstrs +; RUN: llc < %s -x86-early-ifcvt -mcpu=k8 -verify-machineinstrs ; ; Run these tests with and without -stress-early-ifcvt to exercise heuristics. ; Index: test/CodeGen/X86/pr11985.ll =================================================================== --- test/CodeGen/X86/pr11985.ll +++ test/CodeGen/X86/pr11985.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=prescott | FileCheck %s --check-prefix=PRESCOTT +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=nocona | FileCheck %s --check-prefix=PRESCOTT ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=nehalem | FileCheck %s --check-prefix=NEHALEM ;;; TODO: (1) Some of the loads and stores are certainly unaligned and (2) the first load and first Index: test/CodeGen/X86/pr34080.ll =================================================================== --- test/CodeGen/X86/pr34080.ll +++ test/CodeGen/X86/pr34080.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=SSE2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -mcpu=x86-64 | FileCheck %s --check-prefix=SSE2-SCHEDULE ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 | FileCheck %s --check-prefix=SSE3 -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 -mcpu=prescott | FileCheck %s --check-prefix=SSE3 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 -mcpu=nocona | FileCheck %s --check-prefix=SSE3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -mcpu=sandybridge | FileCheck %s --check-prefix=AVX Index: test/CodeGen/X86/x86-64-double-shifts-var.ll 
=================================================================== --- test/CodeGen/X86/x86-64-double-shifts-var.ll +++ test/CodeGen/X86/x86-64-double-shifts-var.ll @@ -1,8 +1,3 @@ -; RUN: llc < %s -mtriple=x86_64-- -mcpu=athlon | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-- -mcpu=athlon-tbird | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-- -mcpu=athlon-4 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-- -mcpu=athlon-xp | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-- -mcpu=athlon-mp | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=k8 | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=opteron | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-- -mcpu=athlon64 | FileCheck %s Index: test/ThinLTO/X86/cache-config.ll =================================================================== --- test/ThinLTO/X86/cache-config.ll +++ test/ThinLTO/X86/cache-config.ll @@ -2,7 +2,7 @@ ; RUN: opt -module-hash -module-summary %s -o %t.bc ; RUN: llvm-lto2 run -o %t.o %t.bc -cache-dir %t.cache -r=%t.bc,globalfunc,plx -; RUN: llvm-lto2 run -o %t.o %t.bc -cache-dir %t.cache -r=%t.bc,globalfunc,plx -mcpu=yonah +; RUN: llvm-lto2 run -o %t.o %t.bc -cache-dir %t.cache -r=%t.bc,globalfunc,plx -mcpu=core2 ; RUN: llvm-lto2 run -o %t.o %t.bc -cache-dir %t.cache -r=%t.bc,globalfunc,plx -relax-elf-relocations ; RUN: llvm-lto2 run -o %t.o %t.bc -cache-dir %t.cache -r=%t.bc,globalfunc,plx -function-sections ; RUN: llvm-lto2 run -o %t.o %t.bc -cache-dir %t.cache -r=%t.bc,globalfunc,plx -data-sections Index: test/Transforms/InstCombine/max-of-nots.ll =================================================================== --- test/Transforms/InstCombine/max-of-nots.ll +++ test/Transforms/InstCombine/max-of-nots.ll @@ -3,7 +3,7 @@ define <2 x i32> @umin_of_nots(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @umin_of_nots( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = select 
<2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> [[Y]] ; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP2]], ; CHECK-NEXT: ret <2 x i32> [[MIN]] @@ -17,7 +17,7 @@ define <2 x i32> @smin_of_nots(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @smin_of_nots( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> [[Y]] ; CHECK-NEXT: [[MIN:%.*]] = xor <2 x i32> [[TMP2]], ; CHECK-NEXT: ret <2 x i32> [[MIN]] @@ -31,7 +31,7 @@ define i32 @compute_min_2(i32 %x, i32 %y) { ; CHECK-LABEL: @compute_min_2( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 [[Y]] ; CHECK-NEXT: ret i32 [[TMP2]] ; @@ -84,7 +84,7 @@ define i8 @umin3_not(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @umin3_not( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[Z:%.*]], [[X:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Z]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i8 [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: [[R_V:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[Y]] @@ -198,7 +198,7 @@ define i32 @compute_min_3(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @compute_min_3( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 [[Y]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[Z:%.*]] ; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 [[Z]] @@ -239,8 +239,8 @@ ; CHECK-NEXT: [[NOT_VALUE:%.*]] = sub i32 3, [[X:%.*]] ; CHECK-NEXT: call void @fake_use(i32 [[NOT_VALUE]]) ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], [[Y:%.*]] -; CHECK-NEXT: 
[[MIN:%.*]] = select i1 [[TMP2]], i32 [[Y]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], [[Y:%.*]] +; CHECK-NEXT: [[MIN:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[Y]] ; CHECK-NEXT: ret i32 [[MIN]] ; %not_value = sub i32 3, %x Index: test/Transforms/InstCombine/select.ll =================================================================== --- test/Transforms/InstCombine/select.ll +++ test/Transforms/InstCombine/select.ll @@ -1333,9 +1333,9 @@ define i32 @PR27137(i32 %a) { ; CHECK-LABEL: @PR27137( -; CHECK-NEXT: [[NOT_A:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[NOT_A]], -1 -; CHECK-NEXT: [[S1:%.*]] = select i1 [[TMP1]], i32 [[NOT_A]], i32 -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 0 +; CHECK-NEXT: [[S1:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[S1]] ; %not_a = xor i32 %a, -1 Index: test/Transforms/InstCombine/select_meta.ll =================================================================== --- test/Transforms/InstCombine/select_meta.ll +++ test/Transforms/InstCombine/select_meta.ll @@ -194,12 +194,12 @@ ret i32 %retval } -; The compare should change, but the metadata remains the same because the select operands are not swapped. +; FIXME: Should we preserve the metadata here when we push the not through? define i32 @smin1(i32 %x) { ; CHECK-LABEL: @smin1( -; CHECK-NEXT: [[NOT_X:%.*]] = xor i32 %x, -1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[NOT_X]], -1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[NOT_X]], i32 -1, !prof ![[$MD1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0 +; CHECK-NEXT: [[SEL:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[SEL]] ; %not_x = xor i32 %x, -1 @@ -211,9 +211,9 @@ ; The compare should change, and the metadata is swapped because the select operands are swapped. 
define i32 @smin2(i32 %x) { ; CHECK-LABEL: @smin2( -; CHECK-NEXT: [[NOT_X:%.*]] = xor i32 %x, -1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[NOT_X]], -1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[NOT_X]], i32 -1, !prof ![[$MD3]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0 +; CHECK-NEXT: [[SEL:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[SEL]] ; %not_x = xor i32 %x, -1 @@ -225,9 +225,9 @@ ; The compare should change, but the metadata remains the same because the select operands are not swapped. define i32 @smax1(i32 %x) { ; CHECK-LABEL: @smax1( -; CHECK-NEXT: [[NOT_X:%.*]] = xor i32 %x, -1 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[NOT_X]], -1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[NOT_X]], i32 -1, !prof ![[$MD1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0 +; CHECK-NEXT: [[SEL:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[SEL]] ; %not_x = xor i32 %x, -1 @@ -239,9 +239,9 @@ ; The compare should change, and the metadata is swapped because the select operands are swapped. define i32 @smax2(i32 %x) { ; CHECK-LABEL: @smax2( -; CHECK-NEXT: [[NOT_X:%.*]] = xor i32 %x, -1 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[NOT_X]], -1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[NOT_X]], i32 -1, !prof ![[$MD3]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 0 +; CHECK-NEXT: [[SEL:%.*]] = xor i32 [[TMP2]], -1 ; CHECK-NEXT: ret i32 [[SEL]] ; %not_x = xor i32 %x, -1 Index: test/Transforms/InstCombine/sub.ll =================================================================== --- test/Transforms/InstCombine/sub.ll +++ test/Transforms/InstCombine/sub.ll @@ -1056,3 +1056,90 @@ %D = sub <2 x i32> , %C ret <2 x i32> %D } + +; Tests for (neg (max ~X, C)) -> ((min X, ~C) + 1). Same for min. 
+define i32 @test64(i32 %x) { +; CHECK-LABEL: @test64( +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255 +; CHECK-NEXT: [[RES:%.*]] = add i32 [[TMP2]], 1 +; CHECK-NEXT: ret i32 [[RES]] +; + %1 = xor i32 %x, -1 + %2 = icmp sgt i32 %1, -256 + %3 = select i1 %2, i32 %1, i32 -256 + %res = sub i32 0, %3 + ret i32 %res +} + +define i32 @test65(i32 %x) { +; CHECK-LABEL: @test65( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -256 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -256 +; CHECK-NEXT: [[RES:%.*]] = add i32 [[TMP2]], 1 +; CHECK-NEXT: ret i32 [[RES]] +; + %1 = xor i32 %x, -1 + %2 = icmp slt i32 %1, 255 + %3 = select i1 %2, i32 %1, i32 255 + %res = sub i32 0, %3 + ret i32 %res +} + +define i32 @test66(i32 %x) { +; CHECK-LABEL: @test66( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], -101 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -101 +; CHECK-NEXT: [[RES:%.*]] = add i32 [[TMP2]], 1 +; CHECK-NEXT: ret i32 [[RES]] +; + %1 = xor i32 %x, -1 + %2 = icmp ugt i32 %1, 100 + %3 = select i1 %2, i32 %1, i32 100 + %res = sub i32 0, %3 + ret i32 %res +} + +define i32 @test67(i32 %x) { +; CHECK-LABEL: @test67( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], 100 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 100 +; CHECK-NEXT: [[RES:%.*]] = add i32 [[TMP2]], 1 +; CHECK-NEXT: ret i32 [[RES]] +; + %1 = xor i32 %x, -1 + %2 = icmp ult i32 %1, -101 + %3 = select i1 %2, i32 %1, i32 -101 + %res = sub i32 0, %3 + ret i32 %res +} + +; Check splat vectors too +define <2 x i32> @test68(<2 x i32> %x) { +; CHECK-LABEL: @test68( +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> +; CHECK-NEXT: [[RES:%.*]] = add <2 x i32> [[TMP2]], +; CHECK-NEXT: ret <2 x i32> [[RES]] +; + %1 = xor <2 x i32> %x, + %2 = icmp sgt <2 x i32> %1, + %3 = select <2 x i1> %2, 
<2 x i32> %1, <2 x i32> + %res = sub <2 x i32> zeroinitializer, %3 + ret <2 x i32> %res +} + +; And non-splat constant vectors. +define <2 x i32> @test69(<2 x i32> %x) { +; CHECK-LABEL: @test69( +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> +; CHECK-NEXT: [[RES:%.*]] = add <2 x i32> [[TMP2]], +; CHECK-NEXT: ret <2 x i32> [[RES]] +; + %1 = xor <2 x i32> %x, + %2 = icmp sgt <2 x i32> %1, + %3 = select <2 x i1> %2, <2 x i32> %1, <2 x i32> + %res = sub <2 x i32> zeroinitializer, %3 + ret <2 x i32> %res +} Index: test/Transforms/InstCombine/xor.ll =================================================================== --- test/Transforms/InstCombine/xor.ll +++ test/Transforms/InstCombine/xor.ll @@ -649,8 +649,8 @@ define i32 @test44(i32 %x, i32 %y) { ; CHECK-LABEL: @test44( ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 -4, [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[X]] ; CHECK-NEXT: ret i32 [[RES]] ; %z = add i32 %y, 3 ; thwart complexity-based canonicalization @@ -663,7 +663,7 @@ define i32 @test45(i32 %x, i32 %y) { ; CHECK-LABEL: @test45( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[Y]], i32 [[X]] ; CHECK-NEXT: ret i32 [[TMP2]] ; Index: test/Transforms/LoopUnroll/X86/partial.ll =================================================================== --- test/Transforms/LoopUnroll/X86/partial.ll +++ test/Transforms/LoopUnroll/X86/partial.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -S -loop-unroll -mcpu=nehalem | FileCheck %s -; RUN: opt < %s -S -loop-unroll -mcpu=core -unroll-runtime=0 | FileCheck -check-prefix=CHECK-NOUNRL %s +; RUN: 
opt < %s -S -loop-unroll -unroll-runtime=0 | FileCheck -check-prefix=CHECK-NOUNRL %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"