Index: include/llvm/Target/TargetSubtargetInfo.h =================================================================== --- include/llvm/Target/TargetSubtargetInfo.h +++ include/llvm/Target/TargetSubtargetInfo.h @@ -167,7 +167,7 @@ virtual bool useAA() const; /// \brief Enable the use of the early if conversion pass. - virtual bool enableEarlyIfConversion() const { return false; } + virtual bool enableEarlyIfConversion(const MachineFunction &MF) const { return false; } /// \brief Return PBQPConstraint(s) for the target. /// Index: lib/CodeGen/EarlyIfConversion.cpp =================================================================== --- lib/CodeGen/EarlyIfConversion.cpp +++ lib/CodeGen/EarlyIfConversion.cpp @@ -778,7 +778,7 @@ << "********** Function: " << MF.getName() << '\n'); // Only run if conversion if the target wants it. const TargetSubtargetInfo &STI = MF.getSubtarget(); - if (!STI.enableEarlyIfConversion()) + if (!STI.enableEarlyIfConversion(MF)) return false; TII = STI.getInstrInfo(); Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -142,7 +142,7 @@ MachineInstr *end, unsigned NumRegionInstrs) const override; - bool enableEarlyIfConversion() const override; + bool enableEarlyIfConversion(const MachineFunction &MF) const override; std::unique_ptr getCustomPBQPConstraints() const override; }; Index: lib/Target/AArch64/AArch64Subtarget.cpp =================================================================== --- lib/Target/AArch64/AArch64Subtarget.cpp +++ lib/Target/AArch64/AArch64Subtarget.cpp @@ -120,7 +120,7 @@ Policy.OnlyBottomUp = false; } -bool AArch64Subtarget::enableEarlyIfConversion() const { +bool AArch64Subtarget::enableEarlyIfConversion(const MachineFunction &MF) const { return EnableEarlyIfConvert; } Index: lib/Target/PowerPC/PPC.td =================================================================== --- lib/Target/PowerPC/PPC.td +++ lib/Target/PowerPC/PPC.td @@ -84,6 +84,9 @@ "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">; def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; +def FeatureSlowISEL : SubtargetFeature<"slow-isel","HasSlowISEL", "true", + "Using isel is usually slower than using branches", + [FeatureISEL]>; def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", "Enable the popcnt[dw] instructions">; def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true", @@ -262,7 +265,7 @@ FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeatureFPRND, FeatureFPCVT, FeatureISEL, FeatureSlowISEL, FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto, Feature64Bit /*, Feature64BitRegs */, FeatureICBT, FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST]; @@ -342,7 +345,7 @@ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeatureFPRND, FeatureFPCVT, FeatureISEL, FeatureSlowISEL, FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST]>; Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -8297,10 +8297,11 @@ MachineFunction *F = BB->getParent(); - if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || - MI->getOpcode() == PPC::SELECT_CC_I8 || - MI->getOpcode() == PPC::SELECT_I4 || - MI->getOpcode() == PPC::SELECT_I8)) { + if (Subtarget.enableEarlyIfConversion(*F) && + (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8 || + MI->getOpcode() == PPC::SELECT_I4 || + MI->getOpcode() == PPC::SELECT_I8)) { SmallVector Cond; if (MI->getOpcode() == PPC::SELECT_CC_I4 || MI->getOpcode() == PPC::SELECT_CC_I8) Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -596,7 +596,7 @@ const SmallVectorImpl &Cond, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { - if (!Subtarget.hasISEL()) + if (!Subtarget.enableEarlyIfConversion(*MBB.getParent())) return false; if (Cond.size() != 2) Index: lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- lib/Target/PowerPC/PPCSubtarget.h +++ lib/Target/PowerPC/PPCSubtarget.h @@ -20,6 +20,7 @@ #include "PPCSelectionDAGInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetSubtargetInfo.h" #include @@ -100,6 +101,7 @@ bool HasFPRND; bool HasFPCVT; bool HasISEL; + bool HasSlowISEL; bool HasPOPCNTD; bool HasCMPB; bool HasLDBRX; @@ -223,6 +225,7 @@ bool hasP8Crypto() const { return HasP8Crypto; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } + bool hasSlowISEL() const { return HasSlowISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } bool hasCMPB() const { return HasCMPB; } bool hasLDBRX() const { return HasLDBRX; } @@ -261,7 +264,15 @@ bool isSVR4ABI() const { return !isDarwinABI(); } bool isELFv2ABI() const; - bool enableEarlyIfConversion() const override { return hasISEL(); } + // For some targets, using branches is almost always better than using + // select: the decision outcome needs to be almost completely random for isel + // to win, otherwise the branch predictor can predict the outcome. For such + // targets, only use select when optimizing for size. + bool enableEarlyIfConversion(const MachineFunction &MF) const override { + return hasISEL() && + (!hasSlowISEL() || + MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)); + } // Scheduling customization. bool enableMachineScheduler() const override; Index: lib/Target/X86/X86Subtarget.h =================================================================== --- lib/Target/X86/X86Subtarget.h +++ lib/Target/X86/X86Subtarget.h @@ -483,7 +483,7 @@ /// Enable the MachineScheduler pass for all X86 subtargets. bool enableMachineScheduler() const override { return true; } - bool enableEarlyIfConversion() const override; + bool enableEarlyIfConversion(const MachineFunction &MF) const override; /// Return the instruction itineraries based on the subtarget selection. const InstrItineraryData *getInstrItineraryData() const override { Index: test/CodeGen/PowerPC/crbit-asm.ll =================================================================== --- test/CodeGen/PowerPC/crbit-asm.ll +++ test/CodeGen/PowerPC/crbit-asm.ll @@ -55,5 +55,5 @@ ; CHECK: blr } -attributes #0 = { nounwind } +attributes #0 = { nounwind optsize } Index: test/CodeGen/PowerPC/crbits.ll =================================================================== --- test/CodeGen/PowerPC/crbits.ll +++ test/CodeGen/PowerPC/crbits.ll @@ -188,5 +188,5 @@ ; CHECK: blr } -attributes #0 = { nounwind readnone } +attributes #0 = { nounwind readnone optsize } Index: test/CodeGen/PowerPC/fold-zero.ll =================================================================== --- test/CodeGen/PowerPC/fold-zero.ll +++ test/CodeGen/PowerPC/fold-zero.ll @@ -3,7 +3,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" -define i32 @test1(i1 %a, i32 %c) nounwind { +define i32 @test1(i1 %a, i32 %c) nounwind optsize { %x = select i1 %a, i32 %c, i32 0 ret i32 %x @@ -13,7 +13,7 @@ ; CHECK: blr } -define i32 @test2(i1 %a, i32 %c) nounwind { +define i32 @test2(i1 %a, i32 %c) nounwind optsize { %x = select i1 %a, i32 0, i32 %c ret i32 %x Index: test/CodeGen/PowerPC/i1-ext-fold.ll =================================================================== --- test/CodeGen/PowerPC/i1-ext-fold.ll +++ test/CodeGen/PowerPC/i1-ext-fold.ll @@ -50,5 +50,5 @@ ; CHECK: blr } -attributes #0 = { nounwind readnone } +attributes #0 = { nounwind readnone optsize } Index: test/CodeGen/PowerPC/i64-to-float.ll =================================================================== --- test/CodeGen/PowerPC/i64-to-float.ll +++ test/CodeGen/PowerPC/i64-to-float.ll @@ -3,7 +3,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" -define float @foo(i64 %a) nounwind { +define float @foo(i64 %a) nounwind optsize { entry: %x = sitofp i64 %a to float ret float %x @@ -21,7 +21,7 @@ ; CHECK-VSX: blr } -define double @goo(i64 %a) nounwind { +define double @goo(i64 %a) nounwind optsize { entry: %x = sitofp i64 %a to double ret double %x @@ -39,7 +39,7 @@ ; CHECK-VSX: blr } -define float @foou(i64 %a) nounwind { +define float @foou(i64 %a) nounwind optsize { entry: %x = uitofp i64 %a to float ret float %x @@ -57,7 +57,7 @@ ; CHECK-VSX: blr } -define double @goou(i64 %a) nounwind { +define double @goou(i64 %a) nounwind optsize { entry: %x = uitofp i64 %a to double ret double %x Index: test/CodeGen/PowerPC/i64_fp_round.ll =================================================================== --- test/CodeGen/PowerPC/i64_fp_round.ll +++ test/CodeGen/PowerPC/i64_fp_round.ll @@ -2,7 +2,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" -define float @test(i64 %x) nounwind readnone { +define float @test(i64 %x) nounwind readnone optsize { entry: %conv = sitofp i64 %x to float ret float %conv Index: test/CodeGen/PowerPC/ifcvt.ll =================================================================== --- test/CodeGen/PowerPC/ifcvt.ll +++ test/CodeGen/PowerPC/ifcvt.ll @@ -2,7 +2,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" -define i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) { +define i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) #0 { entry: %sext82 = shl i32 %d, 16 %conv29 = ashr exact i32 %sext82, 16 @@ -32,3 +32,4 @@ ret i32 %sub43 } +attributes #0 = { optsize } Index: test/CodeGen/PowerPC/isel.ll =================================================================== --- test/CodeGen/PowerPC/isel.ll +++ test/CodeGen/PowerPC/isel.ll @@ -3,7 +3,7 @@ ; RUN: llc -mcpu=a2 < %s | FileCheck %s ; RUN: llc -mcpu=pwr7 < %s | FileCheck %s -define i64 @test1(i64 %a, i64 %b, i64 %c, i64 %d) { +define i64 @test1(i64 %a, i64 %b, i64 %c, i64 %d) #0 { entry: %p = icmp uge i64 %a, %b %x = select i1 %p, i64 %c, i64 %d @@ -12,7 +12,7 @@ ; CHECK: isel } -define i32 @test2(i32 %a, i32 %b, i32 %c, i32 %d) { +define i32 @test2(i32 %a, i32 %b, i32 %c, i32 %d) #0 { entry: %p = icmp uge i32 %a, %b %x = select i1 %p, i32 %c, i32 %d @@ -21,3 +21,4 @@ ; CHECK: isel } +attributes #0 = { optsize } Index: test/CodeGen/PowerPC/no-isel.ll =================================================================== --- test/CodeGen/PowerPC/no-isel.ll +++ test/CodeGen/PowerPC/no-isel.ll @@ -0,0 +1,18 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-ibm-linux-gnu" + +define signext i32 @i64cmp(i8* nocapture readonly %a, i8* nocapture readonly %b) { +entry: + %0 = bitcast i8* %a to i64* + %1 = load i64, i64* %0, align 8 + %2 = bitcast i8* %b to i64* + %3 = load i64, i64* %2, align 8 + %cmp = icmp slt i64 %1, %3 + %cmp1 = icmp sgt i64 %1, %3 + %. = zext i1 %cmp1 to i32 + %retval.0 = select i1 %cmp, i32 -1, i32 %. + ret i32 %retval.0 +; CHECK-NOT: isel +} Index: test/CodeGen/PowerPC/p8-isel-sched.ll =================================================================== --- test/CodeGen/PowerPC/p8-isel-sched.ll +++ test/CodeGen/PowerPC/p8-isel-sched.ll @@ -29,5 +29,5 @@ ; CHECK: isel ; CHECK: blr -attributes #0 = { nounwind } +attributes #0 = { nounwind optsize } Index: test/CodeGen/PowerPC/subreg-postra-2.ll =================================================================== --- test/CodeGen/PowerPC/subreg-postra-2.ll +++ test/CodeGen/PowerPC/subreg-postra-2.ll @@ -171,5 +171,5 @@ unreachable } -attributes #0 = { nounwind } +attributes #0 = { nounwind optsize } Index: test/CodeGen/PowerPC/subreg-postra.ll =================================================================== --- test/CodeGen/PowerPC/subreg-postra.ll +++ test/CodeGen/PowerPC/subreg-postra.ll @@ -163,6 +163,6 @@ declare void @__brelse(i32*) -attributes #0 = { nounwind } -attributes #1 = { nounwind } +attributes #0 = { nounwind optsize } +attributes #1 = { nounwind optsize }