Index: lib/Target/ARM/ARMCodeGenPrepare.cpp
===================================================================
--- lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -136,6 +136,9 @@
   const ARMSubtarget *ST = nullptr;
   IRPromoter *Promoter = nullptr;
   std::set AllVisited;
+  // Set in doInitialization: true when llvm.arm.sel has no uses in the module,
+  // so emitting instructions that overwrite the GE flags is safe.
+  bool SafeToWriteGE = false;
 
   bool isSupportedValue(Value *V);
   bool isLegalToPromote(Value *V);
@@ -658,6 +661,12 @@
   if (!I)
     return false;
 
+  // If the sel intrinsic is used anywhere in the module, conservatively
+  // refuse to generate any instructions that would overwrite the GE flags.
+  // SafeToWriteGE is computed once per module in doInitialization.
+  if (!SafeToWriteGE)
+    return false;
+
   // If promotion is not safe, can we use a DSP instruction to natively
   // handle the narrow type?
   if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
@@ -789,6 +798,10 @@
 
 bool ARMCodeGenPrepare::doInitialization(Module &M) {
   Promoter = new IRPromoter(&M);
+  // Look the intrinsic up by name rather than via Intrinsic::getDeclaration,
+  // which would insert a declaration into the module as a side effect.
+  const Function *Sel = M.getFunction(Intrinsic::getName(Intrinsic::arm_sel));
+  SafeToWriteGE = !Sel || Sel->use_empty();
   return false;
 }
 
Index: test/CodeGen/ARM/arm-cgp-sel.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/arm-cgp-sel.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=thumbv7em %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 -arm-enable-scalar-dsp=true %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true %s -o - | FileCheck %s
+
+define i32 @use_sel(i32 %a, i32 %b) {
+  %1 = call i32 @llvm.arm.sel(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+; Test that because the sel intrinsic is used above, we don't generate a dsp
+; sub.
+; CHECK-LABEL: dsp_disable:
+; CHECK-NOT: usub
+; CHECK-NOT: ssub
+define void @dsp_disable(i8* %in, i8* %out, i8 %compare) {
+  %first = getelementptr inbounds i8, i8* %in, i32 0
+  %second = getelementptr inbounds i8, i8* %in, i32 1
+  %ld0 = load i8, i8* %first
+  %ld1 = load i8, i8* %second
+  %xor = xor i8 %ld0, -1
+  %cmp = icmp ult i8 %compare, %ld1
+  %select = select i1 %cmp, i8 %compare, i8 %xor
+  %sub = sub i8 %ld0, %select
+  store i8 %sub, i8* %out, align 1
+  ret void
+}
+
+declare i32 @llvm.arm.sel(i32, i32)