Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -4370,6 +4370,43 @@
   SDValue TrueVal = Op.getOperand(2);
   SDValue FalseVal = Op.getOperand(3);
 
+  // Try to convert expressions of the form x < k ? k : x (and similar forms)
+  // into more efficient bit operations, which is possible when k is 0 or -1:
+  //   x < 0  ? 0  : x  ==>  x & ~(x >> 31)
+  //   x < -1 ? -1 : x  ==>  x |  (x >> 31)
+  // On ARM and Thumb-2, which have a flexible second operand, this results in
+  // a single instruction. On Thumb the shift and the bit operation will be two
+  // instructions.
+  //
+  // K is the constant operand of the comparison (if any), KTmp the constant
+  // arm of the select and V the other arm of the select.
+  SDValue *K = isa<ConstantSDNode>(LHS)
+                   ? &LHS
+                   : isa<ConstantSDNode>(RHS) ? &RHS : nullptr;
+  SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
+  SDValue V = (KTmp == TrueVal) ? FalseVal : TrueVal;
+  // The non-constant operand of the comparison.
+  SDValue CmpV = (K == &LHS) ? RHS : LHS;
+
+  // The rewrite is only sound when the value being selected is the very value
+  // that was compared (for promoted i8/i16 values the comparison works on a
+  // sign-extended copy, which is a different node) and when the sign bit is
+  // bit 31, i.e. for full-width 32-bit operations.
+  if (K && VT == MVT::i32 && V == CmpV &&
+      isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K) && *K == KTmp) {
+    int64_t KVal = cast<ConstantSDNode>(*K)->getSExtValue();
+
+    SDValue ShiftV =
+        DAG.getNode(ISD::SRA, dl, VT, V, DAG.getConstant(31, dl, VT));
+    if (KVal == 0) {
+      SDValue NotShiftV =
+          DAG.getNode(ISD::XOR, dl, VT, ShiftV, DAG.getConstant(-1, dl, VT));
+      return DAG.getNode(ISD::AND, dl, VT, V, NotShiftV);
+    }
+    if (KVal == -1)
+      return DAG.getNode(ISD::OR, dl, VT, V, ShiftV);
+  }
+
   if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
     DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
                                                     dl);
Index: test/CodeGen/ARM/atomic-op.ll
===================================================================
--- test/CodeGen/ARM/atomic-op.ll
+++ test/CodeGen/ARM/atomic-op.ll
@@ -129,11 +129,11 @@
   store i32 %9, i32* %old
   call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
   ; CHECK: ldrex
-  ; CHECK: cmp
+  ; CHECK: bic
   ; CHECK: strex
   ; CHECK-T1: bl ___sync_fetch_and_max_4
   ; CHECK-T1-M0: bl ___sync_fetch_and_max_4
-  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL: bic
   ; CHECK-BAREMETAL-NOT: __sync
   %10 = atomicrmw max i32* %val2, i32 0 monotonic
   store i32 %10, i32* %old
Index: test/CodeGen/ARM/sat-to-bitop.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/sat-to-bitop.ll
@@ -0,0 +1,123 @@
+; RUN: llc -mtriple=arm %s -o - | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s --check-prefix=CHECK-T
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s --check-prefix=CHECK-T2
+
+
+; Check for clipping against 0 that should result in bic
+;
+; Base tests with different bit widths
+;
+
+; x < 0 ? 0 : x
+; 32-bit base test
+define i32 @sat0_base_32bit(i32 %x) #0 {
+; CHECK-LABEL: sat0_base_32bit:
+; CHECK: bic {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: bic.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: bics {{r[0-9]}}, [[IM]]
+entry:
+  %cmpLow = icmp slt i32 %x, 0
+  %saturateLow = select i1 %cmpLow, i32 0, i32 %x
+  ret i32 %saturateLow
+}
+
+; x < 0 ? 0 : x
+; 16-bit base test
+; The compare sees the sign-extended value while the select returns the
+; unextended one, so testing bit 31 of the selected value would be wrong.
+; The bit-operation form must not be used; only the label is checked.
+define i16 @sat0_base_16bit(i16 %x) #0 {
+; CHECK-LABEL: sat0_base_16bit:
+entry:
+  %cmpLow = icmp slt i16 %x, 0
+  %saturateLow = select i1 %cmpLow, i16 0, i16 %x
+  ret i16 %saturateLow
+}
+
+; x < 0 ? 0 : x
+; 8-bit base test
+; The compare sees the sign-extended value while the select returns the
+; unextended one, so testing bit 31 of the selected value would be wrong.
+; The bit-operation form must not be used; only the label is checked.
+define i8 @sat0_base_8bit(i8 %x) #0 {
+; CHECK-LABEL: sat0_base_8bit:
+entry:
+  %cmpLow = icmp slt i8 %x, 0
+  %saturateLow = select i1 %cmpLow, i8 0, i8 %x
+  ret i8 %saturateLow
+}
+
+; Test where the conditional is formed in a different way
+
+; x > 0 ? x : 0
+define i32 @sat0_lower_1(i32 %x) #0 {
+; CHECK-LABEL: sat0_lower_1:
+; CHECK: bic {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: bic.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: bics {{r[0-9]}}, [[IM]]
+entry:
+  %cmpGt = icmp sgt i32 %x, 0
+  %saturateLow = select i1 %cmpGt, i32 %x, i32 0
+  ret i32 %saturateLow
+}
+
+
+; Check for clipping against -1 that should result in orr
+;
+; Base tests with different bit widths
+;
+
+; x < -1 ? -1 : x
+; 32-bit base test
+define i32 @sat1_base_32bit(i32 %x) #0 {
+; CHECK-LABEL: sat1_base_32bit:
+; CHECK: orr {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: orr.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: orrs {{r[0-9]}}, [[IM]]
+entry:
+  %cmpLow = icmp slt i32 %x, -1
+  %saturateLow = select i1 %cmpLow, i32 -1, i32 %x
+  ret i32 %saturateLow
+}
+
+; x < -1 ? -1 : x
+; 16-bit base test
+; Note this currently fails due to combination of constant hoisting and bitcasts
+define i16 @sat1_base_16bit(i16 %x) #0 {
+; CHECK-LABEL: sat1_base_16bit:
+entry:
+  %cmpLow = icmp slt i16 %x, -1
+  %saturateLow = select i1 %cmpLow, i16 -1, i16 %x
+  ret i16 %saturateLow
+}
+
+; x < -1 ? -1 : x
+; 8-bit base test
+; The compare sees the sign-extended value while the select returns the
+; unextended one, so testing bit 31 of the selected value would be wrong.
+; The bit-operation form must not be used; only the label is checked.
+define i8 @sat1_base_8bit(i8 %x) #0 {
+; CHECK-LABEL: sat1_base_8bit:
+entry:
+  %cmpLow = icmp slt i8 %x, -1
+  %saturateLow = select i1 %cmpLow, i8 -1, i8 %x
+  ret i8 %saturateLow
+}
+
+; Test where the conditional is formed in a different way
+
+; x > -1 ? x : -1
+define i32 @sat1_lower_1(i32 %x) #0 {
+; CHECK-LABEL: sat1_lower_1:
+; CHECK: orr {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: orr.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: orrs {{r[0-9]}}, [[IM]]
+entry:
+  %cmpGt = icmp sgt i32 %x, -1
+  %saturateLow = select i1 %cmpGt, i32 %x, i32 -1
+  ret i32 %saturateLow
+}