Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -4384,6 +4384,57 @@
   return false;
 }
 
+// Check if a condition of the type x < k ? k : x can be converted into a
+// bit operation instead of conditional moves.
+// Currently this is allowed given:
+// - 32-bit operation (full register width)
+// - The conditions and values match up
+// - k is 0 or -1 (all ones)
+// This function will not check the last condition, that's up to the caller.
+// It returns true if the transformation can be made, and in such case
+// returns x in V, and k in SatK. V may be overwritten even on failure.
+static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
+                                         SDValue &SatK)
+{
+  EVT VT = Op.getValueType();
+
+  if (VT != MVT::i32) {
+    return false;
+  }
+
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  SDValue TrueVal = Op.getOperand(2);
+  SDValue FalseVal = Op.getOperand(3);
+
+  SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
+                                               ? &RHS
+                                               : nullptr;
+
+  // No constant operand in the comparison, early out
+  if (!K) {
+    return false;
+  }
+
+  SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
+  V = (KTmp == TrueVal) ? FalseVal : TrueVal;
+  SDValue VTmp = (*K == LHS) ? RHS : LHS;
+
+  // If the constant of the comparison and of the select, or the variable of
+  // the comparison and of the select, do not match, early out
+  if (*K != KTmp || V != VTmp) {
+    return false;
+  }
+
+  if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
+    SatK = *K;
+    return true;
+  }
+
+  return false;
+}
+
 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   SDLoc dl(Op);
@@ -4402,6 +4453,22 @@
                          DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
   }
 
+  // Try to convert expressions of the form x < k ? k : x (and similar forms) into
+  // more efficient bit operations, which is possible when k is 0 or -1
+  // On ARM and Thumb-2 which has flexible operand 2 this will result in single
+  // instructions. On Thumb the shift and the bit operation will be two instructions.
+  SDValue LowerSatConstant;
+  if (isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
+    SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue, DAG.getConstant(31, dl, VT));
+    if (isNullConstant(LowerSatConstant)) {
+      SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
+                                      DAG.getAllOnesConstant(dl, VT));
+      return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
+    } else if (isAllOnesConstant(LowerSatConstant)) {
+      return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
+    }
+  }
+
   SDValue LHS = Op.getOperand(0);
   SDValue RHS = Op.getOperand(1);
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
Index: test/CodeGen/ARM/atomic-op.ll
===================================================================
--- test/CodeGen/ARM/atomic-op.ll
+++ test/CodeGen/ARM/atomic-op.ll
@@ -129,11 +129,12 @@
   store i32 %9, i32* %old
   call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"()
   ; CHECK: ldrex
-  ; CHECK: cmp
+  ; CHECK: bic
+  ; CHECK-NOT: cmp
   ; CHECK: strex
   ; CHECK-T1: bl ___sync_fetch_and_max_4
   ; CHECK-T1-M0: bl ___sync_fetch_and_max_4
-  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL: bic
   ; CHECK-BAREMETAL-NOT: __sync
   %10 = atomicrmw max i32* %val2, i32 0 monotonic
   store i32 %10, i32* %old
Index: test/CodeGen/ARM/sat-to-bitop.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/sat-to-bitop.ll
@@ -0,0 +1,130 @@
+; RUN: llc -mtriple=arm %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM --check-prefix=CHECK-CMP
+; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T --check-prefix=CHECK-CMP
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2 --check-prefix=CHECK-CMP
+
+
+; Check for clipping against 0 that should result in bic
+;
+; Base tests with different bit widths
+;
+
+; x < 0 ? 0 : x
+; 32-bit base test
+define i32 @sat0_base_32bit(i32 %x) #0 {
+; CHECK-LABEL: sat0_base_32bit:
+; CHECK-CMP-NOT: cmp
+; CHECK-ARM: bic {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: bic.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: bics {{r[0-9]}}, [[IM]]
+entry:
+  %cmpLow = icmp slt i32 %x, 0
+  %saturateLow = select i1 %cmpLow, i32 0, i32 %x
+  ret i32 %saturateLow
+}
+
+; x < 0 ? 0 : x
+; 16-bit base test
+define i16 @sat0_base_16bit(i16 %x) #0 {
+; CHECK-LABEL: sat0_base_16bit:
+; CHECK-CMP: cmp
+; CHECK-ARM-NOT: bic
+; CHECK-T2-NOT: bic.w
+; CHECK-T-NOT: bics
+entry:
+  %cmpLow = icmp slt i16 %x, 0
+  %saturateLow = select i1 %cmpLow, i16 0, i16 %x
+  ret i16 %saturateLow
+}
+
+; x < 0 ? 0 : x
+; 8-bit base test
+define i8 @sat0_base_8bit(i8 %x) #0 {
+; CHECK-LABEL: sat0_base_8bit:
+; CHECK-CMP: cmp
+; CHECK-ARM-NOT: bic
+; CHECK-T2-NOT: bic.w
+entry:
+  %cmpLow = icmp slt i8 %x, 0
+  %saturateLow = select i1 %cmpLow, i8 0, i8 %x
+  ret i8 %saturateLow
+}
+
+; Test where the conditional is formed in a different way
+
+; x > 0 ? x : 0
+define i32 @sat0_lower_1(i32 %x) #0 {
+; CHECK-LABEL: sat0_lower_1:
+; CHECK-CMP-NOT: cmp
+; CHECK-ARM: bic {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: bic.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: bics {{r[0-9]}}, [[IM]]
+entry:
+  %cmpGt = icmp sgt i32 %x, 0
+  %saturateLow = select i1 %cmpGt, i32 %x, i32 0
+  ret i32 %saturateLow
+}
+
+
+; Check for clipping against -1 that should result in orr
+;
+; Base tests with different bit widths
+;
+
+; x < -1 ? -1 : x
+; 32-bit base test
+define i32 @sat1_base_32bit(i32 %x) #0 {
+; CHECK-LABEL: sat1_base_32bit:
+; CHECK-CMP-NOT: cmp
+; CHECK-ARM: orr {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: orr.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: orrs {{r[0-9]}}, [[IM]]
+entry:
+  %cmpLow = icmp slt i32 %x, -1
+  %saturateLow = select i1 %cmpLow, i32 -1, i32 %x
+  ret i32 %saturateLow
+}
+
+; x < -1 ? -1 : x
+; 16-bit base test
+define i16 @sat1_base_16bit(i16 %x) #0 {
+; CHECK-LABEL: sat1_base_16bit:
+; CHECK-ARM: cmn
+; CHECK-T2: cmp
+; CHECK-T: cmp
+entry:
+  %cmpLow = icmp slt i16 %x, -1
+  %saturateLow = select i1 %cmpLow, i16 -1, i16 %x
+  ret i16 %saturateLow
+}
+
+; x < -1 ? -1 : x
+; 8-bit base test
+define i8 @sat1_base_8bit(i8 %x) #0 {
+; CHECK-LABEL: sat1_base_8bit:
+; CHECK-ARM: cmn
+; CHECK-T2: cmp
+; CHECK-T: cmp
+entry:
+  %cmpLow = icmp slt i8 %x, -1
+  %saturateLow = select i1 %cmpLow, i8 -1, i8 %x
+  ret i8 %saturateLow
+}
+
+; Test where the conditional is formed in a different way
+
+; x > -1 ? x : -1
+define i32 @sat1_lower_1(i32 %x) #0 {
+; CHECK-LABEL: sat1_lower_1:
+; CHECK-ARM: orr {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T2: orr.w {{r[0-9]}}, [[INPUT:r[0-9]]], [[INPUT]], asr #31
+; CHECK-T: asrs [[IM:r[0-9]]], {{r[0-9]}}, #31
+; CHECK-T-NEXT: orrs {{r[0-9]}}, [[IM]]
+; CHECK-CMP-NOT: cmp
+entry:
+  %cmpGt = icmp sgt i32 %x, -1
+  %saturateLow = select i1 %cmpGt, i32 %x, i32 -1
+  ret i32 %saturateLow
+}