Index: include/llvm/Target/TargetSelectionDAG.td
===================================================================
--- include/llvm/Target/TargetSelectionDAG.td
+++ include/llvm/Target/TargetSelectionDAG.td
@@ -116,6 +116,9 @@
 def SDTIntShiftOp : SDTypeProfile<1, 2, [   // shl, sra, srl
   SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>
 ]>;
+def SDTIntSatNoShOp : SDTypeProfile<1, 2, [   // ssat with no shift
+  SDTCisSameAs<0, 1>, SDTCisInt<2>
+]>;
 def SDTIntBinHiLoOp : SDTypeProfile<2, 2, [ // mulhi, mullo, sdivrem, udivrem
   SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,SDTCisInt<0>
 ]>;
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -60,6 +60,8 @@
 
       CMOV,         // ARM conditional move instructions.
 
+      SSAT,         // Signed saturation
+
       BCC_i64,
 
       SRL_FLAG,     // V,Flag = srl_flag X -> srl X, 1 + save carry out.
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -1136,6 +1136,8 @@
 
   case ARMISD::CMOV:          return "ARMISD::CMOV";
 
+  case ARMISD::SSAT:          return "ARMISD::SSAT";
+
   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
   case ARMISD::RRX:           return "ARMISD::RRX";
@@ -3725,14 +3727,156 @@
   }
 }
 
+static bool isGTorGE(ISD::CondCode CC) {
+  return CC == ISD::SETGT || CC == ISD::SETGE;
+}
+
+static bool isLTorLE(ISD::CondCode CC) {
+  return CC == ISD::SETLT || CC == ISD::SETLE;
+}
+
+// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
+// All of these conditions (and their <= and >= counterparts) will do:
+//    x < k ? k : x
+//    x > k ? x : k
+//    k < x ? x : k
+//    k > x ? k : x
+static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
+                            const SDValue TrueVal, const SDValue FalseVal,
+                            const ISD::CondCode CC, const SDValue K) {
+  return (isGTorGE(CC) &&
+          ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
+         (isLTorLE(CC) &&
+          ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
+}
+
+// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
+static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
+                            const SDValue TrueVal, const SDValue FalseVal,
+                            const ISD::CondCode CC, const SDValue K) {
+  return (isGTorGE(CC) &&
+          ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
+         (isLTorLE(CC) &&
+          ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
+}
+
+// Check if two chained conditionals could be converted into SSAT.
+//
+// SSAT can replace a set of two conditional selectors that bound a number to an
+// interval of type [~k, k] when k + 1 is a power of 2. Here are some examples:
+//
+//     x < ~k ? ~k : (x > k ? k : x)
+//     x < ~k ? ~k : (x < k ? x : k)
+//     x > ~k ? (x > k ? k : x) : ~k
+//     x < k ? (x < ~k ? ~k : x) : k
+//     etc.
+//
+// It returns true if the conversion can be done, false otherwise.
+// Additionally, the variable is returned in parameter V and the constant in K.
+static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
+                                    uint64_t &K) {
+  SDValue LHS1 = Op.getOperand(0);
+  SDValue RHS1 = Op.getOperand(1);
+  SDValue TrueVal1 = Op.getOperand(2);
+  SDValue FalseVal1 = Op.getOperand(3);
+  ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+  const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
+  if (Op2.getOpcode() != ISD::SELECT_CC)
+    return false;
+
+  SDValue LHS2 = Op2.getOperand(0);
+  SDValue RHS2 = Op2.getOperand(1);
+  ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
+  SDValue TrueVal2 = Op2.getOperand(2);
+  SDValue FalseVal2 = Op2.getOperand(3);
+
+  // Find out which are the constants and which are the variables
+  // in each conditional
+  SDValue *K1 = isa<ConstantSDNode>(LHS1)   ? &LHS1
+                : isa<ConstantSDNode>(RHS1) ? &RHS1
+                                            : nullptr;
+  SDValue *K2 = isa<ConstantSDNode>(LHS2)   ? &LHS2
+                : isa<ConstantSDNode>(RHS2) ? &RHS2
+                                            : nullptr;
+  SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
+  SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
+  SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
+  SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
+
+  // We must detect cases where the original operations worked with 16- or
+  // 8-bit values. In such case, V2Tmp != V2 because the comparison operations
+  // must work with sign-extended values but the select operations return
+  // the original non-extended value.
+  SDValue V2TmpReg = V2Tmp;
+  if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
+    V2TmpReg = V2Tmp->getOperand(0);
+
+  // Check that the registers and the constants have the correct values
+  // in both conditionals
+  if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
+      V2TmpReg != V2)
+    return false;
+
+  // Figure out which conditional is saturating the lower/upper bound.
+  const SDValue *LowerCheckOp =
+      isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
+          ? &Op
+          : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
+                ? &Op2
+                : nullptr;
+  const SDValue *UpperCheckOp =
+      isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
+          ? &Op
+          : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
+                ? &Op2
+                : nullptr;
+
+  if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
+    return false;
+
+  // Check that the constant in the lower-bound check is
+  // the opposite of the constant in the upper-bound check
+  // in 1's complement.
+  // The sign-extended constants are compared as unsigned values: once
+  // Val1 == ~Val2 holds, exactly one of them has its top bit set, so the
+  // unsigned minimum is the non-negative (upper-bound) constant.
+  uint64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
+  uint64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
+  uint64_t PosVal = Val1 < Val2 ? Val1 : Val2;
+
+  if (((Val1 < Val2 && UpperCheckOp == &Op) ||
+       (Val1 > Val2 && UpperCheckOp == &Op2)) &&
+      Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
+    V = V2;
+    K = PosVal;
+    return true;
+  }
+
+  return false;
+}
+
 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  SDLoc dl(Op);
+
+  // Try to convert two saturating conditional selects into a single SSAT.
+  // SSAT only exists in ARM mode from ARMv6 onwards and in Thumb2, so the
+  // conversion must not be attempted on any other subtarget.
+  SDValue SatValue;
+  uint64_t SatConstant;
+  if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
+       Subtarget->isThumb2()) &&
+      isSaturatingConditional(Op, SatValue, SatConstant))
+    return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
+                       DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
+
   SDValue LHS = Op.getOperand(0);
   SDValue RHS = Op.getOperand(1);
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
   SDValue TrueVal = Op.getOperand(2);
   SDValue FalseVal = Op.getOperand(3);
-  SDLoc dl(Op);
 
   if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
     DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
Index: lib/Target/ARM/ARMInstrInfo.td
===================================================================
--- lib/Target/ARM/ARMInstrInfo.td
+++ lib/Target/ARM/ARMInstrInfo.td
@@ -128,6 +128,8 @@
 def ARMcmov          : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
                               [SDNPInGlue]>;
 
+def ARMssatnoshift   : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
+
 def ARMbrcond        : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
                               [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
 
@@ -3715,6 +3717,8 @@
                (SSAT imm1_32:$pos, GPRnopc:$a, 0)>;
 def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos),
                (USAT imm0_31:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
+               (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
 
 //===----------------------------------------------------------------------===//
 //  Bitwise Instructions.
Index: lib/Target/ARM/ARMInstrThumb2.td
===================================================================
--- lib/Target/ARM/ARMInstrThumb2.td
+++ lib/Target/ARM/ARMInstrThumb2.td
@@ -2287,6 +2287,8 @@
 
 def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>;
 def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>;
+def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
+            (t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
 
 //===----------------------------------------------------------------------===//
 //  Shift and rotate Instructions.
Index: test/CodeGen/ARM/ssat.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/ssat.ll
@@ -0,0 +1,222 @@
+; RUN: llc -mtriple=armv4t-eabi %s -o - | FileCheck %s --check-prefix=V4T
+; RUN: llc -mtriple=armv6t2-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s
+
+; Check for several conditions that should result in SSAT.
+; For example, the base test is equivalent to
+; x < ~k ? ~k : (x > k ? k : x) in C. All patterns that bound x
+; to the interval [~k, k] where k + 1 is a power of 2 can be
+; transformed into SSAT. At the end there are some tests
+; checking that conditionals are not transformed if they don't
+; match the right pattern.
+;
+; SSAT requires ARMv6 (ARM mode) or Thumb2, so the ARMv4T run only checks
+; that the instruction is never generated.
+
+;
+; Base tests with different bit widths
+;
+
+; x < ~k ? ~k : (x > k ? k : x)
+; 32-bit base test
+define i32 @sat_base_32bit(i32 %x) #0 {
+; CHECK-LABEL: sat_base_32bit:
+; CHECK: ssat r0, #24, r0
+; V4T-LABEL: sat_base_32bit:
+; V4T-NOT: ssat
+entry:
+  %cmpLow = icmp slt i32 %x, -8388608
+  %cmpUp = icmp sgt i32 %x, 8388607
+  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x
+  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %saturateUp
+  ret i32 %saturateLow
+}
+
+; x < ~k ? ~k : (x > k ? k : x)
+; 16-bit base test
+define i16 @sat_base_16bit(i16 %x) #0 {
+; CHECK-LABEL: sat_base_16bit:
+; CHECK: ssat r0, #12, r0
+entry:
+  %cmpLow = icmp slt i16 %x, -2048
+  %cmpUp = icmp sgt i16 %x, 2047
+  %saturateUp = select i1 %cmpUp, i16 2047, i16 %x
+  %saturateLow = select i1 %cmpLow, i16 -2048, i16 %saturateUp
+  ret i16 %saturateLow
+}
+
+; x < ~k ? ~k : (x > k ? k : x)
+; 8-bit base test
+define i8 @sat_base_8bit(i8 %x) #0 {
+; CHECK-LABEL: sat_base_8bit:
+; CHECK: ssat r0, #6, r0
+entry:
+  %cmpLow = icmp slt i8 %x, -32
+  %cmpUp = icmp sgt i8 %x, 31
+  %saturateUp = select i1 %cmpUp, i8 31, i8 %x
+  %saturateLow = select i1 %cmpLow, i8 -32, i8 %saturateUp
+  ret i8 %saturateLow
+}
+
+;
+; Tests where the conditionals that check for upper and lower bounds,
+; or the < and > operators, are arranged in different ways. Only some
+; of the possible combinations that lead to SSAT are tested.
+;
+
+; x < ~k ? ~k : (x < k ? x : k)
+define i32 @sat_lower_upper_1(i32 %x) #0 {
+; CHECK-LABEL: sat_lower_upper_1:
+; CHECK: ssat r0, #24, r0
+entry:
+  %cmpLow = icmp slt i32 %x, -8388608
+  %cmpUp = icmp slt i32 %x, 8388607
+  %saturateUp = select i1 %cmpUp, i32 %x, i32 8388607
+  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %saturateUp
+  ret i32 %saturateLow
+}
+
+; x > ~k ? (x > k ? k : x) : ~k
+define i32 @sat_lower_upper_2(i32 %x) #0 {
+; CHECK-LABEL: sat_lower_upper_2:
+; CHECK: ssat r0, #24, r0
+entry:
+  %cmpLow = icmp sgt i32 %x, -8388608
+  %cmpUp = icmp sgt i32 %x, 8388607
+  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x
+  %saturateLow = select i1 %cmpLow, i32 %saturateUp, i32 -8388608
+  ret i32 %saturateLow
+}
+
+; x < k ? (x < ~k ? ~k : x) : k
+define i32 @sat_upper_lower_1(i32 %x) #0 {
+; CHECK-LABEL: sat_upper_lower_1:
+; CHECK: ssat r0, #24, r0
+entry:
+  %cmpUp = icmp slt i32 %x, 8388607
+  %cmpLow = icmp slt i32 %x, -8388608
+  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %x
+  %saturateUp = select i1 %cmpUp, i32 %saturateLow, i32 8388607
+  ret i32 %saturateUp
+}
+
+; x > k ? k : (x < ~k ? ~k : x)
+define i32 @sat_upper_lower_2(i32 %x) #0 {
+; CHECK-LABEL: sat_upper_lower_2:
+; CHECK: ssat r0, #24, r0
+entry:
+  %cmpUp = icmp sgt i32 %x, 8388607
+  %cmpLow = icmp slt i32 %x, -8388608
+  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %x
+  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  ret i32 %saturateUp
+}
+
+; k < x ? k : (x > ~k ? x : ~k)
+define i32 @sat_upper_lower_3(i32 %x) #0 {
+; CHECK-LABEL: sat_upper_lower_3:
+; CHECK: ssat r0, #24, r0
+entry:
+  %cmpUp = icmp slt i32 8388607, %x
+  %cmpLow = icmp sgt i32 %x, -8388608
+  %saturateLow = select i1 %cmpLow, i32 %x, i32 -8388608
+  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  ret i32 %saturateUp
+}
+
+;
+; Miscellanea
+;
+
+; Check that >= and <= work the same as > and <
+; k <= x ? k : (x >= ~k ? x : ~k)
+define i32 @sat_le_ge(i32 %x) #0 {
+; CHECK-LABEL: sat_le_ge:
+; CHECK: ssat r0, #24, r0
+entry:
+  %cmpUp = icmp sle i32 8388607, %x
+  %cmpLow = icmp sge i32 %x, -8388608
+  %saturateLow = select i1 %cmpLow, i32 %x, i32 -8388608
+  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  ret i32 %saturateUp
+}
+
+;
+; The following tests check for patterns that should not transform
+; into SSAT but are similar enough that could confuse the selector.
+;
+
+; x > k ? k : (x > ~k ? ~k : x)
+; First condition upper-saturates, second doesn't lower-saturate.
+define i32 @no_sat_missing_lower(i32 %x) #0 {
+; CHECK-LABEL: no_sat_missing_lower:
+; CHECK-NOT: ssat
+entry:
+  %cmpUp = icmp sgt i32 %x, 8388607
+  %cmpLow = icmp sgt i32 %x, -8388608
+  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %x
+  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  ret i32 %saturateUp
+}
+
+; x < k ? k : (x < ~k ? ~k : x)
+; Second condition lower-saturates, first doesn't upper-saturate.
+define i32 @no_sat_missing_upper(i32 %x) #0 {
+; CHECK-LABEL: no_sat_missing_upper:
+; CHECK-NOT: ssat
+entry:
+  %cmpUp = icmp slt i32 %x, 8388607
+  %cmpLow = icmp slt i32 %x, -8388608
+  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %x
+  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  ret i32 %saturateUp
+}
+
+; Lower constant is different in the select and in the compare
+define i32 @no_sat_incorrect_constant(i32 %x) #0 {
+; CHECK-LABEL: no_sat_incorrect_constant:
+; CHECK-NOT: ssat
+entry:
+  %cmpUp = icmp sgt i32 %x, 8388607
+  %cmpLow = icmp slt i32 %x, -8388608
+  %saturateLow = select i1 %cmpLow, i32 -8388607, i32 %x
+  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  ret i32 %saturateUp
+}
+
+; The interval is not [~k, k]
+define i32 @no_sat_incorrect_interval(i32 %x) #0 {
+; CHECK-LABEL: no_sat_incorrect_interval:
+; CHECK-NOT: ssat
+entry:
+  %cmpUp = icmp sgt i32 %x, 8388607
+  %cmpLow = icmp slt i32 %x, -19088744
+  %saturateLow = select i1 %cmpLow, i32 -19088744, i32 %x
+  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  ret i32 %saturateUp
+}
+
+; The returned value (y) is not the same as the tested value (x).
+define i32 @no_sat_incorrect_return(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: no_sat_incorrect_return:
+; CHECK-NOT: ssat
+entry:
+  %cmpUp = icmp sgt i32 %x, 8388607
+  %cmpLow = icmp slt i32 %x, -8388608
+  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %y
+  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  ret i32 %saturateUp
+}
+
+; One of the values in a compare (y) is not the same as the rest
+; of the compare and select values (x).
+define i32 @no_sat_incorrect_compare(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: no_sat_incorrect_compare:
+; CHECK-NOT: ssat
+entry:
+  %cmpUp = icmp sgt i32 %x, 8388607
+  %cmpLow = icmp slt i32 %y, -8388608
+  %saturateLow = select i1 %cmpLow, i32 -8388608, i32 %x
+  %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
+  ret i32 %saturateUp
+}