Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -87,6 +87,7 @@ CMOV, // ARM conditional move instructions. SSAT, // Signed saturation + USAT, // Unsigned saturation BCC_i64, Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -1255,6 +1255,7 @@ case ARMISD::CMOV: return "ARMISD::CMOV"; case ARMISD::SSAT: return "ARMISD::SSAT"; + case ARMISD::USAT: return "ARMISD::USAT"; case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; @@ -4205,7 +4206,7 @@ ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))); } -// Check if two chained conditionals could be converted into SSAT. +// Check if two chained conditionals could be converted into SSAT or USAT. // // SSAT can replace a set of two conditional selectors that bound a number to an // interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples: @@ -4216,10 +4217,14 @@ // x < k ? (x < -k ? -k : x) : k // etc. // +// USAT works similarly to SSAT but bounds on the interval [0, k] where k + 1 is +// a power of 2. +// // It returns true if the conversion can be done, false otherwise. -// Additionally, the variable is returned in parameter V and the constant in K. 
+// Additionally, the variable is returned in parameter V, the constant in K and +// usat is set to true if the conditional represents an unsigned saturation static bool isSaturatingConditional(const SDValue &Op, SDValue &V, - uint64_t &K) { + uint64_t &K, bool &usat) { SDValue LHS1 = Op.getOperand(0); SDValue RHS1 = Op.getOperand(1); SDValue TrueVal1 = Op.getOperand(2); @@ -4286,13 +4291,23 @@ int64_t Val1 = cast(*K1)->getSExtValue(); int64_t Val2 = cast(*K2)->getSExtValue(); int64_t PosVal = std::max(Val1, Val2); + int64_t NegVal = std::min(Val1, Val2); if (((Val1 > Val2 && UpperCheckOp == &Op) || (Val1 < Val2 && UpperCheckOp == &Op2)) && - Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) { + isPowerOf2_64(PosVal + 1)) { + + // Handle the difference between USAT (unsigned) and SSAT (signed) saturation + if (Val1 == ~Val2) + usat = false; + else if (NegVal == 0) + usat = true; + else + return false; V = V2; K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive + return true; } @@ -4306,10 +4321,16 @@ // Try to convert two saturating conditional selects into a single SSAT SDValue SatValue; uint64_t SatConstant; + bool SatUSat; if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) && - isSaturatingConditional(Op, SatValue, SatConstant)) - return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue, - DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); + isSaturatingConditional(Op, SatValue, SatConstant, SatUSat)) { + if (SatUSat) + return DAG.getNode(ARMISD::USAT, dl, VT, SatValue, + DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); + else + return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue, + DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); + } SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -139,6 +139,8 @@ def 
ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; +def ARMusatnoshift : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>; + def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; @@ -3832,6 +3834,8 @@ (USAT imm0_31:$pos, GPRnopc:$a, 0)>; def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : ARMPat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm), + (USAT imm0_31:$imm, GPRnopc:$Rn, 0)>; def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos), (SSAT16 imm1_16:$pos, GPRnopc:$a)>; def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos), Index: lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- lib/Target/ARM/ARMInstrThumb2.td +++ lib/Target/ARM/ARMInstrThumb2.td @@ -2336,6 +2336,8 @@ def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : T2Pat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm), + (t2USAT imm0_31:$imm, GPRnopc:$Rn, 0)>; def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>; def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), Index: test/CodeGen/ARM/usat.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/usat.ll @@ -0,0 +1,205 @@ +; RUN: llc -mtriple=armv4t-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=V4T +; RUN: llc -mtriple=armv6t2-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=V6T2 + +; Check for several conditions that should result in USAT. +; For example, the base test is equivalent to +; x < 0 ? 0 : (x > k ? k : x) in C. All patterns that bound x +; to the interval [0, k] where k + 1 is a power of 2 can be +; transformed into USAT. At the end there are some tests +; checking that conditionals are not transformed if they don't +; match the right pattern. + +; +; Base tests with different bit widths +; + +; x < 0 ? 0 : (x > k ? 
k : x) +; 32-bit base test +define i32 @unsigned_sat_base_32bit(i32 %x) #0 { +; CHECK-LABEL: unsigned_sat_base_32bit: +; V6T2: usat r0, #23, r0 +; V4T-NOT: usat +entry: + %cmpLow = icmp slt i32 %x, 0 + %cmpUp = icmp sgt i32 %x, 8388607 + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x + %saturateLow = select i1 %cmpLow, i32 0, i32 %saturateUp + ret i32 %saturateLow +} + +; x < 0 ? 0 : (x > k ? k : x) +; 16-bit base test +define i16 @unsigned_sat_base_16bit(i16 %x) #0 { +; CHECK-LABEL: unsigned_sat_base_16bit: +; V6T2: usat r0, #11, r0 +; V4T-NOT: usat +entry: + %cmpLow = icmp slt i16 %x, 0 + %cmpUp = icmp sgt i16 %x, 2047 + %saturateUp = select i1 %cmpUp, i16 2047, i16 %x + %saturateLow = select i1 %cmpLow, i16 0, i16 %saturateUp + ret i16 %saturateLow +} + +; x < 0 ? 0 : (x > k ? k : x) +; 8-bit base test +define i8 @unsigned_sat_base_8bit(i8 %x) #0 { +; CHECK-LABEL: unsigned_sat_base_8bit: +; V6T2: usat r0, #5, r0 +; V4T-NOT: usat +entry: + %cmpLow = icmp slt i8 %x, 0 + %cmpUp = icmp sgt i8 %x, 31 + %saturateUp = select i1 %cmpUp, i8 31, i8 %x + %saturateLow = select i1 %cmpLow, i8 0, i8 %saturateUp + ret i8 %saturateLow +} + +; +; Tests where the conditionals that check for upper and lower bounds, +; or the < and > operators, are arranged in different ways. Only some +; of the possible combinations that lead to USAT are tested. +; +; x < 0 ? 0 : (x < k ? x : k) +define i32 @unsigned_sat_lower_upper_1(i32 %x) #0 { +; CHECK-LABEL: unsigned_sat_lower_upper_1: +; V6T2: usat r0, #23, r0 +; V4T-NOT: usat +entry: + %cmpLow = icmp slt i32 %x, 0 + %cmpUp = icmp slt i32 %x, 8388607 + %saturateUp = select i1 %cmpUp, i32 %x, i32 8388607 + %saturateLow = select i1 %cmpLow, i32 0, i32 %saturateUp + ret i32 %saturateLow +} + +; x > 0 ? (x > k ? 
k : x) : 0 +define i32 @unsigned_sat_lower_upper_2(i32 %x) #0 { +; CHECK-LABEL: unsigned_sat_lower_upper_2: +; V6T2: usat r0, #23, r0 +; V4T-NOT: usat +entry: + %cmpLow = icmp sgt i32 %x, 0 + %cmpUp = icmp sgt i32 %x, 8388607 + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x + %saturateLow = select i1 %cmpLow, i32 %saturateUp, i32 0 + ret i32 %saturateLow +} + +; x < k ? (x < 0 ? 0 : x) : k +define i32 @unsigned_sat_upper_lower_1(i32 %x) #0 { +; CHECK-LABEL: unsigned_sat_upper_lower_1: +; V6T2: usat r0, #23, r0 +; V4T-NOT: usat +entry: + %cmpUp = icmp slt i32 %x, 8388607 + %cmpLow = icmp slt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 0, i32 %x + %saturateUp = select i1 %cmpUp, i32 %saturateLow, i32 8388607 + ret i32 %saturateUp +} + +; x > k ? k : (x < 0 ? 0 : x) +define i32 @unsigned_sat_upper_lower_2(i32 %x) #0 { +; CHECK-LABEL: unsigned_sat_upper_lower_2: +; V6T2: usat r0, #23, r0 +; V4T-NOT: usat +entry: + %cmpUp = icmp sgt i32 %x, 8388607 + %cmpLow = icmp slt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 0, i32 %x + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; k < x ? k : (x > 0 ? x : 0) +define i32 @unsigned_sat_upper_lower_3(i32 %x) #0 { +; CHECK-LABEL: unsigned_sat_upper_lower_3: +; V6T2: usat r0, #23, r0 +; V4T-NOT: usat +entry: + %cmpUp = icmp slt i32 8388607, %x + %cmpLow = icmp sgt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 %x, i32 0 + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; +; The following tests check for patterns that should not transform +; into USAT but are similar enough that could confuse the selector. +; +; x > k ? k : (x > 0 ? 0 : x) +; First condition upper-saturates, second doesn't lower-saturate. 
+define i32 @no_unsigned_sat_missing_lower(i32 %x) #0 { +; CHECK-LABEL: no_unsigned_sat_missing_lower: +; CHECK-NOT: usat +entry: + %cmpUp = icmp sgt i32 %x, 8388607 + %cmpLow = icmp sgt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 0, i32 %x + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; x < k ? k : (x < 0 ? 0 : x) +; Second condition lower-saturates, first doesn't upper-saturate. +define i32 @no_unsigned_sat_missing_upper(i32 %x) #0 { +; CHECK-LABEL: no_unsigned_sat_missing_upper: +; CHECK-NOT: usat +entry: + %cmpUp = icmp slt i32 %x, 8388607 + %cmpLow = icmp slt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 0, i32 %x + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; Lower constant is different in the select and in the compare +define i32 @no_unsigned_sat_incorrect_constant(i32 %x) #0 { +; CHECK-LABEL: no_unsigned_sat_incorrect_constant: +; CHECK-NOT: usat +entry: + %cmpUp = icmp sgt i32 %x, 8388607 + %cmpLow = icmp slt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 -1, i32 %x + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; The interval is not [0, k] +define i32 @no_unsigned_sat_incorrect_interval(i32 %x) #0 { +; CHECK-LABEL: no_unsigned_sat_incorrect_interval: +; CHECK-NOT: usat +entry: + %cmpUp = icmp sgt i32 %x, 8388607 + %cmpLow = icmp slt i32 %x, -4 + %saturateLow = select i1 %cmpLow, i32 -4, i32 %x + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; The returned value (y) is not the same as the tested value (x). 
+define i32 @no_unsigned_sat_incorrect_return(i32 %x, i32 %y) #0 { +; CHECK-LABEL: no_unsigned_sat_incorrect_return: +; CHECK-NOT: usat +entry: + %cmpUp = icmp sgt i32 %x, 8388607 + %cmpLow = icmp slt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 0, i32 %y + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; One of the values in a compare (y) is not the same as the rest +; of the compare and select values (x). +define i32 @no_unsigned_sat_incorrect_compare(i32 %x, i32 %y) #0 { +; CHECK-LABEL: no_unsigned_sat_incorrect_compare: +; CHECK-NOT: usat +entry: + %cmpUp = icmp sgt i32 %x, 8388607 + %cmpLow = icmp slt i32 %y, 0 + %saturateLow = select i1 %cmpLow, i32 0, i32 %x + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +}