Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3233,6 +3233,15 @@
     if (SDValue Res = BuildSDIVPow2(N))
       return Res;
 
+    // When optimizing for minimum size, leave a scalar sdiv by a power of 2
+    // as a division rather than emitting the longer shift-based expansion
+    // below. Vector divisions are still expanded.
+    // NOTE(review): this applies to all targets; confirm that targets without
+    // a hardware divide instruction also get smaller code from this.
+    if (DAG.getMachineFunction().getFunction().optForMinSize() &&
+        !VT.isVector())
+      return SDValue();
+
     // Create constants that are functions of the shift amount value.
     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
Index: test/CodeGen/ARM/sdiv-opt-size.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/sdiv-opt-size.ll
@@ -0,0 +1,85 @@
+; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv7em %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s
+
+; With minsize, a sdiv i16 by a power of 2 is kept as a sdiv instruction.
+
+define dso_local signext i16 @f0(i16 signext %F) local_unnamed_addr #0 {
+; CHECK-LABEL: f0:
+; CHECK: movs r1, #2
+; CHECK-NEXT: sdiv r0, r0, r1
+; CHECK-NEXT: sxth r0, r0
+; CHECK-NEXT: bx lr
+
+entry:
+  %0 = sdiv i16 %F, 2
+  ret i16 %0
+}
+
+; Same as above, but now with i32.
+
+define dso_local i32 @f1(i32 %F) local_unnamed_addr #0 {
+; CHECK-LABEL: f1:
+; CHECK: movs r1, #4
+; CHECK-NEXT: sdiv r0, r0, r1
+; CHECK-NEXT: bx lr
+
+entry:
+  %div = sdiv i32 %F, 4
+  ret i32 %div
+}
+
+; The immediate is not a power of 2, so we expect a sdiv.
+
+define dso_local i32 @f2(i32 %F) local_unnamed_addr #0 {
+; CHECK-LABEL: f2:
+; CHECK: movs r1, #5
+; CHECK-NEXT: sdiv r0, r0, r1
+; CHECK-NEXT: bx lr
+
+entry:
+  %div = sdiv i32 %F, 5
+  ret i32 %div
+}
+
+; Try a larger power of 2 immediate.
+
+define dso_local i32 @f3(i32 %F) local_unnamed_addr #0 {
+; CHECK-LABEL: f3:
+; CHECK: mov.w r1, #8192
+; CHECK-NEXT: sdiv r0, r0, r1
+; CHECK-NEXT: bx lr
+entry:
+  %div = sdiv i32 %F, 8192
+  ret i32 %div
+}
+
+attributes #0 = { minsize norecurse nounwind optsize readnone }
+
+
+; These functions don't have the minsize attribute, so the sdiv
+; should not be kept as a sdiv instruction, but expanded to the
+; faster instruction sequence.
+
+define dso_local signext i16 @f4(i16 signext %F) {
+; CHECK-LABEL: f4:
+; CHECK: uxth r1, r0
+; CHECK-NEXT: add.w r0, r0, r1, lsr #15
+; CHECK-NEXT: sxth r0, r0
+; CHECK-NEXT: asrs r0, r0, #1
+; CHECK-NEXT: bx lr
+entry:
+  %0 = sdiv i16 %F, 2
+  ret i16 %0
+}
+
+define dso_local i32 @f5(i32 %F) {
+; CHECK-LABEL: f5:
+; CHECK: asrs r1, r0, #31
+; CHECK-NEXT: add.w r0, r0, r1, lsr #30
+; CHECK-NEXT: asrs r0, r0, #2
+; CHECK-NEXT: bx lr
+entry:
+  %div = sdiv i32 %F, 4
+  ret i32 %div
+}