Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8276,10 +8276,23 @@
 // (csel x, (add x, 1), !cc ...)
 //
 // The latter will get matched to a CSINC instruction.
-static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
+static SDValue performAddFolding(SDNode *Op, SelectionDAG &DAG,
+                                 const AArch64Subtarget *Subtarget) {
   assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
   SDValue LHS = Op->getOperand(0);
   SDValue RHS = Op->getOperand(1);
+  SDLoc dl(Op);
+  EVT VT = Op->getValueType(0);
+
+  // (add shl(x, c1), shl(y, c2)) -> (add shl(y, c2), shl(x, c1)) if c1 < c2
+  if (Subtarget->isKryo())
+    if (LHS.getOpcode() == ISD::SHL && RHS.getOpcode() == ISD::SHL) {
+      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
+      if (C0 && C1 && C0->getAPIntValue().ult(C1->getAPIntValue()))
+        return DAG.getNode(Op->getOpcode(), dl, VT, RHS, LHS);
+    }
+
   SetCCInfoAndKind InfoAndKind;
 
   // If neither operand is a SET_CC, give up.
@@ -8298,7 +8311,6 @@
 
   SDValue CCVal;
   SDValue Cmp;
-  SDLoc dl(Op);
   if (InfoAndKind.IsAArch64) {
     CCVal = DAG.getConstant(
         AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
@@ -8310,7 +8322,6 @@
                       ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true),
                       CCVal, DAG, dl);
 
-  EVT VT = Op->getValueType(0);
   LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
   return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
 }
@@ -8328,14 +8339,15 @@
 // more appropriate form for those patterns to trigger.
 static SDValue performAddSubLongCombine(SDNode *N,
                                         TargetLowering::DAGCombinerInfo &DCI,
-                                        SelectionDAG &DAG) {
+                                        SelectionDAG &DAG,
+                                        const AArch64Subtarget *Subtarget) {
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
   MVT VT = N->getSimpleValueType(0);
   if (!VT.is128BitVector()) {
     if (N->getOpcode() == ISD::ADD)
-      return performSetccAddFolding(N, DAG);
+      return performAddFolding(N, DAG, Subtarget);
     return SDValue();
   }
 
@@ -9812,7 +9824,7 @@
     break;
   case ISD::ADD:
   case ISD::SUB:
-    return performAddSubLongCombine(N, DCI, DAG);
+    return performAddSubLongCombine(N, DCI, DAG, Subtarget);
   case ISD::XOR:
     return performXorCombine(N, DAG, DCI, Subtarget);
   case ISD::MUL:
Index: test/CodeGen/AArch64/kryo-lsl.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/kryo-lsl.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s
+
+; Verify that the shift amount in the add instruction is always the smaller
+; one.
+ +define i32 @lsl_add1(i32 %a, i32 %b) { +; CHECK-LABEL: lsl_add1: +; CHECK: lsl w8, w0, #3 +; CHECK-NEXT: add w0, w8, w1, lsl #2 +; CHECK-NEXT: ret +entry: + %shl = shl i32 %a, 3 + %shl1 = shl i32 %b, 2 + %add = add i32 %shl1, %shl + ret i32 %add +} + +define i32 @lsl_add2(i32 %a, i32 %b) { +; CHECK-LABEL: lsl_add2: +; CHECK: lsl w8, w1, #3 +; CHECK-NEXT: add w0, w8, w0, lsl #2 +; CHECK-NEXT: ret + +entry: + %shl = shl i32 %a, 2 + %shl1 = shl i32 %b, 3 + %add = add i32 %shl1, %shl + ret i32 %add +} + +define i64 @lsl_add3(i64 %a, i64 %b) { +; CHECK-LABEL: lsl_add3: +; CHECK: lsl x8, x0, #3 +; CHECK-NEXT: add x0, x8, x1, lsl #2 +; CHECK-NEXT: ret + +entry: + %shl = shl i64 %a, 3 + %shl1 = shl i64 %b, 2 + %add = add i64 %shl1, %shl + ret i64 %add +} + +define i64 @lsl_add4(i64 %a, i64 %b) { +; CHECK-LABEL: lsl_add4: +; CHECK: lsl x8, x1, #3 +; CHECK-NEXT: add x0, x8, x0, lsl #2 +; CHECK-NEXT: ret +entry: + %shl = shl i64 %a, 2 + %shl1 = shl i64 %b, 3 + %add = add i64 %shl1, %shl + ret i64 %add +} +
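
Note on the transformation, separate from the patch itself: in the AArch64 shifted-register form "add Rd, Rn, Rm, lsl #imm" the shift applies to the second source register, so the operand order produced by performAddFolding effectively decides which of the two shifts is folded into the add and which needs a separate lsl. The standalone sketch below only illustrates that ordering rule; it is not LLVM code, and the names ShlOperand and canonicalizeForKryo are invented for the example.

// Illustrative sketch only: mirrors the operand swap the combine above
// performs when both ADD operands are constant left shifts.
#include <iostream>
#include <utility>

struct ShlOperand {
  const char *Reg;    // register holding the unshifted value
  unsigned ShiftAmt;  // constant left-shift amount
};

// If the first operand has the smaller shift, swap the operands, so that the
// shift remaining in second position (the one folded into the add's
// shifted-register form) is the smaller one, as the kryo-lsl.ll CHECK lines
// expect.
static std::pair<ShlOperand, ShlOperand>
canonicalizeForKryo(ShlOperand LHS, ShlOperand RHS) {
  if (LHS.ShiftAmt < RHS.ShiftAmt)
    std::swap(LHS, RHS);
  return {LHS, RHS};
}

int main() {
  // Mimics lsl_add1: %add = add i32 %shl1, %shl, with b << 2 (b in w1) on
  // the left and a << 3 (a in w0) on the right of the IR add.
  auto [First, Second] = canonicalizeForKryo({"w1", 2}, {"w0", 3});
  std::cout << "lsl w8, " << First.Reg << ", #" << First.ShiftAmt << "\n"
            << "add w0, w8, " << Second.Reg << ", lsl #" << Second.ShiftAmt
            << "\n";
  return 0;
}

With these inputs the sketch prints the two instructions the lsl_add1 CHECK lines look for: the larger shift is materialized with a separate lsl and the smaller one rides along on the add. The isKryo() guard in the patch keeps this reordering Kryo-only, so other AArch64 subtargets see the default operand order.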