Index: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -271,6 +271,22 @@
   // Get the alignment operand for a NEON VLD or VST instruction.
   SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
                         bool is64BitVector);
+
+  /// Returns the number of instructions required to materialize the given
+  /// constant in a register, or 3 if a literal pool load is needed.
+  unsigned ConstantMaterializationCost(unsigned Val) const;
+
+  /// Checks if N is a multiplication by a constant where we can extract out a
+  /// power of two from the constant so that it can be used in a shift, but only
+  /// if it simplifies the materialization of the constant. Returns true if it
+  /// is, and assigns to PowerOfTwo the power of two that should be extracted
+  /// out and to NewMulConst the new constant to be multiplied by.
+  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
+                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;
+
+  /// Replace N with M in CurDAG, in a way that also ensures that M gets
+  /// selected when N would have been selected.
+  void replaceDAGValue(const SDValue &N, SDValue M);
 };
 }
 
@@ -464,6 +480,61 @@
          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 }
 
+unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
+  if (Subtarget->isThumb()) {
+    if (Val <= 255) return 1;                                  // MOV
+    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1;    // MOVW
+    if (~Val <= 255) return 2;                                 // MOV + MVN
+    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;           // MOV + LSL
+  } else {
+    if (ARM_AM::getSOImmVal(Val) != -1) return 1;              // MOV
+    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;             // MVN
+    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1;    // MOVW
+    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;              // two instrs
+  }
+  if (Subtarget->useMovt(*MF)) return 2;                       // MOVW + MOVT
+  return 3;                                                    // Literal pool load
+}
+
+bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
+                                             unsigned MaxShift,
+                                             unsigned &PowerOfTwo,
+                                             SDValue &NewMulConst) const {
+  assert(N.getOpcode() == ISD::MUL);
+  assert(MaxShift > 0);
+
+  // If the multiply is used in more than one place then changing the constant
+  // will make other uses incorrect, so don't.
+  if (!N.hasOneUse()) return false;
+  // Check if the multiply is by a constant.
+  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
+  if (!MulConst) return false;
+  // If the constant is used in more than one place then modifying it will mean
+  // we need to materialize two constants instead of one, which is a bad idea.
+  if (!MulConst->hasOneUse()) return false;
+  unsigned MulConstVal = MulConst->getZExtValue();
+  if (MulConstVal == 0) return false;
+
+  // Find the largest power of 2 that MulConstVal is a multiple of.
+  PowerOfTwo = MaxShift;
+  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
+    --PowerOfTwo;
+    if (PowerOfTwo == 0) return false;
+  }
+
+  // Only optimise if the new cost is better.
+  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
+  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
+  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
+  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
+  return NewCost < OldCost;
+}
+
+void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
+  CurDAG->RepositionNode(N.getNode(), M.getNode());
+  CurDAG->ReplaceAllUsesWith(N, M);
+}
+
 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                               SDValue &BaseReg,
                                               SDValue &Opc,
@@ -471,6 +542,21 @@
   if (DisableShifterOp)
     return false;
 
+  // If N is a multiply-by-constant and it's profitable to extract a shift and
+  // use it in a shifted operand, do so.
+  if (N.getOpcode() == ISD::MUL) {
+    unsigned PowerOfTwo = 0;
+    SDValue NewMulConst;
+    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
+      replaceDAGValue(N.getOperand(1), NewMulConst);
+      BaseReg = N;
+      Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
+                                                          PowerOfTwo),
+                                      SDLoc(N), MVT::i32);
+      return true;
+    }
+  }
+
   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 
   // Don't match base register only case. That is matched to a separate
@@ -655,6 +741,18 @@
     }
   }
 
+  // If Offset is a multiply-by-constant and it's profitable to extract a shift
+  // and use it in a shifted operand, do so.
+  if (Offset.getOpcode() == ISD::MUL) {
+    unsigned PowerOfTwo = 0;
+    SDValue NewMulConst;
+    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
+      replaceDAGValue(Offset.getOperand(1), NewMulConst);
+      ShAmt = PowerOfTwo;
+      ShOpcVal = ARM_AM::lsl;
+    }
+  }
+
   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                   SDLoc(N), MVT::i32);
   return true;
@@ -1314,6 +1412,17 @@
     }
   }
 
+  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
+  // and use it in a shifted operand, do so.
+  if (OffReg.getOpcode() == ISD::MUL) {
+    unsigned PowerOfTwo = 0;
+    SDValue NewMulConst;
+    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
+      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
+      ShAmt = PowerOfTwo;
+    }
+  }
+
   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
 
   return true;
@@ -2392,25 +2501,8 @@
   }
   case ISD::Constant: {
     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
-    bool UseCP = true;
-    if (Subtarget->useMovt(*MF))
-      // Thumb2-aware targets have the MOVT instruction, so all immediates can
-      // be done with MOV + MOVT, at worst.
-      UseCP = false;
-    else {
-      if (Subtarget->isThumb()) {
-        UseCP = (Val > 255 &&                                  // MOV
-                 ~Val > 255 &&                                 // MOV + MVN
-                 !ARM_AM::isThumbImmShiftedVal(Val) &&         // MOV + LSL
-                 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
-      } else
-        UseCP = (ARM_AM::getSOImmVal(Val) == -1 &&             // MOV
                 ARM_AM::getSOImmVal(~Val) == -1 &&            // MVN
-                 !ARM_AM::isSOImmTwoPartVal(Val) &&            // two instrs.
-                 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
-    }
-
-    if (UseCP) {
+    // If we can't materialize the constant, we need to use a literal pool load.
+    if (ConstantMaterializationCost(Val) > 2) {
       SDValue CPIdx = CurDAG->getTargetConstantPool(
           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
           TLI->getPointerTy(CurDAG->getDataLayout()));
Index: llvm/trunk/test/CodeGen/ARM/shifter_operand.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/shifter_operand.ll
+++ llvm/trunk/test/CodeGen/ARM/shifter_operand.ll
@@ -1,14 +1,14 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
-; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-ARM
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-ARM
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMB
 ; rdar://8576755
 
 
 define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
-; A8-LABEL: test1:
-; A8: add r0, r0, r1, lsl r2
-
-; A9-LABEL: test1:
-; A9: add r0, r0, r1, lsl r2
+; CHECK-LABEL: test1:
+; CHECK-ARM: add r0, r0, r1, lsl r2
+; CHECK-THUMB: lsls r1, r2
+; CHECK-THUMB: add r0, r1
   %shift.upgrd.1 = zext i8 %sh to i32
   %A = shl i32 %Y, %shift.upgrd.1
   %B = add i32 %X, %A
@@ -16,11 +16,10 @@
 }
 
 define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
-; A8-LABEL: test2:
-; A8: bic r0, r0, r1, asr r2
-
-; A9-LABEL: test2:
-; A9: bic r0, r0, r1, asr r2
+; CHECK-LABEL: test2:
+; CHECK-ARM: bic r0, r0, r1, asr r2
+; CHECK-THUMB: asrs r1, r2
+; CHECK-THUMB: bics r0, r1
   %shift.upgrd.2 = zext i8 %sh to i32
   %A = ashr i32 %Y, %shift.upgrd.2
   %B = xor i32 %A, -1
@@ -30,14 +29,9 @@
 
 define i32 @test3(i32 %base, i32 %base2, i32 %offset) {
 entry:
-; A8-LABEL: test3:
-; A8: ldr r0, [r0, r2, lsl #2]
-; A8: ldr r1, [r1, r2, lsl #2]
-
-; lsl #2 is free
-; A9-LABEL: test3:
-; A9: ldr r0, [r0, r2, lsl #2]
-; A9: ldr r1, [r1, r2, lsl #2]
+; CHECK-LABEL: test3:
+; CHECK: ldr{{(.w)?}} r0, [r0, r2, lsl #2]
+; CHECK: ldr{{(.w)?}} r1, [r1, r2, lsl #2]
   %tmp1 = shl i32 %offset, 2
   %tmp2 = add i32 %base, %tmp1
   %tmp3 = inttoptr i32 %tmp2 to i32*
@@ -53,17 +47,11 @@
 
 define fastcc void @test4(i16 %addr) nounwind {
 entry:
-; A8-LABEL: test4:
-; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A8-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
-; A8: str [[REG]], [r0, r1, lsl #2]
-; A8-NOT: str [[REG]], [r0]
-
-; A9-LABEL: test4:
-; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A9-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
-; A9: str [[REG]], [r0, r1, lsl #2]
-; A9-NOT: str [[REG]], [r0]
+; CHECK-LABEL: test4:
+; CHECK: ldr{{(.w)?}} [[REG:r[0-9]+]], [r0, r1, lsl #2]
+; CHECK-NOT: ldr{{(.w)?}} [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; CHECK: str{{(.w)?}} [[REG]], [r0, r1, lsl #2]
+; CHECK-NOT: str{{(.w)?}} [[REG]], [r0]
   %0 = tail call i8* (...) @malloc(i32 undef) nounwind
   %1 = bitcast i8* %0 to i32*
   %2 = sext i16 %addr to i32
@@ -73,3 +61,166 @@
   store i32 %5, i32* %3, align 4
   ret void
 }
+
+define i32 @test_orr_extract_from_mul_1(i32 %x, i32 %y) {
+entry:
+; CHECK-LABEL: test_orr_extract_from_mul_1
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-ARM: orr r0, r1, r0
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK-THUMB: orrs r0, r1
+  %mul = mul i32 %y, 63767
+  %or = or i32 %mul, %x
+  ret i32 %or
+}
+
+define i32 @test_orr_extract_from_mul_2(i32 %x, i32 %y) {
+; CHECK-LABEL: test_orr_extract_from_mul_2
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #1
+entry:
+  %mul1 = mul i32 %y, 127534
+  %or = or i32 %mul1, %x
+  ret i32 %or
+}
+
+define i32 @test_orr_extract_from_mul_3(i32 %x, i32 %y) {
+; CHECK-LABEL: test_orr_extract_from_mul_3
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #2
+entry:
+  %mul1 = mul i32 %y, 255068
+  %or = or i32 %mul1, %x
+  ret i32 %or
+}
+
+define i32 @test_orr_extract_from_mul_4(i32 %x, i32 %y) {
+; CHECK-LABEL: test_orr_extract_from_mul_4
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #3
+entry:
+  %mul1 = mul i32 %y, 510136
+  %or = or i32 %mul1, %x
+  ret i32 %or
+}
+
+define i32 @test_orr_extract_from_mul_5(i32 %x, i32 %y) {
+; CHECK-LABEL: test_orr_extract_from_mul_5
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #4
+entry:
+  %mul1 = mul i32 %y, 1020272
+  %or = or i32 %mul1, %x
+  ret i32 %or
+}
+
+define i32 @test_orr_extract_from_mul_6(i32 %x, i32 %y) {
+; CHECK-LABEL: test_orr_extract_from_mul_6
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #16
+entry:
+  %mul = mul i32 %y, -115933184
+  %or = or i32 %mul, %x
+  ret i32 %or
+}
+
+define i32 @test_load_extract_from_mul_1(i8* %x, i32 %y) {
+; CHECK-LABEL: test_load_extract_from_mul_1
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: ldrb r0, [r0, r1]
+entry:
+  %mul = mul i32 %y, 63767
+  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul
+  %0 = load i8, i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+define i32 @test_load_extract_from_mul_2(i8* %x, i32 %y) {
+; CHECK-LABEL: test_load_extract_from_mul_2
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: ldrb{{(.w)?}} r0, [r0, r1, lsl #1]
+entry:
+  %mul1 = mul i32 %y, 127534
+  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul1
+  %0 = load i8, i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+define i32 @test_load_extract_from_mul_3(i8* %x, i32 %y) {
+; CHECK-LABEL: test_load_extract_from_mul_3
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: ldrb{{(.w)?}} r0, [r0, r1, lsl #2]
+entry:
+  %mul1 = mul i32 %y, 255068
+  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul1
+  %0 = load i8, i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+define i32 @test_load_extract_from_mul_4(i8* %x, i32 %y) {
+; CHECK-LABEL: test_load_extract_from_mul_4
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: ldrb{{(.w)?}} r0, [r0, r1, lsl #3]
+entry:
+  %mul1 = mul i32 %y, 510136
+  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul1
+  %0 = load i8, i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+define i32 @test_load_extract_from_mul_5(i8* %x, i32 %y) {
+; CHECK-LABEL: test_load_extract_from_mul_5
+; CHECK-ARM: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-ARM: ldrb r0, [r0, r1, lsl #4]
+; CHECK-THUMB: movw r2, #37232
+; CHECK-THUMB: movt r2, #15
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK-THUMB: ldrb r0, [r0, r1]
+entry:
+  %mul1 = mul i32 %y, 1020272
+  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul1
+  %0 = load i8, i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
+
+define i32 @test_load_extract_from_mul_6(i8* %x, i32 %y) {
+; CHECK-LABEL: test_load_extract_from_mul_6
+; CHECK-ARM: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-ARM: ldrb r0, [r0, r1, lsl #16]
+; CHECK-THUMB: movs r2, #0
+; CHECK-THUMB: movt r2, #63767
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK-THUMB: ldrb r0, [r0, r1]
+entry:
+  %mul = mul i32 %y, -115933184
+  %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul
+  %0 = load i8, i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  ret i32 %conv
+}
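
Note (not part of the patch): a minimal standalone sketch of the transformation the new hooks perform, for readers unfamiliar with the shifted-operand trick. The helper names (cost, extractShift) and the simplified cost model (one instruction for a MOVW-encodable value, two for MOVW + MOVT) are illustrative assumptions only, not the ARMDAGToDAGISel API; the real ConstantMaterializationCost also accounts for plain MOV/MVN/shifted encodings and literal pool loads.

// Illustration only: pull the largest power of two out of a multiplier when
// the remaining constant is cheaper to materialize than the original one.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Simplified stand-in for ConstantMaterializationCost on a MOVW/MOVT target:
// one instruction if the value fits in MOVW, otherwise two (MOVW + MOVT).
static unsigned cost(uint32_t Val) { return Val <= 0xffff ? 1 : 2; }

// Mirrors the shape of canExtractShiftFromMul: on success, Shift holds the
// extracted power of two and NewConst the reduced multiplier.
static bool extractShift(uint32_t MulConst, unsigned MaxShift,
                         unsigned &Shift, uint32_t &NewConst) {
  assert(MaxShift > 0 && MaxShift < 32);
  if (MulConst == 0)
    return false;
  // Find the largest power of two (up to MaxShift) dividing MulConst.
  Shift = MaxShift;
  while (MulConst % (1u << Shift) != 0) {
    if (--Shift == 0)
      return false;
  }
  NewConst = MulConst >> Shift;
  return cost(NewConst) < cost(MulConst); // Only profitable if cheaper.
}

int main() {
  unsigned Shift;
  uint32_t NewConst;
  // 510136 = 63767 << 3: MOVW + MOVT (cost 2) becomes a single MOVW (cost 1),
  // and the lsl #3 is folded into the user's shifted operand for free.
  if (extractShift(510136, 31, Shift, NewConst))
    std::printf("mul by 510136 -> mul by %u, lsl #%u\n", NewConst, Shift);
  // 63767 already fits in one MOVW, so nothing is extracted.
  assert(!extractShift(63767, 31, Shift, NewConst));
  return 0;
}

This is the same profitability argument the tests above check: test_orr_extract_from_mul_4 and test_load_extract_from_mul_4 expect the multiply by 510136 to become a multiply by 63767 whose shift is absorbed by the orr shifted operand or the ldrb addressing mode, while the Thumb2 load case caps the extractable shift at 3, so test_load_extract_from_mul_5 keeps the full MOVW + MOVT sequence there.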