diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -1923,6 +1923,56 @@ // The bigger shifts are already handled above. assert((ShiftAmt < 8) && "Unexpect shift amount"); + // Shift by four bits, using a complicated swap/eor/andi/eor sequence. + // It only works for logical shifts because the bits shifted in are all + // zeroes. + // To shift a single byte right, it produces code like this: + // swap r0 + // andi r0, 0x0f + // For a two-byte (16-bit) shift, it adds the following instructions to shift + // the upper byte into the lower byte: + // swap r1 + // eor r0, r1 + // andi r1, 0x0f + // eor r0, r1 + // For bigger shifts, it repeats the above sequence. For example, for a 3-byte + // (24-bit) shift it adds: + // swap r2 + // eor r1, r2 + // andi r2, 0x0f + // eor r1, r2 + if (!ArithmeticShift && ShiftAmt >= 4) { + Register Prev = 0; + for (size_t I = 0; I < Regs.size(); I++) { + size_t Idx = ShiftLeft ? I : Regs.size() - I - 1; + Register SwapReg = MRI.createVirtualRegister(&AVR::LD8RegClass); + BuildMI(*BB, MI, dl, TII.get(AVR::SWAPRd), SwapReg) + .addReg(Regs[Idx].first, 0, Regs[Idx].second); + if (I != 0) { + Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass); + BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R) + .addReg(Prev) + .addReg(SwapReg); + Prev = R; + } + Register AndReg = MRI.createVirtualRegister(&AVR::LD8RegClass); + BuildMI(*BB, MI, dl, TII.get(AVR::ANDIRdK), AndReg) + .addReg(SwapReg) + .addImm(ShiftLeft ? 0xf0 : 0x0f); + if (I != 0) { + Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass); + BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R) + .addReg(Prev) + .addReg(AndReg); + size_t PrevIdx = ShiftLeft ? Idx - 1 : Idx + 1; + Regs[PrevIdx] = std::pair(R, 0); + } + Prev = AndReg; + Regs[Idx] = std::pair(AndReg, 0); + } + ShiftAmt -= 4; + } + // Shift by one. 
This is the fallback that always works, and the shift // operation that is used for 1, 2, and 3 bit shifts. while (ShiftLeft && ShiftAmt) { diff --git a/llvm/test/CodeGen/AVR/shift32.ll b/llvm/test/CodeGen/AVR/shift32.ll --- a/llvm/test/CodeGen/AVR/shift32.ll +++ b/llvm/test/CodeGen/AVR/shift32.ll @@ -29,6 +29,55 @@ ret i32 %res } +define i32 @shl_i32_4(i32 %a) { +; CHECK-LABEL: shl_i32_4: +; CHECK: ; %bb.0: +; CHECK-NEXT: swap r25 +; CHECK-NEXT: andi r25, 240 +; CHECK-NEXT: swap r24 +; CHECK-NEXT: eor r25, r24 +; CHECK-NEXT: andi r24, 240 +; CHECK-NEXT: eor r25, r24 +; CHECK-NEXT: swap r23 +; CHECK-NEXT: eor r24, r23 +; CHECK-NEXT: andi r23, 240 +; CHECK-NEXT: eor r24, r23 +; CHECK-NEXT: swap r22 +; CHECK-NEXT: eor r23, r22 +; CHECK-NEXT: andi r22, 240 +; CHECK-NEXT: eor r23, r22 +; CHECK-NEXT: ret + %res = shl i32 %a, 4 + ret i32 %res +} + +; shift four bits and then shift one bit +define i32 @shl_i32_5(i32 %a) { +; CHECK-LABEL: shl_i32_5: +; CHECK: ; %bb.0: +; CHECK-NEXT: swap r25 +; CHECK-NEXT: andi r25, 240 +; CHECK-NEXT: swap r24 +; CHECK-NEXT: eor r25, r24 +; CHECK-NEXT: andi r24, 240 +; CHECK-NEXT: eor r25, r24 +; CHECK-NEXT: swap r23 +; CHECK-NEXT: eor r24, r23 +; CHECK-NEXT: andi r23, 240 +; CHECK-NEXT: eor r24, r23 +; CHECK-NEXT: swap r22 +; CHECK-NEXT: eor r23, r22 +; CHECK-NEXT: andi r22, 240 +; CHECK-NEXT: eor r23, r22 +; CHECK-NEXT: lsl r22 +; CHECK-NEXT: rol r23 +; CHECK-NEXT: rol r24 +; CHECK-NEXT: rol r25 +; CHECK-NEXT: ret + %res = shl i32 %a, 5 + ret i32 %res +} + define i32 @shl_i32_8(i32 %a) { ; CHECK-LABEL: shl_i32_8: ; CHECK: ; %bb.0: @@ -56,6 +105,29 @@ ret i32 %res } +; shift 3 of 4 registers by four bits, then move every byte up one register +define i32 @shl_i32_12(i32 %a) { +; CHECK-LABEL: shl_i32_12: +; CHECK: ; %bb.0: +; CHECK-NEXT: swap r24 +; CHECK-NEXT: andi r24, 240 +; CHECK-NEXT: swap r23 +; CHECK-NEXT: eor r24, r23 +; CHECK-NEXT: andi r23, 240 +; CHECK-NEXT: eor r24, r23 +; CHECK-NEXT: swap r22 +; CHECK-NEXT: eor r23, r22 +; CHECK-NEXT: andi r22, 240 +; 
CHECK-NEXT: eor r23, r22 +; CHECK-NEXT: mov r25, r24 +; CHECK-NEXT: mov r24, r23 +; CHECK-NEXT: mov r23, r22 +; CHECK-NEXT: mov r22, r1 +; CHECK-NEXT: ret + %res = shl i32 %a, 12 + ret i32 %res +} + ; This is a special case: this shift is performed directly inside SelectionDAG ; instead of as a custom lowering like the other shift operations. define i32 @shl_i32_16(i32 %a) { @@ -88,6 +160,21 @@ ret void } +; shift only the least significant byte, then move it into the most significant byte +define i32 @shl_i32_28(i32 %a) { +; CHECK-LABEL: shl_i32_28: +; CHECK: ; %bb.0: +; CHECK-NEXT: swap r22 +; CHECK-NEXT: andi r22, 240 +; CHECK-NEXT: mov r25, r22 +; CHECK-NEXT: mov r24, r1 +; CHECK-NEXT: mov r23, r1 +; CHECK-NEXT: mov r22, r1 +; CHECK-NEXT: ret + %res = shl i32 %a, 28 + ret i32 %res +} + define i32 @lshr_i32_1(i32 %a) { ; CHECK-LABEL: lshr_i32_1: ; CHECK: ; %bb.0: @@ -116,6 +203,28 @@ ret i32 %res } +define i32 @lshr_i32_4(i32 %a) { +; CHECK-LABEL: lshr_i32_4: +; CHECK: ; %bb.0: +; CHECK-NEXT: swap r22 +; CHECK-NEXT: andi r22, 15 +; CHECK-NEXT: swap r23 +; CHECK-NEXT: eor r22, r23 +; CHECK-NEXT: andi r23, 15 +; CHECK-NEXT: eor r22, r23 +; CHECK-NEXT: swap r24 +; CHECK-NEXT: eor r23, r24 +; CHECK-NEXT: andi r24, 15 +; CHECK-NEXT: eor r23, r24 +; CHECK-NEXT: swap r25 +; CHECK-NEXT: eor r24, r25 +; CHECK-NEXT: andi r25, 15 +; CHECK-NEXT: eor r24, r25 +; CHECK-NEXT: ret + %res = lshr i32 %a, 4 + ret i32 %res +} + define i32 @lshr_i32_8(i32 %a) { ; CHECK-LABEL: lshr_i32_8: ; CHECK: ; %bb.0: @@ -199,6 +308,31 @@ ret i32 %res } +; arithmetic shift: can't use the swap/andi/eor trick here (it shifts in zeroes) +define i32 @ashr_i32_4(i32 %a) { +; CHECK-LABEL: ashr_i32_4: +; CHECK: ; %bb.0: +; CHECK-NEXT: asr r25 +; CHECK-NEXT: ror r24 +; CHECK-NEXT: ror r23 +; CHECK-NEXT: ror r22 +; CHECK-NEXT: asr r25 +; CHECK-NEXT: ror r24 +; CHECK-NEXT: ror r23 +; CHECK-NEXT: ror r22 +; CHECK-NEXT: asr r25 +; CHECK-NEXT: ror r24 +; CHECK-NEXT: ror r23 +; CHECK-NEXT: ror r22 +; CHECK-NEXT: asr r25 +; CHECK-NEXT: ror r24 +; CHECK-NEXT: ror r23 +; 
CHECK-NEXT: ror r22 +; CHECK-NEXT: ret + %res = ashr i32 %a, 4 + ret i32 %res +} + ; TODO: this could be optimized to 4 movs, instead of 6. define i32 @ashr_i32_8(i32 %a) { ; CHECK-LABEL: ashr_i32_8: