diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp --- a/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -1496,9 +1496,11 @@ // Create loop block. MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *CheckBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(I, LoopBB); + F->insert(I, CheckBB); F->insert(I, RemBB); // Update machine-CFG edges by transferring all successors of the current @@ -1507,14 +1509,14 @@ BB->end()); RemBB->transferSuccessorsAndUpdatePHIs(BB); - // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB. - BB->addSuccessor(LoopBB); - BB->addSuccessor(RemBB); - LoopBB->addSuccessor(RemBB); - LoopBB->addSuccessor(LoopBB); + // Add edges BB => CheckBB, LoopBB => CheckBB, CheckBB => LoopBB/RemBB. + BB->addSuccessor(CheckBB); + LoopBB->addSuccessor(CheckBB); + CheckBB->addSuccessor(LoopBB); + CheckBB->addSuccessor(RemBB); - Register ShiftAmtReg = RI.createVirtualRegister(&AVR::LD8RegClass); - Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::LD8RegClass); + Register ShiftAmtReg = RI.createVirtualRegister(&AVR::GPR8RegClass); + Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::GPR8RegClass); Register ShiftReg = RI.createVirtualRegister(RC); Register ShiftReg2 = RI.createVirtualRegister(RC); Register ShiftAmtSrcReg = MI.getOperand(2).getReg(); @@ -1522,44 +1524,41 @@ Register DstReg = MI.getOperand(0).getReg(); // BB: - // cpi N, 0 - // breq RemBB - BuildMI(BB, dl, TII.get(AVR::CPIRdK)).addReg(ShiftAmtSrcReg).addImm(0); - BuildMI(BB, dl, TII.get(AVR::BREQk)).addMBB(RemBB); + // rjmp CheckBB + BuildMI(BB, dl, TII.get(AVR::RJMPk)).addMBB(CheckBB); // LoopBB: - // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB] - // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB] // ShiftReg2 = shift ShiftReg + auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), 
ShiftReg2).addReg(ShiftReg); + if (HasRepeatedOperand) + ShiftMI.addReg(ShiftReg); + + // CheckBB: + // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB] + // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB] + // DestReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB] // ShiftAmt2 = ShiftAmt - 1; - BuildMI(LoopBB, dl, TII.get(AVR::PHI), ShiftReg) + // if (ShiftAmt2 >= 0) goto LoopBB; + BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftReg) .addReg(SrcReg) .addMBB(BB) .addReg(ShiftReg2) .addMBB(LoopBB); - BuildMI(LoopBB, dl, TII.get(AVR::PHI), ShiftAmtReg) + BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftAmtReg) .addReg(ShiftAmtSrcReg) .addMBB(BB) .addReg(ShiftAmtReg2) .addMBB(LoopBB); - - auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg); - if (HasRepeatedOperand) - ShiftMI.addReg(ShiftReg); - - BuildMI(LoopBB, dl, TII.get(AVR::SUBIRdK), ShiftAmtReg2) - .addReg(ShiftAmtReg) - .addImm(1); - BuildMI(LoopBB, dl, TII.get(AVR::BRNEk)).addMBB(LoopBB); - - // RemBB: - // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB] - BuildMI(*RemBB, RemBB->begin(), dl, TII.get(AVR::PHI), DstReg) + BuildMI(CheckBB, dl, TII.get(AVR::PHI), DstReg) .addReg(SrcReg) .addMBB(BB) .addReg(ShiftReg2) .addMBB(LoopBB); + BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2) + .addReg(ShiftAmtReg); + BuildMI(CheckBB, dl, TII.get(AVR::BRPLk)).addMBB(LoopBB); + MI.eraseFromParent(); // The pseudo instruction is gone now. 
return RemBB; } diff --git a/llvm/test/CodeGen/AVR/rot.ll b/llvm/test/CodeGen/AVR/rot.ll --- a/llvm/test/CodeGen/AVR/rot.ll +++ b/llvm/test/CodeGen/AVR/rot.ll @@ -6,14 +6,14 @@ define i8 @rol8(i8 %val, i8 %amt) { ; CHECK: andi r22, 7 - ; CHECK-NEXT: cpi r22, 0 - ; CHECK-NEXT: breq .LBB0_2 + ; CHECK-NEXT: dec r22 + ; CHECK-NEXT: brmi .LBB0_2 ; CHECK-NEXT: .LBB0_1: ; CHECK-NEXT: lsl r24 ; CHECK-NEXT: adc r24, r1 - ; CHECK-NEXT: subi r22, 1 - ; CHECK-NEXT: brne .LBB0_1 + ; CHECK-NEXT: dec r22 + ; CHECK-NEXT: brpl .LBB0_1 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: ret @@ -33,16 +33,16 @@ define i8 @ror8(i8 %val, i8 %amt) { ; CHECK: andi r22, 7 - ; CHECK-NEXT: cpi r22, 0 - ; CHECK-NEXT: breq .LBB1_2 + ; CHECK-NEXT: dec r22 + ; CHECK-NEXT: brmi .LBB1_2 ; CHECK-NEXT: .LBB1_1: ; CHECK-NEXT: lsr r24 ; CHECK-NEXT: ldi r0, 0 ; CHECK-NEXT: ror r0 ; CHECK-NEXT: or r24, r0 - ; CHECK-NEXT: subi r22, 1 - ; CHECK-NEXT: brne .LBB1_1 + ; CHECK-NEXT: dec r22 + ; CHECK-NEXT: brpl .LBB1_1 ; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll --- a/llvm/test/CodeGen/AVR/shift.ll +++ b/llvm/test/CodeGen/AVR/shift.ll @@ -1,5 +1,49 @@ ; RUN: llc < %s -march=avr | FileCheck %s +; Optimize for speed. +; CHECK-LABEL: shift_i8_i8_speed +define i8 @shift_i8_i8_speed(i8 %a, i8 %b) { + ; CHECK: dec r22 + ; CHECK-NEXT: brmi .LBB0_2 + ; CHECK-NEXT: .LBB0_1: + ; CHECK-NEXT: lsl r24 + ; CHECK-NEXT: dec r22 + ; CHECK-NEXT: brpl .LBB0_1 + ; CHECK-NEXT: .LBB0_2: + ; CHECK-NEXT: ret + %result = shl i8 %a, %b + ret i8 %result +} + +; Optimize for size (producing slightly smaller code). 
+; CHECK-LABEL: shift_i8_i8_size +define i8 @shift_i8_i8_size(i8 %a, i8 %b) optsize { + ; CHECK: .LBB1_1: + ; CHECK-NEXT: dec r22 + ; CHECK-NEXT: brmi .LBB1_3 + ; CHECK: lsl r24 + ; CHECK-NEXT: rjmp .LBB1_1 + ; CHECK-NEXT: .LBB1_3: + ; CHECK-NEXT: ret + %result = shl i8 %a, %b + ret i8 %result +} + +; CHECK-LABEL: shift_i16_i16 +define i16 @shift_i16_i16(i16 %a, i16 %b) { + ; CHECK: dec r22 + ; CHECK-NEXT: brmi .LBB2_2 + ; CHECK-NEXT: .LBB2_1: + ; CHECK-NEXT: lsl r24 + ; CHECK-NEXT: rol r25 + ; CHECK-NEXT: dec r22 + ; CHECK-NEXT: brpl .LBB2_1 + ; CHECK-NEXT: .LBB2_2: + ; CHECK-NEXT: ret + %result = shl i16 %a, %b + ret i16 %result +} + ; CHECK-LABEL: shift_i64_i64 define i64 @shift_i64_i64(i64 %a, i64 %b) { ; CHECK: call __ashldi3