# Summary of this patch (leading commentary; not part of any hunk):
#   * AVRISelLowering.h   - adds the AVRISD::SWAP node kind (nibble swap).
#   * AVRISelLowering.cpp - for 8-bit SHL/SRL with ShiftAmount 4, 5 or 6, emits
#                           AVRISD::SWAP followed by an AND with 0xf0 (SHL) or
#                           0x0f (SRL), subtracts 4 from ShiftAmount, and lets
#                           the existing one-shift-per-iteration loop emit the
#                           remaining shifts.
#   * AVRInstrInfo.td     - defines the AVRSwap SDNode and makes the `swap`
#                           instruction pattern match it instead of `bswap i8`.
#   * test/CodeGen/AVR    - updates ctlz/ctpop/cttz expectations to the
#                           swap-based sequences and adds i8 shl/lshr tests for
#                           shift amounts 1 through 6 to shift.ll.
# NOTE(review): the line structure of this patch was reconstructed; leading
# whitespace on hunk rows is assumed from LLVM formatting conventions -
# TODO confirm against the target files (or apply with `patch -l`).
Index: llvm/lib/Target/AVR/AVRISelLowering.h
===================================================================
--- llvm/lib/Target/AVR/AVRISelLowering.h
+++ llvm/lib/Target/AVR/AVRISelLowering.h
@@ -56,6 +56,8 @@
   CMPC,
   /// Test for zero or minus instruction.
   TST,
+  /// Swap Rd[7:4] <-> Rd[3:0].
+  SWAP,
   /// Operand 0 and operand 1 are selection variable, operand 2
   /// is condition code and operand 3 is flag operand.
   SELECT_CC
Index: llvm/lib/Target/AVR/AVRISelLowering.cpp
===================================================================
--- llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -334,6 +334,24 @@
     llvm_unreachable("Invalid shift opcode");
   }
 
+  // Optimize int8 shifts.
+  if (VT.getSizeInBits() == 8) {
+    if (Op.getOpcode() == ISD::SHL && 4 <= ShiftAmount && ShiftAmount < 7) {
+      // Optimize LSL when 4 <= ShiftAmount <= 6.
+      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
+      Victim =
+          DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0xf0, dl, VT));
+      ShiftAmount -= 4;
+    } else if (Op.getOpcode() == ISD::SRL && 4 <= ShiftAmount &&
+               ShiftAmount < 7) {
+      // Optimize LSR when 4 <= ShiftAmount <= 6.
+      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
+      Victim =
+          DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0x0f, dl, VT));
+      ShiftAmount -= 4;
+    }
+  }
+
   while (ShiftAmount--) {
     Victim = DAG.getNode(Opc8, dl, VT, Victim);
   }
Index: llvm/lib/Target/AVR/AVRInstrInfo.td
===================================================================
--- llvm/lib/Target/AVR/AVRInstrInfo.td
+++ llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -67,6 +67,9 @@
 def AVRrorLoop : SDNode<"AVRISD::RORLOOP", SDTIntShiftOp>;
 def AVRasrLoop : SDNode<"AVRISD::ASRLOOP", SDTIntShiftOp>;
 
+// SWAP node.
+def AVRSwap : SDNode<"AVRISD::SWAP", SDTIntUnaryOp>;
+
 //===----------------------------------------------------------------------===//
 // AVR Operands, Complex Patterns and Transformations Definitions.
 //===----------------------------------------------------------------------===//
@@ -1719,7 +1722,7 @@
                  (outs GPR8:$rd),
                  (ins GPR8:$src),
                  "swap\t$rd",
-                 [(set i8:$rd, (bswap i8:$src))]>;
+                 [(set i8:$rd, (AVRSwap i8:$src))]>;
 
 // IO register bit set/clear operations.
 //:TODO: add patterns when popcount(imm)==2 to be expanded with 2 sbi/cbi
Index: llvm/test/CodeGen/AVR/ctlz.ll
===================================================================
--- llvm/test/CodeGen/AVR/ctlz.ll
+++ llvm/test/CodeGen/AVR/ctlz.ll
@@ -10,8 +10,7 @@
 
 ; CHECK-LABEL: count_leading_zeros:
 ; CHECK: cpi [[RESULT:r[0-9]+]], 0
-; CHECK: brne .LBB0_1
-; CHECK: rjmp .LBB0_2
+; CHECK: breq .LBB0_2
 ; CHECK: mov [[SCRATCH:r[0-9]+]], {{.*}}[[RESULT]]
 ; CHECK: lsr {{.*}}[[SCRATCH]]
 ; CHECK: or {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
@@ -20,10 +19,8 @@
 ; CHECK: lsr {{.*}}[[RESULT]]
 ; CHECK: or {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
 ; CHECK: mov {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
-; CHECK: lsr {{.*}}[[SCRATCH]]
-; CHECK: lsr {{.*}}[[SCRATCH]]
-; CHECK: lsr {{.*}}[[SCRATCH]]
-; CHECK: lsr {{.*}}[[SCRATCH]]
+; CHECK: swap {{.*}}[[SCRATCH]]
+; CHECK: andi {{.*}}[[SCRATCH]], 15
 ; CHECK: or {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
 ; CHECK: com {{.*}}[[SCRATCH]]
 ; CHECK: mov {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
@@ -37,10 +34,7 @@
 ; CHECK: andi {{.*}}[[SCRATCH]], 51
 ; CHECK: add {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
 ; CHECK: mov {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
-; CHECK: lsr {{.*}}[[RESULT]]
-; CHECK: lsr {{.*}}[[RESULT]]
-; CHECK: lsr {{.*}}[[RESULT]]
-; CHECK: lsr {{.*}}[[RESULT]]
+; CHECK: swap {{.*}}[[RESULT]]
 ; CHECK: add {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
 ; CHECK: andi {{.*}}[[RESULT]], 15
 ; CHECK: ret
Index: llvm/test/CodeGen/AVR/ctpop.ll
===================================================================
--- llvm/test/CodeGen/AVR/ctpop.ll
+++ llvm/test/CodeGen/AVR/ctpop.ll
@@ -20,10 +20,7 @@
 ; CHECK: andi {{.*}}[[RESULT]], 51
 ; CHECK: add {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
 ; CHECK: mov {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
-; CHECK: lsr {{.*}}[[SCRATCH]]
-; CHECK: lsr {{.*}}[[SCRATCH]]
-; CHECK: lsr {{.*}}[[SCRATCH]]
-; CHECK: lsr {{.*}}[[SCRATCH]]
+; CHECK: swap {{.*}}[[SCRATCH]]
 ; CHECK: add {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
 ; CHECK: andi {{.*}}[[SCRATCH]], 15
 ; CHECK: mov {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
Index: llvm/test/CodeGen/AVR/cttz.ll
===================================================================
--- llvm/test/CodeGen/AVR/cttz.ll
+++ llvm/test/CodeGen/AVR/cttz.ll
@@ -26,10 +26,7 @@
 ; CHECK: andi {{.*}}[[RESULT]], 51
 ; CHECK: add {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
 ; CHECK: mov {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
-; CHECK: lsr {{.*}}[[SCRATCH]]
-; CHECK: lsr {{.*}}[[SCRATCH]]
-; CHECK: lsr {{.*}}[[SCRATCH]]
-; CHECK: lsr {{.*}}[[SCRATCH]]
+; CHECK: swap {{.*}}[[SCRATCH]]
 ; CHECK: add {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
 ; CHECK: andi {{.*}}[[SCRATCH]], 15
 ; CHECK: mov {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
Index: llvm/test/CodeGen/AVR/shift.ll
===================================================================
--- llvm/test/CodeGen/AVR/shift.ll
+++ llvm/test/CodeGen/AVR/shift.ll
@@ -6,3 +6,105 @@
   %result = shl i64 %a, %b
   ret i64 %result
 }
+
+define i8 @lsl_i8_1(i8 %a) {
+; CHECK-LABEL: lsl_i8_1:
+; CHECK: lsl r24
+  %res = shl i8 %a, 1
+  ret i8 %res
+}
+
+define i8 @lsl_i8_2(i8 %a) {
+; CHECK-LABEL: lsl_i8_2:
+; CHECK: lsl r24
+; CHECK-NEXT: lsl r24
+  %res = shl i8 %a, 2
+  ret i8 %res
+}
+
+define i8 @lsl_i8_3(i8 %a) {
+; CHECK-LABEL: lsl_i8_3:
+; CHECK: lsl r24
+; CHECK-NEXT: lsl r24
+; CHECK-NEXT: lsl r24
+  %res = shl i8 %a, 3
+  ret i8 %res
+}
+
+define i8 @lsl_i8_4(i8 %a) {
+; CHECK-LABEL: lsl_i8_4:
+; CHECK: swap r24
+; CHECK-NEXT: andi r24, -16
+  %res = shl i8 %a, 4
+  ret i8 %res
+}
+
+define i8 @lsl_i8_5(i8 %a) {
+; CHECK-LABEL: lsl_i8_5:
+; CHECK: swap r24
+; CHECK-NEXT: andi r24, -16
+; CHECK-NEXT: lsl r24
+  %res = shl i8 %a, 5
+  ret i8 %res
+}
+
+define i8 @lsl_i8_6(i8 %a) {
+; CHECK-LABEL: lsl_i8_6:
+; CHECK: swap r24
+; CHECK-NEXT: andi r24, -16
+; CHECK-NEXT: lsl r24
+; CHECK-NEXT: lsl r24
+  %res = shl i8 %a, 6
+  ret i8 %res
+}
+
+define i8 @lsr_i8_1(i8 %a) {
+; CHECK-LABEL: lsr_i8_1:
+; CHECK: lsr r24
+  %res = lshr i8 %a, 1
+  ret i8 %res
+}
+
+define i8 @lsr_i8_2(i8 %a) {
+; CHECK-LABEL: lsr_i8_2:
+; CHECK: lsr r24
+; CHECK-NEXT: lsr r24
+  %res = lshr i8 %a, 2
+  ret i8 %res
+}
+
+define i8 @lsr_i8_3(i8 %a) {
+; CHECK-LABEL: lsr_i8_3:
+; CHECK: lsr r24
+; CHECK-NEXT: lsr r24
+; CHECK-NEXT: lsr r24
+  %res = lshr i8 %a, 3
+  ret i8 %res
+}
+
+define i8 @lsr_i8_4(i8 %a) {
+; CHECK-LABEL: lsr_i8_4:
+; CHECK: swap r24
+; CHECK-NEXT: andi r24, 15
+  %res = lshr i8 %a, 4
+  ret i8 %res
+}
+
+define i8 @lsr_i8_5(i8 %a) {
+; CHECK-LABEL: lsr_i8_5:
+; CHECK: swap r24
+; CHECK-NEXT: andi r24, 15
+; CHECK-NEXT: lsr r24
+  %res = lshr i8 %a, 5
+  ret i8 %res
+}
+
+define i8 @lsr_i8_6(i8 %a) {
+; CHECK-LABEL: lsr_i8_6:
+; CHECK: swap r24
+; CHECK-NEXT: andi r24, 15
+; CHECK-NEXT: lsr r24
+; CHECK-NEXT: lsr r24
+  %res = lshr i8 %a, 6
+  ret i8 %res
+}