diff --git a/llvm/lib/Target/AVR/AVRShiftExpand.cpp b/llvm/lib/Target/AVR/AVRShiftExpand.cpp
--- a/llvm/lib/Target/AVR/AVRShiftExpand.cpp
+++ b/llvm/lib/Target/AVR/AVRShiftExpand.cpp
@@ -51,8 +51,9 @@
     if (!I.isShift())
       // Only expand shift instructions (shl, lshr, ashr).
       continue;
-    if (I.getType() != Type::getInt32Ty(Ctx))
-      // Only expand plain i32 types.
+    if (I.getType() == Type::getInt8Ty(Ctx))
+      // Only expand non-8-bit shifts, since 8-bit-shifts are expanded directly
+      // during isel.
       continue;
     if (isa<ConstantInt>(I.getOperand(1)))
       // Only expand when the shift amount is not known.
@@ -75,7 +76,7 @@
 void AVRShiftExpand::expand(BinaryOperator *BI) {
   auto &Ctx = BI->getContext();
   IRBuilder<> Builder(BI);
-  Type *Int32Ty = Type::getInt32Ty(Ctx);
+  Type *InputTy = cast<Instruction>(BI)->getType();
   Type *Int8Ty = Type::getInt8Ty(Ctx);
   Value *Int8Zero = ConstantInt::get(Int8Ty, 0);
 
@@ -101,7 +102,7 @@
   Builder.SetInsertPoint(LoopBB);
   PHINode *ShiftAmountPHI = Builder.CreatePHI(Int8Ty, 2);
   ShiftAmountPHI->addIncoming(ShiftAmount, BB);
-  PHINode *ValuePHI = Builder.CreatePHI(Int32Ty, 2);
+  PHINode *ValuePHI = Builder.CreatePHI(InputTy, 2);
   ValuePHI->addIncoming(BI->getOperand(0), BB);
 
   // Subtract the shift amount by one, as we're shifting one this loop
@@ -116,13 +117,13 @@
   Value *ValueShifted;
   switch (BI->getOpcode()) {
   case Instruction::Shl:
-    ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(Int32Ty, 1));
+    ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(InputTy, 1));
     break;
   case Instruction::LShr:
-    ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
+    ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(InputTy, 1));
     break;
   case Instruction::AShr:
-    ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
+    ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(InputTy, 1));
     break;
   default:
     llvm_unreachable("asked to expand an instruction that is not a shift");
@@ -137,7 +138,7 @@
   // Collect the resulting value. This is necessary in the IR but won't produce
   // any actual instructions.
   Builder.SetInsertPoint(BI);
-  PHINode *Result = Builder.CreatePHI(Int32Ty, 2);
+  PHINode *Result = Builder.CreatePHI(InputTy, 2);
   Result->addIncoming(BI->getOperand(0), BB);
   Result->addIncoming(ValueShifted, LoopBB);
 
diff --git a/llvm/test/CodeGen/AVR/shift-expand.ll b/llvm/test/CodeGen/AVR/shift-expand.ll
--- a/llvm/test/CodeGen/AVR/shift-expand.ll
+++ b/llvm/test/CodeGen/AVR/shift-expand.ll
@@ -68,21 +68,41 @@
   ret i32 %3
 }
 
-; This function is not modified because it is not an i32.
 define i40 @shl40(i40 %value, i40 %amount) addrspace(1) {
 ; CHECK-LABEL: @shl40(
-; CHECK-NEXT:    [[RESULT:%.*]] = shl i40 [[VALUE:%.*]], [[AMOUNT:%.*]]
-; CHECK-NEXT:    ret i40 [[RESULT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i40 [[AMOUNT:%.*]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
+; CHECK:       shift.loop:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i40 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    [[TMP5]] = sub i8 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP6]] = shl i40 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
+; CHECK:       shift.done:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i40 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    ret i40 [[TMP8]]
 ;
   %result = shl i40 %value, %amount
   ret i40 %result
 }
 
-; This function isn't either, although perhaps it should.
 define i24 @shl24(i24 %value, i24 %amount) addrspace(1) {
 ; CHECK-LABEL: @shl24(
-; CHECK-NEXT:    [[RESULT:%.*]] = shl i24 [[VALUE:%.*]], [[AMOUNT:%.*]]
-; CHECK-NEXT:    ret i24 [[RESULT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i24 [[AMOUNT:%.*]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]]
+; CHECK:       shift.loop:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i24 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    [[TMP5]] = sub i8 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP6]] = shl i24 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]]
+; CHECK:       shift.done:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i24 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ]
+; CHECK-NEXT:    ret i24 [[TMP8]]
 ;
   %result = shl i24 %value, %amount
   ret i24 %result
diff --git a/llvm/test/CodeGen/AVR/shift-late-expand.ll b/llvm/test/CodeGen/AVR/shift-late-expand.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/shift-late-expand.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=avr | FileCheck %s
+
+define i64 @test(i64 %x, i32 %y) {
+; CHECK-LABEL: test:
+; CHECK:       ; %bb.0: ; %start
+; CHECK-NEXT:    push r16
+; CHECK-NEXT:    push r17
+; CHECK-NEXT:    mov r30, r14
+; CHECK-NEXT:    mov r31, r15
+; CHECK-NEXT:    ori r30, 38
+; CHECK-NEXT:    cpi r30, 0
+; CHECK-NEXT:    breq .LBB0_3
+; CHECK-NEXT:  ; %bb.1: ; %shift.loop.preheader
+; CHECK-NEXT:    mov r26, r1
+; CHECK-NEXT:    mov r17, r1
+; CHECK-NEXT:    mov r16, r1
+; CHECK-NEXT:  .LBB0_2: ; %shift.loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    lsr r21
+; CHECK-NEXT:    ror r20
+; CHECK-NEXT:    ror r19
+; CHECK-NEXT:    ror r18
+; CHECK-NEXT:    mov r31, r22
+; CHECK-NEXT:    lsr r31
+; CHECK-NEXT:    mov r27, r1
+; CHECK-NEXT:    ror r27
+; CHECK-NEXT:    or r20, r26
+; CHECK-NEXT:    or r21, r27
+; CHECK-NEXT:    or r18, r16
+; CHECK-NEXT:    or r19, r17
+; CHECK-NEXT:    lsr r25
+; CHECK-NEXT:    ror r24
+; CHECK-NEXT:    ror r23
+; CHECK-NEXT:    ror r22
+; CHECK-NEXT:    dec r30
+; CHECK-NEXT:    cpi r30, 0
+; CHECK-NEXT:    brne .LBB0_2
+; CHECK-NEXT:  .LBB0_3: ; %shift.done
+; CHECK-NEXT:    pop r17
+; CHECK-NEXT:    pop r16
+; CHECK-NEXT:    ret
+start:
+  %0 = or i32 %y, 38
+  %1 = zext i32 %0 to i64
+  %2 = lshr i64 %x, %1
+  ret i64 %2
+}
diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll
--- a/llvm/test/CodeGen/AVR/shift.ll
+++ b/llvm/test/CodeGen/AVR/shift.ll
@@ -36,14 +36,16 @@
 define i16 @shift_i16_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: shift_i16_i16:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    dec r22
-; CHECK-NEXT:    brmi .LBB2_2
-; CHECK-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cpi r22, 0
+; CHECK-NEXT:    breq .LBB2_2
+; CHECK-NEXT:  .LBB2_1: ; %shift.loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    lsl r24
 ; CHECK-NEXT:    rol r25
 ; CHECK-NEXT:    dec r22
-; CHECK-NEXT:    brpl .LBB2_1
-; CHECK-NEXT:  .LBB2_2:
+; CHECK-NEXT:    cpi r22, 0
+; CHECK-NEXT:    brne .LBB2_1
+; CHECK-NEXT:  .LBB2_2: ; %shift.done
 ; CHECK-NEXT:    ret
   %result = shl i16 %a, %b
   ret i16 %result
@@ -54,10 +56,36 @@
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    push r16
 ; CHECK-NEXT:    push r17
-; CHECK-NEXT:    mov r16, r10
-; CHECK-NEXT:    mov r17, r11
-; CHECK-NEXT:    andi r17, 0
-; CHECK-NEXT:    rcall __ashldi3
+; CHECK-NEXT:    mov r30, r10
+; CHECK-NEXT:    mov r31, r11
+; CHECK-NEXT:    cpi r30, 0
+; CHECK-NEXT:    breq .LBB3_3
+; CHECK-NEXT:  ; %bb.1: ; %shift.loop.preheader
+; CHECK-NEXT:    mov r27, r1
+; CHECK-NEXT:    mov r16, r1
+; CHECK-NEXT:    mov r17, r1
+; CHECK-NEXT:  .LBB3_2: ; %shift.loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mov r31, r21
+; CHECK-NEXT:    lsl r31
+; CHECK-NEXT:    mov r26, r1
+; CHECK-NEXT:    rol r26
+; CHECK-NEXT:    lsl r22
+; CHECK-NEXT:    rol r23
+; CHECK-NEXT:    rol r24
+; CHECK-NEXT:    rol r25
+; CHECK-NEXT:    or r24, r16
+; CHECK-NEXT:    or r25, r17
+; CHECK-NEXT:    or r22, r26
+; CHECK-NEXT:    or r23, r27
+; CHECK-NEXT:    lsl r18
+; CHECK-NEXT:    rol r19
+; CHECK-NEXT:    rol r20
+; CHECK-NEXT:    rol r21
+; CHECK-NEXT:    dec r30
+; CHECK-NEXT:    cpi r30, 0
+; CHECK-NEXT:    brne .LBB3_2
+; CHECK-NEXT:  .LBB3_3: ; %shift.done
 ; CHECK-NEXT:    pop r17
 ; CHECK-NEXT:    pop r16
 ; CHECK-NEXT:    ret