diff --git a/llvm/lib/Target/AVR/AVRShiftExpand.cpp b/llvm/lib/Target/AVR/AVRShiftExpand.cpp --- a/llvm/lib/Target/AVR/AVRShiftExpand.cpp +++ b/llvm/lib/Target/AVR/AVRShiftExpand.cpp @@ -7,9 +7,10 @@ //===----------------------------------------------------------------------===// // /// \file -/// Expand 32-bit shift instructions (shl, lshr, ashr) to inline loops, just -/// like avr-gcc. This must be done in IR because otherwise the type legalizer -/// will turn 32-bit shifts into (non-existing) library calls such as __ashlsi3. +/// Expand non-8-bit and non-16-bit shift instructions (shl, lshr, ashr) to +/// inline loops, just like avr-gcc. This must be done in IR because otherwise +/// the type legalizer will turn 32-bit shifts into (non-existing) library calls +/// such as __ashlsi3. // //===----------------------------------------------------------------------===// @@ -51,8 +52,9 @@ if (!I.isShift()) // Only expand shift instructions (shl, lshr, ashr). continue; - if (I.getType() != Type::getInt32Ty(Ctx)) - // Only expand plain i32 types. + if (I.getType() == Type::getInt8Ty(Ctx) || I.getType() == Type::getInt16Ty(Ctx)) + // Only expand non-8-bit and non-16-bit shifts, since those are expanded + // directly during isel. continue; if (isa(I.getOperand(1))) // Only expand when the shift amount is not known. @@ -75,7 +77,7 @@ void AVRShiftExpand::expand(BinaryOperator *BI) { auto &Ctx = BI->getContext(); IRBuilder<> Builder(BI); - Type *Int32Ty = Type::getInt32Ty(Ctx); + Type *InputTy = cast(BI)->getType(); Type *Int8Ty = Type::getInt8Ty(Ctx); Value *Int8Zero = ConstantInt::get(Int8Ty, 0); @@ -101,7 +103,7 @@ Builder.SetInsertPoint(LoopBB); PHINode *ShiftAmountPHI = Builder.CreatePHI(Int8Ty, 2); ShiftAmountPHI->addIncoming(ShiftAmount, BB); - PHINode *ValuePHI = Builder.CreatePHI(Int32Ty, 2); + PHINode *ValuePHI = Builder.CreatePHI(InputTy, 2); ValuePHI->addIncoming(BI->getOperand(0), BB); // Subtract the shift amount by one, as we're shifting one this loop @@ -116,13 +118,13 @@ Value *ValueShifted; switch (BI->getOpcode()) { case Instruction::Shl: - ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(Int32Ty, 1)); + ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(InputTy, 1)); break; case Instruction::LShr: - ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(Int32Ty, 1)); + ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(InputTy, 1)); break; case Instruction::AShr: - ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(Int32Ty, 1)); + ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(InputTy, 1)); break; default: llvm_unreachable("asked to expand an instruction that is not a shift"); @@ -137,7 +139,7 @@ // Collect the resulting value. This is necessary in the IR but won't produce // any actual instructions. Builder.SetInsertPoint(BI); - PHINode *Result = Builder.CreatePHI(Int32Ty, 2); + PHINode *Result = Builder.CreatePHI(InputTy, 2); Result->addIncoming(BI->getOperand(0), BB); Result->addIncoming(ValueShifted, LoopBB); diff --git a/llvm/test/CodeGen/AVR/shift-expand.ll b/llvm/test/CodeGen/AVR/shift-expand.ll --- a/llvm/test/CodeGen/AVR/shift-expand.ll +++ b/llvm/test/CodeGen/AVR/shift-expand.ll @@ -8,8 +8,17 @@ target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8" target triple = "avr" -define i32 @shl(i32 %value, i32 %amount) addrspace(1) { -; CHECK-LABEL: @shl( +define i16 @shl16(i16 %value, i16 %amount) addrspace(1) { +; CHECK-LABEL: @shl16( +; CHECK-NEXT: [[RESULT:%.*]] = shl i16 [[VALUE:%.*]], [[AMOUNT:%.*]] +; CHECK-NEXT: ret i16 [[RESULT]] +; + %result = shl i16 %value, %amount + ret i16 %result +} + +define i32 @shl32(i32 %value, i32 %amount) addrspace(1) { +; CHECK-LABEL: @shl32( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]] @@ -28,8 +37,39 @@ ret i32 %result } -define i32 @lshr(i32 %value, i32 %amount) addrspace(1) { -; CHECK-LABEL: @lshr( +define i40 @shl40(i40 %value, i40 %amount) addrspace(1) { +; CHECK-LABEL: @shl40( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i40 [[AMOUNT:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]] +; CHECK: shift.loop: +; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i40 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ] +; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1 +; CHECK-NEXT: [[TMP6]] = shl i40 [[TMP4]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]] +; CHECK: shift.done: +; CHECK-NEXT: [[TMP8:%.*]] = phi i40 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ] +; CHECK-NEXT: ret i40 [[TMP8]] +; + %result = shl i40 %value, %amount + ret i40 %result +} + +; ------------------------------------------------------------------------------ + +define i16 @lshr16(i16 %value, i16 %amount) addrspace(1) { +; CHECK-LABEL: @lshr16( +; CHECK-NEXT: [[RESULT:%.*]] = lshr i16 [[VALUE:%.*]], [[AMOUNT:%.*]] +; CHECK-NEXT: ret i16 [[RESULT]] +; + %result = lshr i16 %value, %amount + ret i16 %result +} + +define i32 @lshr32(i32 %value, i32 %amount) addrspace(1) { +; CHECK-LABEL: @lshr32( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]] @@ -48,42 +88,73 @@ ret i32 %result } -define i32 @ashr(i32 %0, i32 %1) addrspace(1) { -; CHECK-LABEL: @ashr( -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP1:%.*]] to i8 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i8 [[TMP3]], 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]] +define i40 @lshr40(i40 %value, i40 %amount) addrspace(1) { +; CHECK-LABEL: @lshr40( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i40 [[AMOUNT:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]] ; CHECK: shift.loop: -; CHECK-NEXT: [[TMP5:%.*]] = phi i8 [ [[TMP3]], [[TMP2:%.*]] ], [ [[TMP7:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP0:%.*]], [[TMP2]] ], [ [[TMP8:%.*]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: [[TMP7]] = sub i8 [[TMP5]], 1 -; CHECK-NEXT: [[TMP8]] = ashr i32 [[TMP6]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[TMP9]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]] +; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i40 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ] +; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1 +; CHECK-NEXT: [[TMP6]] = lshr i40 [[TMP4]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]] ; CHECK: shift.done: -; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP0]], [[TMP2]] ], [ [[TMP8]], [[SHIFT_LOOP]] ] -; CHECK-NEXT: ret i32 [[TMP10]] +; CHECK-NEXT: [[TMP8:%.*]] = phi i40 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ] +; CHECK-NEXT: ret i40 [[TMP8]] ; - %3 = ashr i32 %0, %1 - ret i32 %3 + %result = lshr i40 %value, %amount + ret i40 %result } -; This function is not modified because it is not an i32. -define i40 @shl40(i40 %value, i40 %amount) addrspace(1) { -; CHECK-LABEL: @shl40( -; CHECK-NEXT: [[RESULT:%.*]] = shl i40 [[VALUE:%.*]], [[AMOUNT:%.*]] -; CHECK-NEXT: ret i40 [[RESULT]] +; ------------------------------------------------------------------------------ + +define i16 @ashr16(i16 %value, i16 %amount) addrspace(1) { +; CHECK-LABEL: @ashr16( +; CHECK-NEXT: [[RESULT:%.*]] = ashr i16 [[VALUE:%.*]], [[AMOUNT:%.*]] +; CHECK-NEXT: ret i16 [[RESULT]] ; - %result = shl i40 %value, %amount - ret i40 %result + %result = ashr i16 %value, %amount + ret i16 %result } -; This function isn't either, although perhaps it should. -define i24 @shl24(i24 %value, i24 %amount) addrspace(1) { -; CHECK-LABEL: @shl24( -; CHECK-NEXT: [[RESULT:%.*]] = shl i24 [[VALUE:%.*]], [[AMOUNT:%.*]] -; CHECK-NEXT: ret i24 [[RESULT]] +define i32 @ashr32(i32 %value, i32 %amount) addrspace(1) { +; CHECK-LABEL: @ashr32( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[AMOUNT:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]] +; CHECK: shift.loop: +; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ] +; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1 +; CHECK-NEXT: [[TMP6]] = ashr i32 [[TMP4]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]] +; CHECK: shift.done: +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ] +; CHECK-NEXT: ret i32 [[TMP8]] +; + %result = ashr i32 %value, %amount + ret i32 %result +} + +define i40 @ashr40(i40 %value, i40 %amount) addrspace(1) { +; CHECK-LABEL: @ashr40( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i40 [[AMOUNT:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[SHIFT_DONE:%.*]], label [[SHIFT_LOOP:%.*]] +; CHECK: shift.loop: +; CHECK-NEXT: [[TMP3:%.*]] = phi i8 [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[SHIFT_LOOP]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i40 [ [[VALUE:%.*]], [[TMP0]] ], [ [[TMP6:%.*]], [[SHIFT_LOOP]] ] +; CHECK-NEXT: [[TMP5]] = sub i8 [[TMP3]], 1 +; CHECK-NEXT: [[TMP6]] = ashr i40 [[TMP4]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[TMP7]], label [[SHIFT_DONE]], label [[SHIFT_LOOP]] +; CHECK: shift.done: +; CHECK-NEXT: [[TMP8:%.*]] = phi i40 [ [[VALUE]], [[TMP0]] ], [ [[TMP6]], [[SHIFT_LOOP]] ] +; CHECK-NEXT: ret i40 [[TMP8]] ; - %result = shl i24 %value, %amount - ret i24 %result + %result = ashr i40 %value, %amount + ret i40 %result } diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll --- a/llvm/test/CodeGen/AVR/shift.ll +++ b/llvm/test/CodeGen/AVR/shift.ll @@ -54,10 +54,36 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: push r16 ; CHECK-NEXT: push r17 -; CHECK-NEXT: mov r16, r10 -; CHECK-NEXT: mov r17, r11 -; CHECK-NEXT: andi r17, 0 -; CHECK-NEXT: rcall __ashldi3 +; CHECK-NEXT: mov r30, r10 +; CHECK-NEXT: mov r31, r11 +; CHECK-NEXT: cpi r30, 0 +; CHECK-NEXT: breq .LBB3_3 +; CHECK-NEXT: ; %bb.1: ; %shift.loop.preheader +; CHECK-NEXT: mov r27, r1 +; CHECK-NEXT: mov r16, r1 +; CHECK-NEXT: mov r17, r1 +; CHECK-NEXT: .LBB3_2: ; %shift.loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mov r31, r21 +; CHECK-NEXT: lsl r31 +; CHECK-NEXT: mov r26, r1 +; CHECK-NEXT: rol r26 +; CHECK-NEXT: lsl r22 +; CHECK-NEXT: rol r23 +; CHECK-NEXT: rol r24 +; CHECK-NEXT: rol r25 +; CHECK-NEXT: or r24, r16 +; CHECK-NEXT: or r25, r17 +; CHECK-NEXT: or r22, r26 +; CHECK-NEXT: or r23, r27 +; CHECK-NEXT: lsl r18 +; CHECK-NEXT: rol r19 +; CHECK-NEXT: rol r20 +; CHECK-NEXT: rol r21 +; CHECK-NEXT: dec r30 +; CHECK-NEXT: cpi r30, 0 +; CHECK-NEXT: brne .LBB3_2 +; CHECK-NEXT: .LBB3_3: ; %shift.done ; CHECK-NEXT: pop r17 ; CHECK-NEXT: pop r16 ; CHECK-NEXT: ret