diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h --- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -19,6 +19,7 @@ #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -182,7 +183,8 @@ /// set - Initialize this FunctionLoweringInfo with the given Function /// and its associated MachineFunction. /// - void set(const Function &Fn, MachineFunction &MF, SelectionDAG *DAG); + void set(const Function &Fn, MachineFunction &MF, SelectionDAG *DAG, + const LoopInfo *LI); /// clear - Clear out all the function-specific state. This returns this /// FunctionLoweringInfo to an empty state, ready to be used for a diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -57,10 +57,28 @@ return false; } -static ISD::NodeType getPreferredExtendForValue(const Value *V) { - // For the users of the source value being used for compare instruction, if - // the number of signed predicate is greater than unsigned predicate, we - // prefer to use SIGN_EXTEND. +static bool isIncomingBBInLoop(const PHINode *PHI, const LoopInfo *LI) { + if (!LI) + return true; + + for (BasicBlock *IncomingBB : PHI->blocks()) { + if (LI->getLoopDepth(IncomingBB) > 0) + return true; + } + + return false; +} + +static ISD::NodeType getPreferredExtendForValue(const Value *V, + const LoopInfo *LI) { + // For the users of the source value being used for compare instruction or + // PHI node: + // + // 1. 
if PHI node is only used for return instruction which is required + // to signext or zeroext, we set default ExtendKind to SIGN_EXTEND or + // ZERO_EXTEND. + // 2. if the number of signed predicate is greater than unsigned + // predicate, we prefer to use SIGN_EXTEND. // // With this optimization, we would be able to reduce some redundant sign or // zero extension instruction, and eventually more machine CSE opportunities @@ -71,6 +89,19 @@ if (const auto *CI = dyn_cast<CmpInst>(U)) { NumOfSigned += CI->isSigned(); NumOfUnsigned += CI->isUnsigned(); + } else if (const auto *PHI = dyn_cast<PHINode>(U)) { + if (PHI->hasOneUser() && !isIncomingBBInLoop(PHI, LI)) { + if (const auto RI = dyn_cast<ReturnInst>(U->user_back())) { + const Function *F = RI->getParent()->getParent(); + if (F->getAttributes().hasRetAttr(Attribute::SExt)) { + ExtendKind = ISD::SIGN_EXTEND; + NumOfSigned += 1; + } else if (F->getAttributes().hasRetAttr(Attribute::ZExt)) { + ExtendKind = ISD::ZERO_EXTEND; + NumOfUnsigned += 1; + } + } + } } } if (NumOfSigned > NumOfUnsigned) @@ -80,7 +111,7 @@ } void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, - SelectionDAG *DAG) { + SelectionDAG *DAG, const LoopInfo *LI) { Fn = &fn; MF = &mf; TLI = MF->getSubtarget().getTargetLowering(); @@ -235,7 +266,7 @@ InitializeRegForValue(&I); // Decide the preferred extend type for a value. - PreferredExtendType[&I] = getPreferredExtendForValue(&I); + PreferredExtendType[&I] = getPreferredExtendForValue(&I, LI); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -458,7 +458,7 @@ CurDAG->init(*MF, *ORE, this, LibInfo, getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI); - FuncInfo->set(Fn, *MF, CurDAG); + FuncInfo->set(Fn, *MF, CurDAG, LI); SwiftError->setFunction(*MF); // Now get the optional analyzes if we want to. 
diff --git a/llvm/test/CodeGen/ARM/load-global2.ll b/llvm/test/CodeGen/ARM/load-global2.ll --- a/llvm/test/CodeGen/ARM/load-global2.ll +++ b/llvm/test/CodeGen/ARM/load-global2.ll @@ -15,17 +15,13 @@ ; LINUX-PIC-NEXT: ldr r4, [pc, r4] ; LINUX-PIC-NEXT: ldrb r1, [r4] ; LINUX-PIC-NEXT: cmp r1, #0 -; LINUX-PIC-NEXT: beq .LBB0_2 -; LINUX-PIC-NEXT: @ %bb.1: @ %common.ret -; LINUX-PIC-NEXT: sxtb r0, r0 -; LINUX-PIC-NEXT: pop {r4, pc} -; LINUX-PIC-NEXT: .LBB0_2: @ %bb1 +; LINUX-PIC-NEXT: popne {r4, pc} +; LINUX-PIC-NEXT: .LBB0_1: @ %bb1 ; LINUX-PIC-NEXT: bl bar -; LINUX-PIC-NEXT: ldrb r0, [r4] -; LINUX-PIC-NEXT: sxtb r0, r0 +; LINUX-PIC-NEXT: ldrsb r0, [r4] ; LINUX-PIC-NEXT: pop {r4, pc} ; LINUX-PIC-NEXT: .p2align 2 -; LINUX-PIC-NEXT: @ %bb.3: +; LINUX-PIC-NEXT: @ %bb.2: ; LINUX-PIC-NEXT: .LCPI0_0: ; LINUX-PIC-NEXT: .Ltmp0: ; LINUX-PIC-NEXT: .long x(GOT_PREL)-((.LPC0_0+8)-.Ltmp0) diff --git a/llvm/test/CodeGen/RISCV/prefer-extend.ll b/llvm/test/CodeGen/RISCV/prefer-extend.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/prefer-extend.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 < %s | FileCheck -check-prefixes=RV32 %s +; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefixes=RV64 %s + +define signext i16 @foo(i16 signext %a, i1 zeroext %b) { +; RV32-LABEL: foo: +; RV32: # %bb.0: # %entry +; RV32-NEXT: beqz a1, .LBB0_2 +; RV32-NEXT: # %bb.1: # %add +; RV32-NEXT: addi a0, a0, 1 +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srai a0, a0, 16 +; RV32-NEXT: .LBB0_2: # %ret +; RV32-NEXT: ret +; +; RV64-LABEL: foo: +; RV64: # %bb.0: # %entry +; RV64-NEXT: beqz a1, .LBB0_2 +; RV64-NEXT: # %bb.1: # %add +; RV64-NEXT: addiw a0, a0, 1 +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srai a0, a0, 48 +; RV64-NEXT: .LBB0_2: # %ret +; RV64-NEXT: ret +entry: + br i1 %b, label %add, label %ret + +add: + %0 = add i16 %a, 1 + br label %ret + +ret: + %1 = phi i16 [ %a, %entry ], [ %0, %add ] + ret 
i16 %1 +} + +define zeroext i16 @foo1(i16 zeroext %a, i1 zeroext %b) { +; RV32-LABEL: foo1: +; RV32: # %bb.0: # %entry +; RV32-NEXT: beqz a1, .LBB1_2 +; RV32-NEXT: # %bb.1: # %add +; RV32-NEXT: addi a0, a0, 1 +; RV32-NEXT: lui a1, 16 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: .LBB1_2: # %ret +; RV32-NEXT: ret +; +; RV64-LABEL: foo1: +; RV64: # %bb.0: # %entry +; RV64-NEXT: beqz a1, .LBB1_2 +; RV64-NEXT: # %bb.1: # %add +; RV64-NEXT: addi a0, a0, 1 +; RV64-NEXT: lui a1, 16 +; RV64-NEXT: addiw a1, a1, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: .LBB1_2: # %ret +; RV64-NEXT: ret +entry: + br i1 %b, label %add, label %ret + +add: + %0 = add i16 %a, 1 + br label %ret + +ret: + %1 = phi i16 [ %a, %entry ], [ %0, %add ] + ret i16 %1 +} diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -61,9 +61,8 @@ ; CHECK-LABEL: one_loop_add_add_v8i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: ittt eq +; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 -; CHECK-NEXT: sxtheq r0, r0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} @@ -88,9 +87,8 @@ ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u16 r0, q0 -; CHECK-NEXT: pop.w {r7, lr} ; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r7, pc} entry: %cmp12 = icmp eq i32 %N, 0 br i1 %cmp12, label %for.cond.cleanup, label %vector.ph @@ -132,9 +130,8 @@ ; CHECK-LABEL: one_loop_sub_add_v16i8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: ittt eq +; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 -; CHECK-NEXT: uxtbeq r0, r0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: .LBB2_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} @@ -159,9 +156,8 @@ ; CHECK-NEXT: @ %bb.3: @ %middle.block ; 
CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u8 r0, q0 -; CHECK-NEXT: pop.w {r7, lr} ; CHECK-NEXT: uxtb r0, r0 -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r7, pc} entry: %cmp11 = icmp eq i32 %N, 0 br i1 %cmp11, label %for.cond.cleanup, label %vector.ph @@ -201,9 +197,8 @@ ; CHECK-LABEL: one_loop_sub_add_v8i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: ittt eq +; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 -; CHECK-NEXT: sxtheq r0, r0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: .LBB3_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} @@ -228,9 +223,8 @@ ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u16 r0, q0 -; CHECK-NEXT: pop.w {r7, lr} ; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r7, pc} entry: %cmp12 = icmp eq i32 %N, 0 br i1 %cmp12, label %for.cond.cleanup, label %vector.ph @@ -272,9 +266,8 @@ ; CHECK-LABEL: one_loop_mul_add_v16i8: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: ittt eq +; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 -; CHECK-NEXT: uxtbeq r0, r0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: .LBB4_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} @@ -299,9 +292,8 @@ ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u8 r0, q0 -; CHECK-NEXT: pop.w {r7, lr} ; CHECK-NEXT: uxtb r0, r0 -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r7, pc} entry: %cmp10 = icmp eq i32 %N, 0 br i1 %cmp10, label %for.cond.cleanup, label %vector.ph @@ -341,9 +333,8 @@ ; CHECK-LABEL: one_loop_mul_add_v8i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: ittt eq +; CHECK-NEXT: itt eq ; CHECK-NEXT: moveq r0, #0 -; CHECK-NEXT: sxtheq r0, r0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: .LBB5_1: @ %vector.ph ; CHECK-NEXT: push {r7, lr} @@ -368,9 +359,8 @@ ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u16 r0, q0 -; CHECK-NEXT: pop.w {r7, lr} ; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop 
{r7, pc} entry: %cmp12 = icmp eq i32 %N, 0 br i1 %cmp12, label %for.cond.cleanup, label %vector.ph