diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10697,6 +10697,7 @@
 // passed with CCValAssign::Indirect.
 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
                                 const CCValAssign &VA, const SDLoc &DL,
+                                const ISD::InputArg &In,
                                 const RISCVTargetLowering &TLI) {
   MachineFunction &MF = DAG.getMachineFunction();
   MachineRegisterInfo &RegInfo = MF.getRegInfo();
@@ -10707,6 +10708,20 @@
   RegInfo.addLiveIn(VA.getLocReg(), VReg);
   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
 
+  // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
+  if (In.isOrigArg()) {
+    Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
+    if (OrigArg->getType()->isIntegerTy()) {
+      unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
+      // An input zero extended from i31 can also be considered sign extended.
+      if ((BitWidth <= 32 && In.Flags.isSExt()) ||
+          (BitWidth < 32 && In.Flags.isZExt())) {
+        RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+        RVFI->addSExt32Register(VReg);
+      }
+    }
+  }
+
   if (VA.getLocInfo() == CCValAssign::Indirect)
     return Val;
 
@@ -11019,7 +11034,7 @@
     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
     else if (VA.isRegLoc())
-      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
+      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
     else
       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
 
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -66,6 +66,9 @@
   /// Size of stack frame to save callee saved registers
   unsigned CalleeSavedStackSize = 0;
 
+  /// Registers that have been sign extended from i32.
+  SmallVector<Register, 8> SExt32Registers;
+
 public:
   RISCVMachineFunctionInfo(const MachineFunction &MF) {}
 
@@ -118,6 +121,11 @@
   void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }
 
   void initializeBaseYamlFields(const yaml::RISCVMachineFunctionInfo &YamlMFI);
+
+  /// Record \p Reg in the list of registers sign extended from i32.
+  void addSExt32Register(Register Reg);
+  /// Return true if \p Reg was previously recorded by addSExt32Register.
+  bool isSExt32Register(Register Reg) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
@@ -35,3 +35,11 @@
   VarArgsFrameIndex = YamlMFI.VarArgsFrameIndex;
   VarArgsSaveSize = YamlMFI.VarArgsSaveSize;
 }
+
+void RISCVMachineFunctionInfo::addSExt32Register(Register Reg) {
+  SExt32Registers.push_back(Reg);
+}
+
+bool RISCVMachineFunctionInfo::isSExt32Register(Register Reg) const {
+  return is_contained(SExt32Registers, Reg);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
--- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
@@ -11,6 +11,7 @@
 //===---------------------------------------------------------------------===//
 
 #include "RISCV.h"
+#include "RISCVMachineFunctionInfo.h"
 #include "RISCVSubtarget.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -315,9 +316,21 @@
       // Unknown opcode, give up.
       return false;
     case RISCV::COPY: {
-      Register SrcReg = MI->getOperand(1).getReg();
+      const MachineFunction *MF = MI->getMF();
+      const RISCVMachineFunctionInfo *RVFI =
+          MF->getInfo<RISCVMachineFunctionInfo>();
+
+      // If this is the entry block and the register is livein, see if we know
+      // it is sign extended.
+      if (MI->getParent() == &MF->front()) {
+        Register VReg = MI->getOperand(0).getReg();
+        if (MF->getRegInfo().isLiveIn(VReg))
+          return RVFI->isSExt32Register(VReg);
+      }
 
-      // TODO: Handle arguments and returns from calls?
+      // TODO: Handle returns from calls?
+
+      Register SrcReg = MI->getOperand(1).getReg();
 
       // If this is a copy from another register, check its source instruction.
       if (!SrcReg.isVirtual())
diff --git a/llvm/test/CodeGen/RISCV/select-cc.ll b/llvm/test/CodeGen/RISCV/select-cc.ll
--- a/llvm/test/CodeGen/RISCV/select-cc.ll
+++ b/llvm/test/CodeGen/RISCV/select-cc.ll
@@ -114,26 +114,22 @@
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:  .LBB0_12:
 ; RV64I-NEXT:    lw a2, 0(a1)
-; RV64I-NEXT:    sext.w a3, a0
-; RV64I-NEXT:    blt a2, a3, .LBB0_14
+; RV64I-NEXT:    blt a2, a0, .LBB0_14
 ; RV64I-NEXT:  # %bb.13:
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:  .LBB0_14:
 ; RV64I-NEXT:    lw a2, 0(a1)
-; RV64I-NEXT:    sext.w a3, a0
-; RV64I-NEXT:    bge a3, a2, .LBB0_16
+; RV64I-NEXT:    bge a0, a2, .LBB0_16
 ; RV64I-NEXT:  # %bb.15:
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:  .LBB0_16:
 ; RV64I-NEXT:    lw a2, 0(a1)
-; RV64I-NEXT:    sext.w a3, a0
-; RV64I-NEXT:    blt a3, a2, .LBB0_18
+; RV64I-NEXT:    blt a0, a2, .LBB0_18
 ; RV64I-NEXT:  # %bb.17:
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:  .LBB0_18:
 ; RV64I-NEXT:    lw a2, 0(a1)
-; RV64I-NEXT:    sext.w a3, a0
-; RV64I-NEXT:    bge a2, a3, .LBB0_20
+; RV64I-NEXT:    bge a2, a0, .LBB0_20
 ; RV64I-NEXT:  # %bb.19:
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:  .LBB0_20:
@@ -159,7 +155,6 @@
 ; RV64I-NEXT:  # %bb.27:
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:  .LBB0_28:
-; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    ret
   %val1 = load volatile i32, i32* %b
   %tst1 = icmp eq i32 %a, %val1
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -706,3 +706,268 @@
   %i8 = trunc i64 %i5 to i32
   ret i32 %i8
 }
+
+
+; int test14(int a, int n) {
+;   for (int i = 1; i < n; ++i) {
+;     if (a > 1000)
+;       return -1;
+;     a += i;
+;   }
+;
+;   return a;
+; }
+;
+; There should be no sext.w in the loop.
+define signext i32 @test14(i32 signext %0, i32 signext %1) {
+; CHECK-LABEL: test14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 2
+; CHECK-NEXT:    blt a1, a2, .LBB13_4
+; CHECK-NEXT:  # %bb.1: # %.preheader
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    li a3, 1000
+; CHECK-NEXT:  .LBB13_2: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    blt a3, a0, .LBB13_5
+; CHECK-NEXT:  # %bb.3: # in Loop: Header=BB13_2 Depth=1
+; CHECK-NEXT:    addw a0, a2, a0
+; CHECK-NEXT:    addiw a2, a2, 1
+; CHECK-NEXT:    blt a2, a1, .LBB13_2
+; CHECK-NEXT:  .LBB13_4:
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB13_5:
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    ret
+;
+; NOREMOVAL-LABEL: test14:
+; NOREMOVAL:       # %bb.0:
+; NOREMOVAL-NEXT:    li a2, 2
+; NOREMOVAL-NEXT:    blt a1, a2, .LBB13_4
+; NOREMOVAL-NEXT:  # %bb.1: # %.preheader
+; NOREMOVAL-NEXT:    li a2, 1
+; NOREMOVAL-NEXT:    li a3, 1000
+; NOREMOVAL-NEXT:  .LBB13_2: # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT:    sext.w a4, a0
+; NOREMOVAL-NEXT:    blt a3, a4, .LBB13_5
+; NOREMOVAL-NEXT:  # %bb.3: # in Loop: Header=BB13_2 Depth=1
+; NOREMOVAL-NEXT:    addw a0, a2, a0
+; NOREMOVAL-NEXT:    addiw a2, a2, 1
+; NOREMOVAL-NEXT:    blt a2, a1, .LBB13_2
+; NOREMOVAL-NEXT:  .LBB13_4:
+; NOREMOVAL-NEXT:    ret
+; NOREMOVAL-NEXT:  .LBB13_5:
+; NOREMOVAL-NEXT:    li a0, -1
+; NOREMOVAL-NEXT:    ret
+  %3 = icmp sgt i32 %1, 1
+  br i1 %3, label %4, label %12
+
+4:                                                ; preds = %2, %8
+  %5 = phi i32 [ %10, %8 ], [ 1, %2 ]
+  %6 = phi i32 [ %9, %8 ], [ %0, %2 ]
+  %7 = icmp sgt i32 %6, 1000
+  br i1 %7, label %12, label %8
+
+8:                                                ; preds = %4
+  %9 = add nsw i32 %5, %6
+  %10 = add nuw nsw i32 %5, 1
+  %11 = icmp slt i32 %10, %1
+  br i1 %11, label %4, label %12
+
+12:                                               ; preds = %8, %4, %2
+  %13 = phi i32 [ %0, %2 ], [ -1, %4 ], [ %9, %8 ]
+  ret i32 %13
+}
+
+; Same as test14 but the signext attribute is missing from the argument so we
+; can't optimize out the sext.w.
+define signext i32 @test14b(i32 %0, i32 signext %1) {
+; CHECK-LABEL: test14b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 2
+; CHECK-NEXT:    blt a1, a2, .LBB14_4
+; CHECK-NEXT:  # %bb.1: # %.preheader
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    li a3, 1000
+; CHECK-NEXT:  .LBB14_2: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    sext.w a4, a0
+; CHECK-NEXT:    blt a3, a4, .LBB14_5
+; CHECK-NEXT:  # %bb.3: # in Loop: Header=BB14_2 Depth=1
+; CHECK-NEXT:    addw a0, a2, a0
+; CHECK-NEXT:    addiw a2, a2, 1
+; CHECK-NEXT:    blt a2, a1, .LBB14_2
+; CHECK-NEXT:  .LBB14_4:
+; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB14_5:
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    ret
+;
+; NOREMOVAL-LABEL: test14b:
+; NOREMOVAL:       # %bb.0:
+; NOREMOVAL-NEXT:    li a2, 2
+; NOREMOVAL-NEXT:    blt a1, a2, .LBB14_4
+; NOREMOVAL-NEXT:  # %bb.1: # %.preheader
+; NOREMOVAL-NEXT:    li a2, 1
+; NOREMOVAL-NEXT:    li a3, 1000
+; NOREMOVAL-NEXT:  .LBB14_2: # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT:    sext.w a4, a0
+; NOREMOVAL-NEXT:    blt a3, a4, .LBB14_5
+; NOREMOVAL-NEXT:  # %bb.3: # in Loop: Header=BB14_2 Depth=1
+; NOREMOVAL-NEXT:    addw a0, a2, a0
+; NOREMOVAL-NEXT:    addiw a2, a2, 1
+; NOREMOVAL-NEXT:    blt a2, a1, .LBB14_2
+; NOREMOVAL-NEXT:  .LBB14_4:
+; NOREMOVAL-NEXT:    sext.w a0, a0
+; NOREMOVAL-NEXT:    ret
+; NOREMOVAL-NEXT:  .LBB14_5:
+; NOREMOVAL-NEXT:    li a0, -1
+; NOREMOVAL-NEXT:    sext.w a0, a0
+; NOREMOVAL-NEXT:    ret
+  %3 = icmp sgt i32 %1, 1
+  br i1 %3, label %4, label %12
+
+4:                                                ; preds = %2, %8
+  %5 = phi i32 [ %10, %8 ], [ 1, %2 ]
+  %6 = phi i32 [ %9, %8 ], [ %0, %2 ]
+  %7 = icmp sgt i32 %6, 1000
+  br i1 %7, label %12, label %8
+
+8:                                                ; preds = %4
+  %9 = add nsw i32 %5, %6
+  %10 = add nuw nsw i32 %5, 1
+  %11 = icmp slt i32 %10, %1
+  br i1 %11, label %4, label %12
+
+12:                                               ; preds = %8, %4, %2
+  %13 = phi i32 [ %0, %2 ], [ -1, %4 ], [ %9, %8 ]
+  ret i32 %13
+}
+
+; Same as test14, but the argument is zero extended instead of sign extended so
+; we can't optimize it.
+define signext i32 @test14c(i32 zeroext %0, i32 signext %1) {
+; CHECK-LABEL: test14c:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 2
+; CHECK-NEXT:    blt a1, a2, .LBB15_4
+; CHECK-NEXT:  # %bb.1: # %.preheader
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    li a3, 1000
+; CHECK-NEXT:  .LBB15_2: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    sext.w a4, a0
+; CHECK-NEXT:    blt a3, a4, .LBB15_5
+; CHECK-NEXT:  # %bb.3: # in Loop: Header=BB15_2 Depth=1
+; CHECK-NEXT:    addw a0, a2, a0
+; CHECK-NEXT:    addiw a2, a2, 1
+; CHECK-NEXT:    blt a2, a1, .LBB15_2
+; CHECK-NEXT:  .LBB15_4:
+; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB15_5:
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    ret
+;
+; NOREMOVAL-LABEL: test14c:
+; NOREMOVAL:       # %bb.0:
+; NOREMOVAL-NEXT:    li a2, 2
+; NOREMOVAL-NEXT:    blt a1, a2, .LBB15_4
+; NOREMOVAL-NEXT:  # %bb.1: # %.preheader
+; NOREMOVAL-NEXT:    li a2, 1
+; NOREMOVAL-NEXT:    li a3, 1000
+; NOREMOVAL-NEXT:  .LBB15_2: # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT:    sext.w a4, a0
+; NOREMOVAL-NEXT:    blt a3, a4, .LBB15_5
+; NOREMOVAL-NEXT:  # %bb.3: # in Loop: Header=BB15_2 Depth=1
+; NOREMOVAL-NEXT:    addw a0, a2, a0
+; NOREMOVAL-NEXT:    addiw a2, a2, 1
+; NOREMOVAL-NEXT:    blt a2, a1, .LBB15_2
+; NOREMOVAL-NEXT:  .LBB15_4:
+; NOREMOVAL-NEXT:    sext.w a0, a0
+; NOREMOVAL-NEXT:    ret
+; NOREMOVAL-NEXT:  .LBB15_5:
+; NOREMOVAL-NEXT:    li a0, -1
+; NOREMOVAL-NEXT:    sext.w a0, a0
+; NOREMOVAL-NEXT:    ret
+  %3 = icmp sgt i32 %1, 1
+  br i1 %3, label %4, label %12
+
+4:                                                ; preds = %2, %8
+  %5 = phi i32 [ %10, %8 ], [ 1, %2 ]
+  %6 = phi i32 [ %9, %8 ], [ %0, %2 ]
+  %7 = icmp sgt i32 %6, 1000
+  br i1 %7, label %12, label %8
+
+8:                                                ; preds = %4
+  %9 = add nsw i32 %5, %6
+  %10 = add nuw nsw i32 %5, 1
+  %11 = icmp slt i32 %10, %1
+  br i1 %11, label %4, label %12
+
+12:                                               ; preds = %8, %4, %2
+  %13 = phi i32 [ %0, %2 ], [ -1, %4 ], [ %9, %8 ]
+  ret i32 %13
+}
+
+; Same as test14 but the argument is zero extended from i31. Since bits 63:31
+; are zero, this counts as an i32 sign extend so we can optimize it.
+define signext i32 @test14d(i31 zeroext %0, i32 signext %1) {
+; CHECK-LABEL: test14d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 2
+; CHECK-NEXT:    blt a1, a2, .LBB16_4
+; CHECK-NEXT:  # %bb.1: # %.preheader
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    li a3, 1000
+; CHECK-NEXT:  .LBB16_2: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    blt a3, a0, .LBB16_5
+; CHECK-NEXT:  # %bb.3: # in Loop: Header=BB16_2 Depth=1
+; CHECK-NEXT:    addw a0, a2, a0
+; CHECK-NEXT:    addiw a2, a2, 1
+; CHECK-NEXT:    blt a2, a1, .LBB16_2
+; CHECK-NEXT:  .LBB16_4:
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB16_5:
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    ret
+;
+; NOREMOVAL-LABEL: test14d:
+; NOREMOVAL:       # %bb.0:
+; NOREMOVAL-NEXT:    li a2, 2
+; NOREMOVAL-NEXT:    blt a1, a2, .LBB16_4
+; NOREMOVAL-NEXT:  # %bb.1: # %.preheader
+; NOREMOVAL-NEXT:    li a2, 1
+; NOREMOVAL-NEXT:    li a3, 1000
+; NOREMOVAL-NEXT:  .LBB16_2: # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT:    sext.w a4, a0
+; NOREMOVAL-NEXT:    blt a3, a4, .LBB16_5
+; NOREMOVAL-NEXT:  # %bb.3: # in Loop: Header=BB16_2 Depth=1
+; NOREMOVAL-NEXT:    addw a0, a2, a0
+; NOREMOVAL-NEXT:    addiw a2, a2, 1
+; NOREMOVAL-NEXT:    blt a2, a1, .LBB16_2
+; NOREMOVAL-NEXT:  .LBB16_4:
+; NOREMOVAL-NEXT:    ret
+; NOREMOVAL-NEXT:  .LBB16_5:
+; NOREMOVAL-NEXT:    li a0, -1
+; NOREMOVAL-NEXT:    ret
+  %zext = zext i31 %0 to i32
+  %3 = icmp sgt i32 %1, 1
+  br i1 %3, label %4, label %12
+
+4:                                                ; preds = %2, %8
+  %5 = phi i32 [ %10, %8 ], [ 1, %2 ]
+  %6 = phi i32 [ %9, %8 ], [ %zext, %2 ]
+  %7 = icmp sgt i32 %6, 1000
+  br i1 %7, label %12, label %8
+
+8:                                                ; preds = %4
+  %9 = add nsw i32 %5, %6
+  %10 = add nuw nsw i32 %5, 1
+  %11 = icmp slt i32 %10, %1
+  br i1 %11, label %4, label %12
+
+12:                                               ; preds = %8, %4, %2
+  %13 = phi i32 [ %zext, %2 ], [ -1, %4 ], [ %9, %8 ]
+  ret i32 %13
+}