Index: llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
+++ llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
@@ -55,6 +55,144 @@
   return new RISCVSExtWRemoval();
 }
 
+/// Append to \p Worklist every non-debug instruction that reads the register
+/// defined by operand 0 of \p MI.
+///
+/// Debug uses are skipped on purpose: a DBG_VALUE user must never influence
+/// the result, otherwise code generation would differ with and without -g.
+static void addUses(const MachineInstr &MI,
+                    SmallVectorImpl<const MachineInstr *> &Worklist,
+                    MachineRegisterInfo &MRI) {
+  for (auto &UserOp : MRI.use_nodbg_operands(MI.getOperand(0).getReg())) {
+    const MachineInstr *User = UserOp.getParent();
+    if (User == &MI) // skip MI itself (e.g. a self-referencing PHI)
+      continue;
+    Worklist.push_back(User);
+  }
+}
+
+/// Return true if every transitive user of the value defined by \p OrigMI
+/// depends only on bits 31:0 of that value.
+///
+/// Users are explored with a worklist. A user that reads just the lower word
+/// ends the search along that path; a user whose lower output word depends
+/// only on the lower word of its inputs has its own users queued in turn; any
+/// other user makes the answer false.
+static bool isAllUsesReadW(const MachineInstr &OrigMI,
+                           MachineRegisterInfo &MRI) {
+  SmallPtrSet<const MachineInstr *, 4> Visited;
+  SmallVector<const MachineInstr *, 4> Worklist;
+
+  Visited.insert(&OrigMI);
+  addUses(OrigMI, Worklist, MRI);
+
+  while (!Worklist.empty()) {
+    const MachineInstr *MI = Worklist.pop_back_val();
+
+    if (!Visited.insert(MI).second)
+      continue;
+
+    switch (MI->getOpcode()) {
+    // These only read the lower word of their register inputs.
+    case RISCV::ADDIW:
+    case RISCV::ADDW:
+    case RISCV::DIVUW:
+    case RISCV::DIVW:
+    case RISCV::MULW:
+    case RISCV::REMUW:
+    case RISCV::REMW:
+    case RISCV::SLLIW:
+    case RISCV::SLLW:
+    case RISCV::SRAIW:
+    case RISCV::SRAW:
+    case RISCV::SRLIW:
+    case RISCV::SRLW:
+    case RISCV::SUBW:
+    case RISCV::ROLW:
+    case RISCV::RORW:
+    case RISCV::RORIW:
+    case RISCV::CLZW:
+    case RISCV::CTZW:
+    case RISCV::CPOPW:
+    case RISCV::FCVT_S_W:
+    case RISCV::FCVT_S_WU:
+    case RISCV::FCVT_D_W:
+    case RISCV::FCVT_D_WU:
+      continue;
+
+    // With the immediates tested below the result does not depend on the
+    // higher input bits; otherwise the lower word of the output depends only
+    // on the lower word of the input, so check that their uses read W.
+    case RISCV::SLLI:
+      if (MI->getOperand(2).getImm() >= 32)
+        continue;
+      addUses(*MI, Worklist, MRI);
+      continue;
+    case RISCV::ANDI:
+      if (isUInt<11>(MI->getOperand(2).getImm()))
+        continue;
+      addUses(*MI, Worklist, MRI);
+      continue;
+    case RISCV::ORI:
+      if (!isUInt<11>(MI->getOperand(2).getImm()))
+        continue;
+      addUses(*MI, Worklist, MRI);
+      continue;
+
+    // Reads a single bit; it lies in the lower word iff the index is < 32.
+    case RISCV::BEXTI:
+      if (MI->getOperand(2).getImm() >= 32)
+        return false;
+      continue;
+
+    // A COPY into a physical register cannot be followed: the operand list
+    // of a physical register spans the whole function, not just the values
+    // fed by this COPY. Be conservative.
+    case RISCV::COPY:
+      if (!MI->getOperand(0).getReg().isVirtual())
+        return false;
+      addUses(*MI, Worklist, MRI);
+      continue;
+
+    // For these the lower word of the output depends only on the lower word
+    // of the inputs, so check that their uses only read the lower word.
+    case RISCV::PHI:
+
+    case RISCV::ADD:
+    case RISCV::ADDI:
+    case RISCV::AND:
+    case RISCV::MUL:
+    case RISCV::OR:
+    case RISCV::SLL:
+    case RISCV::SUB:
+
+    case RISCV::ADDUW:
+    case RISCV::ANDN:
+    case RISCV::CLMUL:
+    case RISCV::ORCB:
+    case RISCV::ORN:
+    case RISCV::SEXTB:
+    case RISCV::SEXTH:
+    case RISCV::SH1ADD:
+    case RISCV::SH1ADDUW:
+    case RISCV::SH2ADD:
+    case RISCV::SH2ADDUW:
+    case RISCV::SH3ADD:
+    case RISCV::SH3ADDUW:
+    case RISCV::SLLIUW:
+    case RISCV::XNOR:
+    case RISCV::XOR:
+    case RISCV::XORI:
+    case RISCV::ZEXTH_RV64:
+      addUses(*MI, Worklist, MRI);
+      continue;
+    default:
+      return false;
+    }
+  }
+  return true;
+}
+
 // This function returns true if the machine instruction always outputs a value
 // where bits 63:32 match bit 31.
 // TODO: Allocate a bit in TSFlags for the W instructions?
@@ -90,7 +228,7 @@
   case RISCV::FCVT_W_D:
   case RISCV::FCVT_WU_D:
   // The following aren't W instructions, but are either sign extended from a
-  // smaller size or put zeros in bits 63:31.
+  // smaller size, always output a small integer, or put zeros in bits 63:31.
   case RISCV::LBU:
   case RISCV::LHU:
   case RISCV::LB:
@@ -102,6 +240,11 @@
   case RISCV::SEXTB:
   case RISCV::SEXTH:
   case RISCV::ZEXTH_RV64:
+  case RISCV::BEXT:
+  case RISCV::BEXTI:
+  case RISCV::CLZ:
+  case RISCV::CPOP:
+  case RISCV::CTZ:
     return true;
   // shifting right sufficiently makes the value 32-bit sign-extended
   case RISCV::SRAI:
@@ -165,14 +308,23 @@
       Worklist.push_back(SrcMI);
       break;
     }
+
+    // For these we just need to check if the 1st operand is sign extended,
+    // provided the modified bit is below bit 31: changing bit 31 would alter
+    // the sign bit without updating bits 63:32.
+    case RISCV::BCLRI:
+    case RISCV::BINVI:
+    case RISCV::BSETI:
+      if (MI->getOperand(2).getImm() >= 31)
+        return false;
+      LLVM_FALLTHROUGH;
     case RISCV::REM:
     case RISCV::ANDI:
     case RISCV::ORI:
     case RISCV::XORI: {
       // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
       // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
-      // Logical operations use a sign extended 12-bit immediate. We just need
-      // to check if the other operand is sign extended.
+      // Logical operations use a sign extended 12-bit immediate.
       Register SrcReg = MI->getOperand(1).getReg();
       if (!SrcReg.isVirtual())
         return false;
@@ -258,8 +410,11 @@
         continue;
 
       const MachineInstr &SrcMI = *MRI.getVRegDef(SrcReg);
-      if (!isSignExtendedW(SrcMI, MRI))
-        continue;
+
+      // The sext.w can be deleted if all users only read the lower 32 bits of
+      // its result, or if every definition reaching it already sign-extends.
+      if (!isAllUsesReadW(*MI, MRI) && !isSignExtendedW(SrcMI, MRI))
+        continue;
 
       Register DstReg = MI->getOperand(0).getReg();
       if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
Index: llvm/test/CodeGen/RISCV/sextw-removal.ll
===================================================================
--- llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -419,3 +419,50 @@
 bb7:                                              ; preds = %bb2
   ret void
 }
+
+define i64 @test10(i64 %arg1, i64 %arg2) {
+; CHECK-LABEL: test10:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a2, 254
+; CHECK-NEXT:    mv a3, a0
+; CHECK-NEXT:  .LBB8_1: # %bb2
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mul a3, a3, a1
+; CHECK-NEXT:    add a3, a3, a1
+; CHECK-NEXT:    blt a2, a0, .LBB8_1
+; CHECK-NEXT:  # %bb.2: # %bb7
+; CHECK-NEXT:    sext.w a0, a3
+; CHECK-NEXT:    ret
+;
+; NOREMOVAL-LABEL: test10:
+; NOREMOVAL:       # %bb.0: # %entry
+; NOREMOVAL-NEXT:    li a2, 254
+; NOREMOVAL-NEXT:    mv a3, a0
+; NOREMOVAL-NEXT:  .LBB8_1: # %bb2
+; NOREMOVAL-NEXT:    # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT:    sext.w a3, a3
+; NOREMOVAL-NEXT:    mul a3, a3, a1
+; NOREMOVAL-NEXT:    add a3, a3, a1
+; NOREMOVAL-NEXT:    blt a2, a0, .LBB8_1
+; NOREMOVAL-NEXT:  # %bb.2: # %bb7
+; NOREMOVAL-NEXT:    sext.w a0, a3
+; NOREMOVAL-NEXT:    ret
+entry:
+  br label %bb2
+
+bb2:                                              ; preds = %bb2, %entry
+  %i1 = phi i64 [ %arg1, %entry ], [ %i5, %bb2 ]
+  %i2 = shl i64 %i1, 32
+  %i3 = ashr i64 %i2, 32
+  %i4 = mul i64 %i3, %arg2
+  %i5 = add i64 %i4, %arg2
+  %i6 = icmp slt i64 %arg1, 255
+  br i1 %i6, label %bb7, label %bb2
+
+bb7:                                              ; preds = %bb2
+  %i7 = shl i64 %i5, 32
+  %i8 = ashr i64 %i7, 32
+  ret i64 %i8
+}
+
+declare i64 @helper(i64)