diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp --- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp +++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp @@ -91,7 +91,7 @@ case RISCV::FCVT_WU_D: case RISCV::FMV_X_W: // The following aren't W instructions, but are either sign extended from a - // smaller size or put zeros in bits 63:31. + // smaller size, always output a small integer, or put zeros in bits 63:31. case RISCV::LBU: case RISCV::LHU: case RISCV::LB: @@ -104,6 +104,11 @@ case RISCV::SEXT_H: case RISCV::ZEXT_H_RV64: case RISCV::FMV_X_H: + case RISCV::BEXT: + case RISCV::BEXTI: + case RISCV::CLZ: + case RISCV::CPOP: + case RISCV::CTZ: return true; // shifting right sufficiently makes the value 32-bit sign-extended case RISCV::SRAI: @@ -167,14 +172,21 @@ Worklist.push_back(SrcMI); break; } + + // For these, we just need to check if the 1st operand is sign extended. + case RISCV::BCLRI: + case RISCV::BINVI: + case RISCV::BSETI: + if (MI->getOperand(2).getImm() >= 31) + return false; + LLVM_FALLTHROUGH; case RISCV::REM: case RISCV::ANDI: case RISCV::ORI: case RISCV::XORI: { // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R. // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1 - // Logical operations use a sign extended 12-bit immediate. We just need - // to check if the other operand is sign extended. + // Logical operations use a sign extended 12-bit immediate.
Register SrcReg = MI->getOperand(1).getReg(); if (!SrcReg.isVirtual()) return false; diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -315,17 +315,108 @@ } declare float @baz(i32 signext %i3) +define void @test7(i32 signext %arg, i32 signext %arg1) nounwind { +; RV64I-LABEL: test7: +; RV64I: # %bb.0: # %bb +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lui a2, %hi(.LCPI6_0) +; RV64I-NEXT: ld s0, %lo(.LCPI6_0)(a2) +; RV64I-NEXT: lui a2, %hi(.LCPI6_1) +; RV64I-NEXT: ld s1, %lo(.LCPI6_1)(a2) +; RV64I-NEXT: lui a2, %hi(.LCPI6_2) +; RV64I-NEXT: ld s2, %lo(.LCPI6_2)(a2) +; RV64I-NEXT: lui a2, %hi(.LCPI6_3) +; RV64I-NEXT: ld s3, %lo(.LCPI6_3)(a2) +; RV64I-NEXT: sraw a0, a0, a1 +; RV64I-NEXT: .LBB6_1: # %bb2 +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: call foo@plt +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: and a1, a1, s0 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: and a1, a0, s1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: mul a0, a0, s3 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: bnez a0, .LBB6_1 +; RV64I-NEXT: # %bb.2: # %bb7 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: test7: +; RV64ZBB: # %bb.0: # %bb +; RV64ZBB-NEXT: addi 
sp, sp, -16 +; RV64ZBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64ZBB-NEXT: sraw a0, a0, a1 +; RV64ZBB-NEXT: .LBB6_1: # %bb2 +; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64ZBB-NEXT: call foo@plt +; RV64ZBB-NEXT: cpop a0, a0 +; RV64ZBB-NEXT: bnez a0, .LBB6_1 +; RV64ZBB-NEXT: # %bb.2: # %bb7 +; RV64ZBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64ZBB-NEXT: addi sp, sp, 16 +; RV64ZBB-NEXT: ret +; +; NOREMOVAL-LABEL: test7: +; NOREMOVAL: # %bb.0: # %bb +; NOREMOVAL-NEXT: addi sp, sp, -16 +; NOREMOVAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; NOREMOVAL-NEXT: sraw a0, a0, a1 +; NOREMOVAL-NEXT: .LBB6_1: # %bb2 +; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 +; NOREMOVAL-NEXT: sext.w a0, a0 +; NOREMOVAL-NEXT: call foo@plt +; NOREMOVAL-NEXT: cpop a0, a0 +; NOREMOVAL-NEXT: bnez a0, .LBB6_1 +; NOREMOVAL-NEXT: # %bb.2: # %bb7 +; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; NOREMOVAL-NEXT: addi sp, sp, 16 +; NOREMOVAL-NEXT: ret +bb: + %i = ashr i32 %arg, %arg1 + br label %bb2 + +bb2: ; preds = %bb2, %bb + %i3 = phi i32 [ %i, %bb ], [ %i6, %bb2 ] + %i4 = tail call signext i64 @foo(i32 signext %i3) + %i5 = tail call i64 @llvm.ctpop.i64(i64 %i4) + %i6 = trunc i64 %i5 to i32 + %i7 = icmp eq i32 %i6, 0 + br i1 %i7, label %bb7, label %bb2 + +bb7: ; preds = %bb2 + ret void +} + +declare i64 @llvm.ctpop.i64(i64) + define void @test8(i32 signext %arg, i32 signext %arg1) nounwind { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: sraw a0, a0, a1 -; CHECK-NEXT: .LBB6_1: # %bb2 +; CHECK-NEXT: .LBB7_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: call foo@plt ; CHECK-NEXT: ori a0, a0, -256 -; CHECK-NEXT: bnez a0, .LBB6_1 +; CHECK-NEXT: bnez a0, .LBB7_1 ; CHECK-NEXT: # %bb.2: # %bb7 ; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 @@ -336,12 +427,12 @@ ; NOREMOVAL-NEXT: addi sp, sp, -16 ; NOREMOVAL-NEXT: 
sd ra, 8(sp) # 8-byte Folded Spill ; NOREMOVAL-NEXT: sraw a0, a0, a1 -; NOREMOVAL-NEXT: .LBB6_1: # %bb2 +; NOREMOVAL-NEXT: .LBB7_1: # %bb2 ; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 ; NOREMOVAL-NEXT: sext.w a0, a0 ; NOREMOVAL-NEXT: call foo@plt ; NOREMOVAL-NEXT: ori a0, a0, -256 -; NOREMOVAL-NEXT: bnez a0, .LBB6_1 +; NOREMOVAL-NEXT: bnez a0, .LBB7_1 ; NOREMOVAL-NEXT: # %bb.2: # %bb7 ; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; NOREMOVAL-NEXT: addi sp, sp, 16 @@ -372,12 +463,12 @@ ; CHECK-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: sraw a0, a0, a1 ; CHECK-NEXT: li s0, 254 -; CHECK-NEXT: .LBB7_1: # %bb2 +; CHECK-NEXT: .LBB8_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: call bar@plt ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slti a0, a0, 255 -; CHECK-NEXT: blt s0, a1, .LBB7_1 +; CHECK-NEXT: blt s0, a1, .LBB8_1 ; CHECK-NEXT: # %bb.2: # %bb7 ; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -391,12 +482,12 @@ ; NOREMOVAL-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; NOREMOVAL-NEXT: sraw a1, a0, a1 ; NOREMOVAL-NEXT: li s0, 254 -; NOREMOVAL-NEXT: .LBB7_1: # %bb2 +; NOREMOVAL-NEXT: .LBB8_1: # %bb2 ; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 ; NOREMOVAL-NEXT: sext.w a0, a1 ; NOREMOVAL-NEXT: call bar@plt ; NOREMOVAL-NEXT: slti a1, a0, 255 -; NOREMOVAL-NEXT: blt s0, a0, .LBB7_1 +; NOREMOVAL-NEXT: blt s0, a0, .LBB8_1 ; NOREMOVAL-NEXT: # %bb.2: # %bb7 ; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; NOREMOVAL-NEXT: ld s0, 0(sp) # 8-byte Folded Reload @@ -426,12 +517,12 @@ ; CHECK-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; CHECK-NEXT: sraw a0, a0, a1 ; CHECK-NEXT: fmv.w.x fs0, zero -; CHECK-NEXT: .LBB8_1: # %bb2 +; CHECK-NEXT: .LBB9_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: call baz@plt ; CHECK-NEXT: feq.s a1, fa0, fs0 ; CHECK-NEXT: fmv.x.w a0, fa0 -; CHECK-NEXT: beqz a1, .LBB8_1 +; CHECK-NEXT: beqz a1, .LBB9_1 ; CHECK-NEXT: 
# %bb.2: # %bb7 ; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload @@ -445,13 +536,13 @@ ; NOREMOVAL-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; NOREMOVAL-NEXT: sraw a0, a0, a1 ; NOREMOVAL-NEXT: fmv.w.x fs0, zero -; NOREMOVAL-NEXT: .LBB8_1: # %bb2 +; NOREMOVAL-NEXT: .LBB9_1: # %bb2 ; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 ; NOREMOVAL-NEXT: sext.w a0, a0 ; NOREMOVAL-NEXT: call baz@plt ; NOREMOVAL-NEXT: feq.s a1, fa0, fs0 ; NOREMOVAL-NEXT: fmv.x.w a0, fa0 -; NOREMOVAL-NEXT: beqz a1, .LBB8_1 +; NOREMOVAL-NEXT: beqz a1, .LBB9_1 ; NOREMOVAL-NEXT: # %bb.2: # %bb7 ; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; NOREMOVAL-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload