diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1712,34 +1712,49 @@ // which will choose the cheapest for the target. switch (CCCode) { default: llvm_unreachable("Unknown integer comparison!"); + case ISD::SETUGE: + case ISD::SETUGT: + case ISD::SETULE: + case ISD::SETULT: case ISD::SETEQ: case ISD::SETNE: { SDValue OpL = GetPromotedInteger(LHS); SDValue OpR = GetPromotedInteger(RHS); - // We would prefer to promote the comparison operand with sign extension. - // If the width of OpL/OpR excluding the duplicated sign bits is no greater - // than the width of LHS/RHS, we can avoid inserting real truncate - // instruction, which is redundant eventually. - unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL); - unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR); - if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() && - OpREffectiveBits <= RHS.getScalarValueSizeInBits()) { - LHS = OpL; - RHS = OpR; + if (TLI.isSExtCheaperThanZExt(LHS.getValueType(), OpL.getValueType())) { + // Target would prefer to promote the comparison operand with sign + // extension. Honor that unless the promoted values are already zero + // extended. + unsigned OpLEffectiveBits = + DAG.computeKnownBits(OpL).countMaxActiveBits(); + unsigned OpREffectiveBits = + DAG.computeKnownBits(OpR).countMaxActiveBits(); + if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() && + OpREffectiveBits <= RHS.getScalarValueSizeInBits()) { + LHS = OpL; + RHS = OpR; + } else { + LHS = SExtPromotedInteger(LHS); + RHS = SExtPromotedInteger(RHS); + } } else { - LHS = SExtOrZExtPromotedInteger(LHS); - RHS = SExtOrZExtPromotedInteger(RHS); + // We would prefer to promote the comparison operand with zero extension. + // If the width of OpL/OpR excluding the duplicated sign bits is no + // greater than the width of LHS/RHS, we can avoid inserting a zero extend + // operation that we might not be able to remove. + unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL); + unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR); + if (OpLEffectiveBits <= LHS.getScalarValueSizeInBits() && + OpREffectiveBits <= RHS.getScalarValueSizeInBits()) { + LHS = OpL; + RHS = OpR; + } else { + LHS = ZExtPromotedInteger(LHS); + RHS = ZExtPromotedInteger(RHS); + } } break; } - case ISD::SETUGE: - case ISD::SETUGT: - case ISD::SETULE: - case ISD::SETULT: - LHS = SExtOrZExtPromotedInteger(LHS); - RHS = SExtOrZExtPromotedInteger(RHS); - break; case ISD::SETGE: case ISD::SETGT: case ISD::SETLT: diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -1072,10 +1072,9 @@ ; RV64-NEXT: call __gnu_h2f_ieee@plt ; RV64-NEXT: fmv.w.x ft0, a0 ; RV64-NEXT: fcvt.lu.s a0, ft0, rtz -; RV64-NEXT: sext.w a2, a0 ; RV64-NEXT: lui a1, 16 ; RV64-NEXT: addiw a1, a1, -1 -; RV64-NEXT: bltu a2, a1, .LBB16_2 +; RV64-NEXT: bltu a0, a1, .LBB16_2 ; RV64-NEXT: # %bb.1: # %entry ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB16_2: # %entry diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -1091,7 +1091,6 @@ ; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -1102,12 +1101,11 @@ ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 ; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: .cfi_offset s9, -88 -; CHECK-NEXT: lhu s5, 0(a1) +; CHECK-NEXT: lhu s6, 0(a1) ; CHECK-NEXT: lhu s2, 56(a1) ; CHECK-NEXT: lhu s3, 48(a1) ; CHECK-NEXT: lhu s4, 40(a1) -; CHECK-NEXT: lhu s6, 32(a1) +; CHECK-NEXT: lhu s5, 32(a1) ; CHECK-NEXT: lhu s7, 24(a1) ; CHECK-NEXT: lhu s1, 16(a1) ; CHECK-NEXT: lhu a1, 8(a1) @@ -1117,13 +1115,13 @@ ; CHECK-NEXT: mv s8, a0 ; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s9, a0 +; CHECK-NEXT: mv s1, a0 ; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: mv a0, s6 +; CHECK-NEXT: mv s7, a0 +; CHECK-NEXT: mv a0, s5 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: mv s6, a0 +; CHECK-NEXT: mv s5, a0 ; CHECK-NEXT: mv a0, s4 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s4, a0 @@ -1134,69 +1132,61 @@ ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: mv s2, a0 ; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s9 -; CHECK-NEXT: fcvt.lu.s s7, ft0, rtz +; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill ; CHECK-NEXT: fmv.w.x ft0, s8 ; CHECK-NEXT: fcvt.lu.s s8, ft0, rtz -; CHECK-NEXT: sext.w s1, s8 -; CHECK-NEXT: mv a0, s5 +; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.w.x ft0, a0 ; CHECK-NEXT: fcvt.lu.s a6, ft0, rtz -; CHECK-NEXT: sext.w a0, a6 -; CHECK-NEXT: lui a1, 16 -; CHECK-NEXT: addiw a1, a1, -1 -; CHECK-NEXT: bltu a0, a1, .LBB16_2 +; CHECK-NEXT: lui a0, 16 +; CHECK-NEXT: addiw a1, a0, -1 +; CHECK-NEXT: bltu a6, a1, .LBB16_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a6, a1 ; CHECK-NEXT: .LBB16_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s6 -; CHECK-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload -; CHECK-NEXT: fcvt.lu.s a7, ft1, rtz -; CHECK-NEXT: sext.w a4, s7 -; CHECK-NEXT: bltu s1, a1, .LBB16_4 +; CHECK-NEXT: fmv.w.x ft1, s7 +; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: fcvt.lu.s a2, ft0, rtz +; CHECK-NEXT: bltu s8, a1, .LBB16_4 ; CHECK-NEXT: # %bb.3: # %entry ; CHECK-NEXT: mv s8, a1 ; CHECK-NEXT: .LBB16_4: # %entry -; CHECK-NEXT: fmv.w.x ft1, s4 -; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz -; CHECK-NEXT: sext.w a5, a7 -; CHECK-NEXT: bltu a4, a1, .LBB16_6 +; CHECK-NEXT: fmv.w.x ft0, s5 +; CHECK-NEXT: fcvt.lu.s a3, ft1, rtz +; CHECK-NEXT: bltu a2, a1, .LBB16_6 ; CHECK-NEXT: # %bb.5: # %entry -; CHECK-NEXT: mv s7, a1 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB16_6: # %entry -; CHECK-NEXT: fmv.w.x ft0, s3 -; CHECK-NEXT: fcvt.lu.s a4, ft1, rtz -; CHECK-NEXT: sext.w s1, a3 -; CHECK-NEXT: bltu a5, a1, .LBB16_8 +; CHECK-NEXT: fmv.w.x ft1, s4 +; CHECK-NEXT: fcvt.lu.s a4, ft0, rtz +; CHECK-NEXT: bltu a3, a1, .LBB16_8 ; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: mv a7, a1 +; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB16_8: # %entry -; CHECK-NEXT: fmv.w.x ft1, s2 -; CHECK-NEXT: fcvt.lu.s a5, ft0, rtz -; CHECK-NEXT: sext.w a0, a4 -; CHECK-NEXT: bltu s1, a1, .LBB16_10 +; CHECK-NEXT: fmv.w.x ft0, s3 +; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz +; CHECK-NEXT: bltu a4, a1, .LBB16_10 ; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: mv a3, a1 +; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB16_10: # %entry -; CHECK-NEXT: fcvt.lu.s s1, ft1, rtz -; CHECK-NEXT: sext.w a2, a5 -; CHECK-NEXT: bgeu a0, a1, .LBB16_15 +; CHECK-NEXT: fmv.w.x ft1, s2 +; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz +; CHECK-NEXT: bgeu a5, a1, .LBB16_15 ; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: sext.w a0, s1 -; CHECK-NEXT: bgeu a2, a1, .LBB16_16 +; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz +; CHECK-NEXT: bgeu s1, a1, .LBB16_16 ; CHECK-NEXT: .LBB16_12: # %entry ; CHECK-NEXT: bltu a0, a1, .LBB16_14 ; CHECK-NEXT: .LBB16_13: # %entry -; CHECK-NEXT: mv s1, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB16_14: # %entry -; CHECK-NEXT: sh s1, 14(s0) -; CHECK-NEXT: sh a5, 12(s0) -; CHECK-NEXT: sh a4, 10(s0) -; CHECK-NEXT: sh a3, 8(s0) -; CHECK-NEXT: sh a7, 6(s0) -; CHECK-NEXT: sh s7, 4(s0) +; CHECK-NEXT: sh a0, 14(s0) +; CHECK-NEXT: sh s1, 12(s0) +; CHECK-NEXT: sh a5, 10(s0) +; CHECK-NEXT: sh a4, 8(s0) +; CHECK-NEXT: sh a3, 6(s0) +; CHECK-NEXT: sh a2, 4(s0) ; CHECK-NEXT: sh s8, 2(s0) ; CHECK-NEXT: sh a6, 0(s0) ; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload @@ -1209,15 +1199,14 @@ ; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 96 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB16_15: # %entry -; CHECK-NEXT: mv a4, a1 -; CHECK-NEXT: sext.w a0, s1 -; CHECK-NEXT: bltu a2, a1, .LBB16_12 -; CHECK-NEXT: .LBB16_16: # %entry ; CHECK-NEXT: mv a5, a1 +; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz +; CHECK-NEXT: bltu s1, a1, .LBB16_12 +; CHECK-NEXT: .LBB16_16: # %entry +; CHECK-NEXT: mv s1, a1 ; CHECK-NEXT: bgeu a0, a1, .LBB16_13 ; CHECK-NEXT: j .LBB16_14 entry: diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -745,13 +745,12 @@ ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: sext.w a2, a0 -; RV64I-NEXT: li a1, 1 -; RV64I-NEXT: beqz a2, .LBB7_2 -; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: .LBB7_2: +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: beqz a1, .LBB7_2 +; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB7_2: ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret