diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13153,6 +13153,19 @@
     return N0.getOperand(0);
   }
 
+  // Try to narrow a truncate-of-sext_in_reg to the destination type:
+  // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
+  if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+      N0.hasOneUse()) {
+    SDValue X = N0.getOperand(0);
+    SDValue ExtVal = N0.getOperand(1);
+    EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
+    if (ExtVT.bitsLT(VT)) {
+      SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal);
+    }
+  }
+
   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
     return SDValue();
diff --git a/llvm/test/CodeGen/AMDGPU/mul_int24.ll b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
--- a/llvm/test/CodeGen/AMDGPU/mul_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
@@ -181,34 +181,25 @@
 ; SI-LABEL: test_smul48_i64:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_lshlrev_b32_e32 v1, 8, v0
-; SI-NEXT:    v_lshlrev_b32_e32 v2, 8, v2
-; SI-NEXT:    v_ashr_i64 v[3:4], v[0:1], 40
-; SI-NEXT:    v_ashr_i64 v[1:2], v[1:2], 40
-; SI-NEXT:    v_mul_i32_i24_e32 v0, v3, v1
-; SI-NEXT:    v_mul_hi_i32_i24_e32 v1, v3, v1
+; SI-NEXT:    v_mul_i32_i24_e32 v3, v0, v2
+; SI-NEXT:    v_mul_hi_i32_i24_e32 v1, v0, v2
+; SI-NEXT:    v_mov_b32_e32 v0, v3
 ; SI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-LABEL: test_smul48_i64:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    v_lshlrev_b32_e32 v1, 8, v0
-; VI-NEXT:    v_ashrrev_i64 v[3:4], 40, v[0:1]
-; VI-NEXT:    v_lshlrev_b32_e32 v1, 8, v2
-; VI-NEXT:    v_ashrrev_i64 v[1:2], 40, v[0:1]
-; VI-NEXT:    v_mul_i32_i24_e32 v0, v3, v1
-; VI-NEXT:    v_mul_hi_i32_i24_e32 v1, v3, v1
+; VI-NEXT:    v_mul_i32_i24_e32 v3, v0, v2
+; VI-NEXT:    v_mul_hi_i32_i24_e32 v1, v0, v2
+; VI-NEXT:    v_mov_b32_e32 v0, v3
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: test_smul48_i64:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 8, v0
-; GFX9-NEXT:    v_ashrrev_i64 v[3:4], 40, v[0:1]
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 8, v2
-; GFX9-NEXT:    v_ashrrev_i64 v[1:2], 40, v[0:1]
-; GFX9-NEXT:    v_mul_i32_i24_e32 v0, v3, v1
-; GFX9-NEXT:    v_mul_hi_i32_i24_e32 v1, v3, v1
+; GFX9-NEXT:    v_mul_i32_i24_e32 v3, v0, v2
+; GFX9-NEXT:    v_mul_hi_i32_i24_e32 v1, v0, v2
+; GFX9-NEXT:    v_mov_b32_e32 v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; EG-LABEL: test_smul48_i64:
diff --git a/llvm/test/CodeGen/X86/pmulh.ll b/llvm/test/CodeGen/X86/pmulh.ll
--- a/llvm/test/CodeGen/X86/pmulh.ll
+++ b/llvm/test/CodeGen/X86/pmulh.ll
@@ -261,12 +261,14 @@
 ;
 ; AVX512-LABEL: sextinreg_mulhw_v8i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpslld $24, %ymm0, %ymm0
-; AVX512-NEXT:    vpsrad $24, %ymm0, %ymm0
-; AVX512-NEXT:    vpslld $25, %ymm1, %ymm1
-; AVX512-NEXT:    vpsrad $25, %ymm1, %ymm1
+; AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512-NEXT:    vpmovdw %zmm1, %ymm1
+; AVX512-NEXT:    vpsllw $9, %xmm1, %xmm1
+; AVX512-NEXT:    vpsraw $9, %xmm1, %xmm1
 ; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
+; AVX512-NEXT:    vpsllw $8, %xmm0, %xmm0
+; AVX512-NEXT:    vpsraw $8, %xmm0, %xmm0
 ; AVX512-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/sar_fold64.ll b/llvm/test/CodeGen/X86/sar_fold64.ll
--- a/llvm/test/CodeGen/X86/sar_fold64.ll
+++ b/llvm/test/CodeGen/X86/sar_fold64.ll
@@ -6,9 +6,8 @@
 define i32 @shl48sar47(i64 %a) #0 {
 ; CHECK-LABEL: shl48sar47:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movswq %di, %rax
+; CHECK-NEXT:    movswl %di, %eax
 ; CHECK-NEXT:    addl %eax, %eax
-; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-NEXT:    retq
   %1 = shl i64 %a, 48
   %2 = ashr exact i64 %1, 47
@@ -32,9 +31,8 @@
 define i32 @shl56sar55(i64 %a) #0 {
 ; CHECK-LABEL: shl56sar55:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movsbq %dil, %rax
+; CHECK-NEXT:    movsbl %dil, %eax
 ; CHECK-NEXT:    addl %eax, %eax
-; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
 ; CHECK-NEXT:    retq
   %1 = shl i64 %a, 56
   %2 = ashr exact i64 %1, 55