diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -440,6 +440,7 @@ SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); SDValue visitFunnelShift(SDNode *N); + SDValue visitSHLSAT(SDNode *N); SDValue visitRotate(SDNode *N); SDValue visitABS(SDNode *N); SDValue visitBSWAP(SDNode *N); @@ -1652,6 +1653,8 @@ case ISD::ROTL: return visitRotate(N); case ISD::FSHL: case ISD::FSHR: return visitFunnelShift(N); + case ISD::SSHLSAT: + case ISD::USHLSAT: return visitSHLSAT(N); case ISD::ABS: return visitABS(N); case ISD::BSWAP: return visitBSWAP(N); case ISD::BITREVERSE: return visitBITREVERSE(N); @@ -9346,6 +9349,22 @@ return SDValue(); } +SDValue DAGCombiner::visitSHLSAT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (SDValue V = DAG.simplifyShift(N0, N1)) + return V; + + EVT VT = N0.getValueType(); + + // fold (*shlsat c1, c2) -> c1<<c2 + if (SDValue C = + DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1})) + return C; + + return SDValue(); +} + // Given a ABS node, detect the following pattern: // (ABS (SUB (EXTEND a), (EXTEND b))). // Generates UABD/SABD instruction. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5242,6 +5242,8 @@ case ISD::UADDSAT: return C1.uadd_sat(C2); case ISD::SSUBSAT: return C1.ssub_sat(C2); case ISD::USUBSAT: return C1.usub_sat(C2); + case ISD::SSHLSAT: return C1.sshl_sat(C2); + case ISD::USHLSAT: return C1.ushl_sat(C2); case ISD::UDIV: if (!C2.getBoolValue()) break; diff --git a/llvm/test/CodeGen/AArch64/sshl_sat.ll b/llvm/test/CodeGen/AArch64/sshl_sat.ll --- a/llvm/test/CodeGen/AArch64/sshl_sat.ll +++ b/llvm/test/CodeGen/AArch64/sshl_sat.ll @@ -8,12 +8,7 @@ define i16 @combine_shl_undef(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shl_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: mov w9, #-2147483648 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cinv w9, w9, ge -; CHECK-NEXT: csel w8, w9, w8, ne -; CHECK-NEXT: asr w0, w8, #16 +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %tmp = call i16 @llvm.sshl.sat.i16(i16 undef, i16 %y) ret i16 %tmp @@ -23,13 +18,6 @@ define i16 @combine_shl_by_undef(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shl_by_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w9, w0, #16 -; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: cinv w8, w8, ge -; CHECK-NEXT: cmp w9, w9 -; CHECK-NEXT: csel w8, w8, w9, ne -; CHECK-NEXT: asr w0, w8, #16 ; CHECK-NEXT: ret %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 undef) ret i16 %tmp @@ -39,12 +27,7 @@ define i16 @combine_shl_poison(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shl_poison: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: mov w9, #-2147483648 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cinv w9, w9, ge -; CHECK-NEXT: csel w8, w9, w8, ne -; CHECK-NEXT: asr w0, w8, #16 +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %tmp = call i16 @llvm.sshl.sat.i16(i16 poison, i16 %y) ret i16 %tmp @@ -54,13 +37,6 @@ define 
i16 @combine_shl_by_poison(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shl_by_poison: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w9, w0, #16 -; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: cinv w8, w8, ge -; CHECK-NEXT: cmp w9, w9 -; CHECK-NEXT: csel w8, w8, w9, ne -; CHECK-NEXT: asr w0, w8, #16 ; CHECK-NEXT: ret %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 poison) ret i16 %tmp @@ -70,12 +46,6 @@ define i16 @combine_shl_by_bitwidth(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shl_by_bitwidth: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w9, w0, #16 -; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: cinv w8, w8, ge -; CHECK-NEXT: csel w8, w8, wzr, ne -; CHECK-NEXT: asr w0, w8, #16 ; CHECK-NEXT: ret %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 16) ret i16 %tmp @@ -85,12 +55,7 @@ define i16 @combine_shl_zero(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shl_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: mov w9, #-2147483648 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cinv w9, w9, ge -; CHECK-NEXT: csel w8, w9, w8, ne -; CHECK-NEXT: asr w0, w8, #16 +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %tmp = call i16 @llvm.sshl.sat.i16(i16 0, i16 %y) ret i16 %tmp @@ -100,13 +65,6 @@ define i16 @combine_shlsat_by_zero(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shlsat_by_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w9, w0, #16 -; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: cinv w8, w8, ge -; CHECK-NEXT: cmp w9, w9 -; CHECK-NEXT: csel w8, w8, w9, ne -; CHECK-NEXT: asr w0, w8, #16 ; CHECK-NEXT: ret %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 0) ret i16 %tmp @@ -116,14 +74,7 @@ define i16 @combine_shlsat_constfold(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shlsat_constfold: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #524288 -; CHECK-NEXT: mov w9, #-2147483648 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cinv w9, w9, ge -; CHECK-NEXT: cmp w8, #128, lsl #12 // 
=524288 -; CHECK-NEXT: mov w8, #2097152 -; CHECK-NEXT: csel w8, w9, w8, ne -; CHECK-NEXT: asr w0, w8, #16 +; CHECK-NEXT: mov w0, #32 ; CHECK-NEXT: ret %tmp = call i16 @llvm.sshl.sat.i16(i16 8, i16 2) ret i16 %tmp @@ -133,12 +84,7 @@ define i16 @combine_shlsat_satmax(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shlsat_satmax: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #524288 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: cinv w8, w8, ge -; CHECK-NEXT: csel w8, w8, wzr, ne -; CHECK-NEXT: asr w0, w8, #16 +; CHECK-NEXT: mov w0, #32767 ; CHECK-NEXT: ret %tmp = call i16 @llvm.sshl.sat.i16(i16 8, i16 15) ret i16 %tmp @@ -148,12 +94,7 @@ define i16 @combine_shlsat_satmin(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shlsat_satmin: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-524288 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: cinv w8, w8, ge -; CHECK-NEXT: csel w8, w8, wzr, ne -; CHECK-NEXT: asr w0, w8, #16 +; CHECK-NEXT: mov w0, #32768 ; CHECK-NEXT: ret %tmp = call i16 @llvm.sshl.sat.i16(i16 -8, i16 15) ret i16 %tmp @@ -166,33 +107,10 @@ ; CHECK-LABEL: combine_shlsat_vector: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: mov w8, #524288 -; CHECK-NEXT: mov w9, #-2147483648 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cinv w10, w9, ge -; CHECK-NEXT: csel w11, w10, wzr, ne -; CHECK-NEXT: cmp w8, #128, lsl #12 // =524288 -; CHECK-NEXT: mov w8, #2097152 -; CHECK-NEXT: asr w11, w11, #16 -; CHECK-NEXT: csel w8, w10, w8, ne -; CHECK-NEXT: mov w10, #-524288 -; CHECK-NEXT: asr w8, w8, #16 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: cinv w9, w9, ge -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: csel w8, w9, wzr, ne -; CHECK-NEXT: cmn w10, #128, lsl #12 // =524288 -; CHECK-NEXT: mov w10, #-2097152 -; CHECK-NEXT: csel w9, w9, w10, ne -; CHECK-NEXT: asr w8, w8, #16 -; CHECK-NEXT: mov v0.h[1], w11 -; CHECK-NEXT: asr w9, w9, #16 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: umov w0, v0.h[0] -; CHECK-NEXT: umov w1, v0.h[1] -; CHECK-NEXT: umov w2, v0.h[2] -; CHECK-NEXT: umov w3, v0.h[3] +; CHECK-NEXT: mov w0, #32 +; CHECK-NEXT: mov w1, #32767 +; CHECK-NEXT: mov w2, #65504 +; CHECK-NEXT: mov w3, #32768 ; CHECK-NEXT: bl sink4xi16 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/ushl_sat.ll b/llvm/test/CodeGen/AArch64/ushl_sat.ll --- a/llvm/test/CodeGen/AArch64/ushl_sat.ll +++ b/llvm/test/CodeGen/AArch64/ushl_sat.ll @@ -8,10 +8,7 @@ define i16 @combine_shl_undef(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shl_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csetm w8, ne -; CHECK-NEXT: lsr w0, w8, #16 +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %tmp = call i16 @llvm.ushl.sat.i16(i16 undef, i16 %y) ret i16 %tmp @@ -21,10 +18,6 @@ define i16 @combine_shl_by_undef(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shl_by_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #16 -; CHECK-NEXT: cmp w8, w8 -; CHECK-NEXT: csinv w8, w8, wzr, eq -; CHECK-NEXT: lsr w0, w8, #16 ; CHECK-NEXT: ret %tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 
undef) ret i16 %tmp @@ -34,10 +27,7 @@ define i16 @combine_shl_poison(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shl_poison: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csetm w8, ne -; CHECK-NEXT: lsr w0, w8, #16 +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %tmp = call i16 @llvm.ushl.sat.i16(i16 poison, i16 %y) ret i16 %tmp @@ -47,10 +37,6 @@ define i16 @combine_shl_by_poison(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shl_by_poison: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #16 -; CHECK-NEXT: cmp w8, w8 -; CHECK-NEXT: csinv w8, w8, wzr, eq -; CHECK-NEXT: lsr w0, w8, #16 ; CHECK-NEXT: ret %tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 poison) ret i16 %tmp @@ -60,10 +46,6 @@ define i16 @combine_shl_by_bitwidth(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shl_by_bitwidth: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #16 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csetm w8, ne -; CHECK-NEXT: lsr w0, w8, #16 ; CHECK-NEXT: ret %tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 16) ret i16 %tmp @@ -73,10 +55,7 @@ define i16 @combine_shl_zero(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shl_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csetm w8, ne -; CHECK-NEXT: lsr w0, w8, #16 +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret %tmp = call i16 @llvm.ushl.sat.i16(i16 0, i16 %y) ret i16 %tmp @@ -86,10 +65,6 @@ define i16 @combine_shlsat_by_zero(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shlsat_by_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w0, #16 -; CHECK-NEXT: cmp w8, w8 -; CHECK-NEXT: csinv w8, w8, wzr, eq -; CHECK-NEXT: lsr w0, w8, #16 ; CHECK-NEXT: ret %tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 0) ret i16 %tmp @@ -99,11 +74,7 @@ define i16 @combine_shlsat_constfold(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shlsat_constfold: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #524288 -; CHECK-NEXT: cmp w8, #128, lsl #12 // =524288 -; CHECK-NEXT: mov w8, 
#2097152 -; CHECK-NEXT: csinv w8, w8, wzr, eq -; CHECK-NEXT: lsr w0, w8, #16 +; CHECK-NEXT: mov w0, #32 ; CHECK-NEXT: ret %tmp = call i16 @llvm.ushl.sat.i16(i16 8, i16 2) ret i16 %tmp @@ -113,10 +84,7 @@ define i16 @combine_shlsat_satmax(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shlsat_satmax: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #524288 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csetm w8, ne -; CHECK-NEXT: lsr w0, w8, #16 +; CHECK-NEXT: mov w0, #65535 ; CHECK-NEXT: ret %tmp = call i16 @llvm.ushl.sat.i16(i16 8, i16 15) ret i16 %tmp @@ -130,17 +98,8 @@ ; CHECK-LABEL: combine_shlsat_vector: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w8, #524288 -; CHECK-NEXT: mov w9, #2097152 -; CHECK-NEXT: cmp w8, #128, lsl #12 // =524288 -; CHECK-NEXT: csinv w9, w9, wzr, eq -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csetm w8, ne -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: mov w1, v0.s[1] -; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w0, #32 +; CHECK-NEXT: mov w1, #65535 ; CHECK-NEXT: bl sink2xi16 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret