diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -440,6 +440,7 @@
     SDValue visitSRA(SDNode *N);
     SDValue visitSRL(SDNode *N);
     SDValue visitFunnelShift(SDNode *N);
+    SDValue visitSHLSAT(SDNode *N);
     SDValue visitRotate(SDNode *N);
     SDValue visitABS(SDNode *N);
     SDValue visitBSWAP(SDNode *N);
@@ -1653,6 +1654,8 @@
   case ISD::ROTL:               return visitRotate(N);
   case ISD::FSHL:
   case ISD::FSHR:               return visitFunnelShift(N);
+  case ISD::SSHLSAT:
+  case ISD::USHLSAT:            return visitSHLSAT(N);
   case ISD::ABS:                return visitABS(N);
   case ISD::BSWAP:              return visitBSWAP(N);
   case ISD::BITREVERSE:         return visitBITREVERSE(N);
@@ -9347,6 +9350,22 @@
   return SDValue();
 }
 
+SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  if (SDValue V = DAG.simplifyShift(N0, N1))
+    return V;
+
+  EVT VT = N0.getValueType();
+
+  // fold (*shlsat c1, c2) -> c1<<c2
+  if (SDValue C =
+          DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
+    return C;
+
+  return SDValue();
+}
+
 // Given a ABS node, detect the following pattern:
 // (ABS (SUB (EXTEND a), (EXTEND b))).
 // Generates UABD/SABD instruction.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5242,6 +5242,8 @@
   case ISD::UADDSAT: return C1.uadd_sat(C2);
   case ISD::SSUBSAT: return C1.ssub_sat(C2);
   case ISD::USUBSAT: return C1.usub_sat(C2);
+  case ISD::SSHLSAT: return C1.sshl_sat(C2);
+  case ISD::USHLSAT: return C1.ushl_sat(C2);
   case ISD::UDIV:
     if (!C2.getBoolValue())
       break;
diff --git a/llvm/test/CodeGen/AArch64/sshl_sat.ll b/llvm/test/CodeGen/AArch64/sshl_sat.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sshl_sat.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+
+declare i16 @llvm.sshl.sat.i16(i16, i16)
+declare <4 x i16> @llvm.sshl.sat.v4i16(<4 x i16>, <4 x i16>)
+
+; fold (shlsat undef, x) -> 0
+define i16 @combine_shl_undef(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shl_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.sshl.sat.i16(i16 undef, i16 %y)
+  ret i16 %tmp
+}
+
+; fold (shlsat x, undef) -> undef
+define i16 @combine_shl_by_undef(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shl_by_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 undef)
+  ret i16 %tmp
+}
+
+; fold (shlsat poison, x) -> 0
+define i16 @combine_shl_poison(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shl_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.sshl.sat.i16(i16 poison, i16 %y)
+  ret i16 %tmp
+}
+
+; fold (shlsat x, poison) -> undef
+define i16 @combine_shl_by_poison(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shl_by_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 poison)
+  ret i16 %tmp
+}
+
+; fold (shlsat x, bitwidth) -> undef
+define i16 @combine_shl_by_bitwidth(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shl_by_bitwidth:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 16)
+  ret i16 %tmp
+}
+
+; fold (shlsat 0, x) -> 0
+define i16 @combine_shl_zero(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shl_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.sshl.sat.i16(i16 0, i16 %y)
+  ret i16 %tmp
+}
+
+; fold (shlsat x, 0) -> x
+define i16 @combine_shlsat_by_zero(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shlsat_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.sshl.sat.i16(i16 %x, i16 0)
+  ret i16 %tmp
+}
+
+; fold (shlsat c1, c2) -> c3
+define i16 @combine_shlsat_constfold(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shlsat_constfold:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, #32
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.sshl.sat.i16(i16 8, i16 2)
+  ret i16 %tmp
+}
+
+; fold (shlsat c1, c2) -> sat max
+define i16 @combine_shlsat_satmax(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shlsat_satmax:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, #32767
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.sshl.sat.i16(i16 8, i16 15)
+  ret i16 %tmp
+}
+
+; fold (shlsat c1, c2) -> sat min
+define i16 @combine_shlsat_satmin(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shlsat_satmin:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, #32768
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.sshl.sat.i16(i16 -8, i16 15)
+  ret i16 %tmp
+}
+
+declare void @sink4xi16(i16, i16, i16, i16)
+
+; fold (shlsat c1, c2) -> c3 , c1/c2/c3 being vectors
+define void @combine_shlsat_vector() nounwind {
+; CHECK-LABEL: combine_shlsat_vector:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    mov w0, #32
+; CHECK-NEXT:    mov w1, #32767
+; CHECK-NEXT:    mov w2, #65504
+; CHECK-NEXT:    mov w3, #32768
+; CHECK-NEXT:    bl sink4xi16
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %tmp = call <4 x i16> @llvm.sshl.sat.v4i16(
+                         <4 x i16> <i16 8, i16 8, i16 -8, i16 -8>,
+                         <4 x i16> <i16 2, i16 15, i16 2, i16 15>)
+  ; Pass elements as arguments in a call to get CHECK statements that verify
+  ; the constant folding.
+  %e0 = extractelement <4 x i16> %tmp, i16 0
+  %e1 = extractelement <4 x i16> %tmp, i16 1
+  %e2 = extractelement <4 x i16> %tmp, i16 2
+  %e3 = extractelement <4 x i16> %tmp, i16 3
+  call void @sink4xi16(i16 %e0, i16 %e1, i16 %e2, i16 %e3)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/ushl_sat.ll b/llvm/test/CodeGen/AArch64/ushl_sat.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ushl_sat.ll
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+
+declare i16 @llvm.ushl.sat.i16(i16, i16)
+declare <2 x i16> @llvm.ushl.sat.v2i16(<2 x i16>, <2 x i16>)
+
+; fold (shlsat undef, x) -> 0
+define i16 @combine_shl_undef(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shl_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.ushl.sat.i16(i16 undef, i16 %y)
+  ret i16 %tmp
+}
+
+; fold (shlsat x, undef) -> undef
+define i16 @combine_shl_by_undef(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shl_by_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 undef)
+  ret i16 %tmp
+}
+
+; fold (shlsat poison, x) -> 0
+define i16 @combine_shl_poison(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shl_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.ushl.sat.i16(i16 poison, i16 %y)
+  ret i16 %tmp
+}
+
+; fold (shlsat x, poison) -> undef
+define i16 @combine_shl_by_poison(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shl_by_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 poison)
+  ret i16 %tmp
+}
+
+; fold (shlsat x, bitwidth) -> undef
+define i16 @combine_shl_by_bitwidth(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shl_by_bitwidth:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 16)
+  ret i16 %tmp
+}
+
+; fold (ushlsat 0, x) -> 0
+define i16 @combine_shl_zero(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shl_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.ushl.sat.i16(i16 0, i16 %y)
+  ret i16 %tmp
+}
+
+; fold (ushlsat x, 0) -> x
+define i16 @combine_shlsat_by_zero(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shlsat_by_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.ushl.sat.i16(i16 %x, i16 0)
+  ret i16 %tmp
+}
+
+; fold (ushlsat c1, c2) -> c3
+define i16 @combine_shlsat_constfold(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shlsat_constfold:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, #32
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.ushl.sat.i16(i16 8, i16 2)
+  ret i16 %tmp
+}
+
+; fold (ushlsat c1, c2) -> sat max
+define i16 @combine_shlsat_satmax(i16 %x, i16 %y) nounwind {
+; CHECK-LABEL: combine_shlsat_satmax:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, #65535
+; CHECK-NEXT:    ret
+  %tmp = call i16 @llvm.ushl.sat.i16(i16 8, i16 15)
+  ret i16 %tmp
+}
+
+
+declare void @sink2xi16(i16, i16)
+
+; fold (ushlsat c1, c2) -> c3 , c1/c2/c3 being vectors
+define void @combine_shlsat_vector() nounwind {
+; CHECK-LABEL: combine_shlsat_vector:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    mov w0, #32
+; CHECK-NEXT:    mov w1, #65535
+; CHECK-NEXT:    bl sink2xi16
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %tmp = call <2 x i16> @llvm.ushl.sat.v2i16(<2 x i16> <i16 8, i16 8>,
+                                             <2 x i16> <i16 2, i16 15>)
+  ; Pass elements as arguments in a call to get CHECK statements that verify
+  ; the constant folding.
+  %e0 = extractelement <2 x i16> %tmp, i16 0
+  %e1 = extractelement <2 x i16> %tmp, i16 1
+  call void @sink2xi16(i16 %e0, i16 %e1)
+  ret void
+}