Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -268,6 +268,19 @@
   bool applyCombineExtOfExt(MachineInstr &MI,
                             std::tuple<Register, unsigned> &MatchInfo);
 
+  /// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
+  bool matchCombineTruncOfExt(MachineInstr &MI,
+                              std::pair<Register, unsigned> &MatchInfo);
+  bool applyCombineTruncOfExt(MachineInstr &MI,
+                              std::pair<Register, unsigned> &MatchInfo);
+
+  /// Transform trunc (shl x, K) to shl (trunc x),
+  /// K => K < VT.getScalarSizeInBits().
+  bool matchCombineTruncOfShl(MachineInstr &MI,
+                              std::pair<Register, Register> &MatchInfo);
+  bool applyCombineTruncOfShl(MachineInstr &MI,
+                              std::pair<Register, Register> &MatchInfo);
+
   /// Return true if any explicit use operand on \p MI is defined by a
   /// G_IMPLICIT_DEF.
   bool matchAnyExplicitUseIsUndef(MachineInstr &MI);
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -198,7 +198,7 @@
 // replaced with undef.
 def propagate_undef_any_op: GICombineRule<
   (defs root:$root),
-  (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR):$root,
+  (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC):$root,
          [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]),
   (apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
 
@@ -381,6 +381,24 @@
   (apply [{ return Helper.applyNotCmp(*${d}, ${info}); }])
 >;
 
+// Fold trunc ([asz]ext x) -> x or ([asz]ext x) or (trunc x).
+def trunc_ext_fold_matchinfo : GIDefMatchData<"std::pair<Register, unsigned>">;
+def trunc_ext_fold: GICombineRule <
+  (defs root:$root, trunc_ext_fold_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_TRUNC):$root,
+         [{ return Helper.matchCombineTruncOfExt(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyCombineTruncOfExt(*${root}, ${matchinfo}); }])
+>;
+
+// Fold trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits().
+def trunc_shl_matchinfo : GIDefMatchData<"std::pair<Register, Register>">;
+def trunc_shl: GICombineRule <
+  (defs root:$root, trunc_shl_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_TRUNC):$root,
+         [{ return Helper.matchCombineTruncOfShl(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyCombineTruncOfShl(*${root}, ${matchinfo}); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -409,4 +427,4 @@
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,
     known_bits_simplifications, ext_ext_fold,
-    not_cmp_fold]>;
+    not_cmp_fold, trunc_ext_fold, trunc_shl]>;
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1816,6 +1816,82 @@
   return false;
 }
 
+bool CombinerHelper::matchCombineTruncOfExt(
+    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+  Register SrcReg = MI.getOperand(1).getReg();
+  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+  unsigned SrcOpc = SrcMI->getOpcode();
+  if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
+      SrcOpc == TargetOpcode::G_ZEXT) {
+    MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::applyCombineTruncOfExt(
+    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+  Register SrcReg = MatchInfo.first;
+  unsigned SrcExtOp = MatchInfo.second;
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT SrcTy = MRI.getType(SrcReg);
+  LLT DstTy = MRI.getType(DstReg);
+  if (SrcTy == DstTy) {
+    MI.eraseFromParent();
+    replaceRegWith(MRI, DstReg, SrcReg);
+    return true;
+  }
+  Builder.setInstrAndDebugLoc(MI);
+  if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
+    Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
+  else
+    Builder.buildTrunc(DstReg, SrcReg);
+  MI.eraseFromParent();
+  return true;
+}
+
+bool CombinerHelper::matchCombineTruncOfShl(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  Register ShiftSrc;
+  Register ShiftAmt;
+
+  LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(DstTy);
+  if (MRI.hasOneNonDBGUse(SrcReg) &&
+      mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
+      isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {DstTy, ShiftAmtTy}})) {
+    KnownBits Known = KB->getKnownBits(ShiftAmt);
+    unsigned Size = DstTy.getSizeInBits();
+    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+      MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
+      return true;
+    }
+  }
+  return false;
+}
+
+bool CombinerHelper::applyCombineTruncOfShl(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+
+  Register ShiftSrc = MatchInfo.first;
+  Register ShiftAmt = MatchInfo.second;
+  Builder.setInstrAndDebugLoc(MI);
+  Builder.buildShl(DstReg, Builder.buildTrunc(DstTy, ShiftSrc),
+                   Builder.buildTrunc(DstTy, ShiftAmt), SrcMI->getFlags());
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
   return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
     return MO.isReg() &&
Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -107,8 +107,8 @@
 ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s96) = G_ADD %{{[0-9]+}}:_, %{{[0-9]+}}:_ (in function: nonpow2_add_narrowing)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_add_narrowing
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_add_narrowing:
-define void @nonpow2_add_narrowing() {
-  %a = add i128 undef, undef
+define void @nonpow2_add_narrowing(i128 %x, i128 %y) {
+  %a = add i128 %x, %y
   %b = trunc i128 %a to i96
   %dummy = add i96 %b, %b
   store i96 %dummy, i96* undef
Index: llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
@@ -0,0 +1,142 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs  %s | FileCheck %s
+---
+name:            test_combine_trunc_undef
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_trunc_undef
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: $w0 = COPY [[DEF]](s32)
+    %0:_(s64) = G_IMPLICIT_DEF
+    %1:_(s32) = G_TRUNC %0(s64)
+    $w0 = COPY %1(s32)
+...
+---
+name:            test_combine_trunc_undef_vec
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_trunc_undef_vec
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: $x0 = COPY [[DEF]](<2 x s32>)
+    %0:_(<2 x s64>) = G_IMPLICIT_DEF
+    %1:_(<2 x s32>) = G_TRUNC %0(<2 x s64>)
+    $x0 = COPY %1(<2 x s32>)
+...
+---
+name:            test_combine_trunc_anyext_s32_s16
+body:             |
+  bb.1:
+  liveins: $h0
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16)
+    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+    %0:_(s16) = COPY $h0
+    %1:_(s64) = G_ANYEXT %0(s16)
+    %2:_(s32) = G_TRUNC %1(s64)
+    $w0 = COPY %2(s32)
+...
+---
+name:            test_combine_trunc_anyext_s32_s16_vec
+body:             |
+  bb.1:
+  liveins: $s0
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s16_vec
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT [[COPY]](<2 x s16>)
+    ; CHECK: $x0 = COPY [[ANYEXT]](<2 x s32>)
+    %0:_(<2 x s16>) = COPY $s0
+    %1:_(<2 x s64>) = G_ANYEXT %0(<2 x s16>)
+    %2:_(<2 x s32>) = G_TRUNC %1(<2 x s64>)
+    $x0 = COPY %2(<2 x s32>)
+...
+---
+name:            test_combine_trunc_sext_s32_s16
+body:             |
+  bb.1:
+  liveins: $h0
+    ; CHECK-LABEL: name: test_combine_trunc_sext_s32_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16)
+    ; CHECK: $w0 = COPY [[SEXT]](s32)
+    %0:_(s16) = COPY $h0
+    %1:_(s64) = G_SEXT %0(s16)
+    %2:_(s32) = G_TRUNC %1(s64)
+    $w0 = COPY %2(s32)
+...
+---
+name:            test_combine_trunc_zext_s32_s16
+body:             |
+  bb.1:
+  liveins: $h0
+    ; CHECK-LABEL: name: test_combine_trunc_zext_s32_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16)
+    ; CHECK: $w0 = COPY [[ZEXT]](s32)
+    %0:_(s16) = COPY $h0
+    %1:_(s64) = G_ZEXT %0(s16)
+    %2:_(s32) = G_TRUNC %1(s64)
+    $w0 = COPY %2(s32)
+...
+---
+name:            test_combine_trunc_anyext_s32_s32
+body:             |
+  bb.1:
+  liveins: $w0
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: $w0 = COPY [[COPY]](s32)
+    %0:_(s32) = COPY $w0
+    %1:_(s64) = G_ANYEXT %0(s32)
+    %2:_(s32) = G_TRUNC %1(s64)
+    $w0 = COPY %2(s32)
+...
+---
+name:            test_combine_trunc_anyext_s32_s64
+body:             |
+  bb.1:
+  liveins: $x0
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK: $w0 = COPY [[TRUNC]](s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s128) = G_ANYEXT %0(s64)
+    %2:_(s32) = G_TRUNC %1(s128)
+    $w0 = COPY %2(s32)
+...
+---
+name:            test_combine_trunc_shl_s32_by_2
+body:             |
+  bb.1:
+  liveins: $w0
+    ; CHECK-LABEL: name: test_combine_trunc_shl_s32_by_2
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+    ; CHECK: $h0 = COPY [[SHL]](s16)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 2
+    %2:_(s32) = G_SHL %0(s32), %1(s32)
+    %3:_(s16) = G_TRUNC %2(s32)
+    $h0 = COPY %3(s16)
+...
+---
+name:            test_combine_trunc_shl_s32_by_17
+body:             |
+  bb.1:
+  liveins: $w0
+    ; CHECK-LABEL: name: test_combine_trunc_shl_s32_by_17
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK: $h0 = COPY [[TRUNC]](s16)
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = G_CONSTANT i32 17
+    %2:_(s32) = G_SHL %0(s32), %1(s32)
+    %3:_(s16) = G_TRUNC %2(s32)
+    $h0 = COPY %3(s16)
+...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
@@ -82,14 +82,14 @@
 ;
 ; GFX8-LABEL: s_shl_i8_7:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_and_b32 s0, s0, 0xff
-; GFX8-NEXT:    s_lshl_b32 s0, s0, 7
+; GFX8-NEXT:    s_bfe_u32 s1, 7, 0x100000
+; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-LABEL: s_shl_i8_7:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_and_b32 s0, s0, 0xff
-; GFX9-NEXT:    s_lshl_b32 s0, s0, 7
+; GFX9-NEXT:    s_bfe_u32 s1, 7, 0x100000
+; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
 ; GFX9-NEXT:    ; return to shader part epilog
   %result = shl i8 %value, 7
   ret i8 %result
@@ -426,14 +426,14 @@
 ;
 ; GFX8-LABEL: s_shl_i16_15:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
-; GFX8-NEXT:    s_lshl_b32 s0, s0, 15
+; GFX8-NEXT:    s_bfe_u32 s1, 15, 0x100000
+; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-LABEL: s_shl_i16_15:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
-; GFX9-NEXT:    s_lshl_b32 s0, s0, 15
+; GFX9-NEXT:    s_bfe_u32 s1, 15, 0x100000
+; GFX9-NEXT:    s_lshl_b32 s0, s0, s1
 ; GFX9-NEXT:    ; return to shader part epilog
   %result = shl i16 %value, 15
   ret i16 %result
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll
@@ -37,7 +37,6 @@
 ; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 ; GFX8-NEXT:    s_mov_b32 s3, s2
 ; GFX8-NEXT:    s_and_b32 s0, s0, s2
-; GFX8-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 ; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX8-NEXT:    s_and_b32 s0, s0, s2
@@ -121,10 +120,8 @@
 ; GFX8-NEXT:    s_mov_b32 s5, s4
 ; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
 ; GFX8-NEXT:    s_and_b32 s6, s1, s4
-; GFX8-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
-; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
-; GFX8-NEXT:    s_and_b64 s[2:3], s[6:7], s[4:5]
-; GFX8-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX8-NEXT:    s_xor_b64 s[0:1], s[2:3], s[4:5]
+; GFX8-NEXT:    s_xor_b64 s[2:3], s[6:7], s[4:5]
 ; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
 ; GFX8-NEXT:    s_and_b32 s0, s0, s4
 ; GFX8-NEXT:    s_or_b32 s0, s1, s0