diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -199,6 +199,10 @@ // Function alignments. setMinFunctionAlignment(Align(4)); + // Set preferred alignments. + setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment()); + setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); + setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment()); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h @@ -52,6 +52,10 @@ LoongArchTargetLowering TLInfo; SelectionDAGTargetInfo TSInfo; + Align PrefFunctionAlignment; + Align PrefLoopAlignment; + unsigned MaxBytesForAlignment; + /// Initializes using the passed in CPU and feature strings so that we can /// use initializer lists for subtarget initialization. LoongArchSubtarget &initializeSubtargetDependencies(const Triple &TT, @@ -60,6 +64,9 @@ StringRef FS, StringRef ABIName); + /// Initialize properties based on the selected processor family. + void initializeProperties(StringRef TuneCPU); + public: // Initializes the data members to match that of the specified triple. 
LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, @@ -97,6 +104,9 @@ unsigned getGRLen() const { return GRLen; } LoongArchABI::ABI getTargetABI() const { return TargetABI; } bool isXRaySupported() const override { return is64Bit(); } + Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } + Align getPrefLoopAlignment() const { return PrefLoopAlignment; } + unsigned getMaxBytesForAlignment() const { return MaxBytesForAlignment; } }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp @@ -35,6 +35,7 @@ TuneCPU = CPU; ParseSubtargetFeatures(CPU, TuneCPU, FS); + initializeProperties(TuneCPU); if (Is64Bit) { GRLenVT = MVT::i64; GRLen = 64; @@ -54,6 +55,32 @@ return *this; } +void LoongArchSubtarget::initializeProperties(StringRef TuneCPU) { + // Initialize CPU specific properties. We should add a tablegen feature for + // this in the future so we can specify it together with the subtarget + // features. + + // TODO: Check TuneCPU and override defaults (that are for LA464) once we + // support optimizing for more uarchs. + + // Default to the alignment settings empirically confirmed to perform best + // on LA464, with 4-wide instruction fetch and decode stages. These settings + // can be overridden per TuneCPU here once more uarchs are supported. + // + // We default to such higher-than-minimum alignments because we assume that: + // + // * these settings should benefit most existing uarchs/users, + // * future general-purpose LoongArch cores are likely to have issue widths + // equal to or wider than 4, + // * instruction sequences best for LA464 should not pessimize other future + // uarchs, and + // * narrower cores would not suffer much (aside from slightly increased + // ICache footprint maybe), compared to the gains everywhere else. 
+ PrefFunctionAlignment = Align(32); + PrefLoopAlignment = Align(16); + MaxBytesForAlignment = 16; +} + LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, StringRef ABIName, diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll --- a/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/LoongArch/atomicrmw-uinc-udec-wrap.ll @@ -13,6 +13,7 @@ ; LA64-NEXT: andi $a0, $a0, 24 ; LA64-NEXT: nor $a4, $a4, $zero ; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB0_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB0_3 Depth 2 @@ -66,6 +67,7 @@ ; LA64-NEXT: andi $a0, $a0, 24 ; LA64-NEXT: nor $a4, $a4, $zero ; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB1_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB1_3 Depth 2 @@ -111,6 +113,7 @@ ; LA64: # %bb.0: ; LA64-NEXT: ld.w $a3, $a0, 0 ; LA64-NEXT: bstrpick.d $a2, $a1, 31, 0 +; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB2_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB2_3 Depth 2 @@ -150,6 +153,7 @@ ; LA64-LABEL: atomicrmw_uinc_wrap_i64: ; LA64: # %bb.0: ; LA64-NEXT: ld.d $a2, $a0, 0 +; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB3_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB3_3 Depth 2 @@ -195,6 +199,7 @@ ; LA64-NEXT: andi $a0, $a0, 24 ; LA64-NEXT: nor $a4, $a4, $zero ; LA64-NEXT: andi $a5, $a1, 255 +; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB4_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB4_3 Depth 2 @@ -253,6 +258,7 @@ ; LA64-NEXT: andi $a0, $a0, 24 ; LA64-NEXT: nor $a4, $a4, $zero ; LA64-NEXT: bstrpick.d $a5, $a1, 15, 0 +; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB5_1: # 
%atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB5_3 Depth 2 @@ -303,6 +309,7 @@ ; LA64: # %bb.0: ; LA64-NEXT: ld.w $a4, $a0, 0 ; LA64-NEXT: bstrpick.d $a3, $a1, 31, 0 +; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB6_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB6_3 Depth 2 @@ -347,6 +354,7 @@ ; LA64-LABEL: atomicrmw_udec_wrap_i64: ; LA64: # %bb.0: ; LA64-NEXT: ld.d $a2, $a0, 0 +; LA64-NEXT: .p2align 4, , 16 ; LA64-NEXT: .LBB7_1: # %atomicrmw.start ; LA64-NEXT: # =>This Loop Header: Depth=1 ; LA64-NEXT: # Child Loop BB7_3 Depth 2 diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll @@ -11,6 +11,7 @@ ; LA64F-NEXT: addi.w $a1, $zero, 1 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 ; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB0_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 ; LA64F-NEXT: # Child Loop BB0_3 Depth 2 @@ -46,6 +47,7 @@ ; LA64D-NEXT: addi.w $a1, $zero, 1 ; LA64D-NEXT: movgr2fr.w $fa1, $a1 ; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB0_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 ; LA64D-NEXT: # Child Loop BB0_3 Depth 2 @@ -85,6 +87,7 @@ ; LA64F-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) ; LA64F-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI1_0) ; LA64F-NEXT: fld.s $fa1, $a1, 0 +; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB1_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 ; LA64F-NEXT: # Child Loop BB1_3 Depth 2 @@ -120,6 +123,7 @@ ; LA64D-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0) ; LA64D-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI1_0) ; LA64D-NEXT: fld.s $fa1, $a1, 0 +; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB1_1: # %atomicrmw.start ; LA64D-NEXT: # 
=>This Loop Header: Depth=1 ; LA64D-NEXT: # Child Loop BB1_3 Depth 2 @@ -159,6 +163,7 @@ ; LA64F-NEXT: addi.w $a1, $zero, 1 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 ; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB2_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 ; LA64F-NEXT: # Child Loop BB2_3 Depth 2 @@ -195,6 +200,7 @@ ; LA64D-NEXT: addi.w $a1, $zero, 1 ; LA64D-NEXT: movgr2fr.w $fa1, $a1 ; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB2_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 ; LA64D-NEXT: # Child Loop BB2_3 Depth 2 @@ -235,6 +241,7 @@ ; LA64F-NEXT: addi.w $a1, $zero, 1 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 ; LA64F-NEXT: ffint.s.w $fa1, $fa1 +; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB3_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 ; LA64F-NEXT: # Child Loop BB3_3 Depth 2 @@ -271,6 +278,7 @@ ; LA64D-NEXT: addi.w $a1, $zero, 1 ; LA64D-NEXT: movgr2fr.w $fa1, $a1 ; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB3_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 ; LA64D-NEXT: # Child Loop BB3_3 Depth 2 @@ -322,6 +330,7 @@ ; LA64F-NEXT: addi.d $s2, $sp, 16 ; LA64F-NEXT: addi.d $s3, $sp, 8 ; LA64F-NEXT: ori $s4, $zero, 2 +; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB4_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64F-NEXT: st.d $a0, $sp, 16 @@ -368,6 +377,7 @@ ; LA64D-NEXT: addi.d $s1, $sp, 16 ; LA64D-NEXT: addi.d $s2, $sp, 8 ; LA64D-NEXT: ori $s3, $zero, 2 +; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB4_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fst.d $fa0, $sp, 16 @@ -414,6 +424,7 @@ ; LA64F-NEXT: addi.d $s2, $sp, 16 ; LA64F-NEXT: addi.d $s3, $sp, 8 ; LA64F-NEXT: ori $s4, $zero, 2 +; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB5_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 
; LA64F-NEXT: st.d $a0, $sp, 16 @@ -460,6 +471,7 @@ ; LA64D-NEXT: addi.d $s1, $sp, 16 ; LA64D-NEXT: addi.d $s2, $sp, 8 ; LA64D-NEXT: ori $s3, $zero, 2 +; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB5_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fst.d $fa0, $sp, 16 @@ -506,6 +518,7 @@ ; LA64F-NEXT: addi.d $s2, $sp, 16 ; LA64F-NEXT: addi.d $s3, $sp, 8 ; LA64F-NEXT: ori $s4, $zero, 2 +; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB6_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64F-NEXT: st.d $a0, $sp, 16 @@ -552,6 +565,7 @@ ; LA64D-NEXT: addi.d $s1, $sp, 16 ; LA64D-NEXT: addi.d $s2, $sp, 8 ; LA64D-NEXT: ori $s3, $zero, 2 +; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB6_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fst.d $fa0, $sp, 16 @@ -599,6 +613,7 @@ ; LA64F-NEXT: addi.d $s2, $sp, 16 ; LA64F-NEXT: addi.d $s3, $sp, 8 ; LA64F-NEXT: ori $s4, $zero, 2 +; LA64F-NEXT: .p2align 4, , 16 ; LA64F-NEXT: .LBB7_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64F-NEXT: st.d $a0, $sp, 16 @@ -645,6 +660,7 @@ ; LA64D-NEXT: addi.d $s1, $sp, 16 ; LA64D-NEXT: addi.d $s2, $sp, 8 ; LA64D-NEXT: ori $s3, $zero, 2 +; LA64D-NEXT: .p2align 4, , 16 ; LA64D-NEXT: .LBB7_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fst.d $fa0, $sp, 16 diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll --- a/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll @@ -5,6 +5,7 @@ define void @foo() noreturn nounwind { ; ALL-LABEL: foo: ; ALL: # %bb.0: # %entry +; ALL-NEXT: .p2align 4, , 16 ; ALL-NEXT: .LBB0_1: # %loop ; ALL-NEXT: # =>This Inner Loop Header: Depth=1 ; ALL-NEXT: b .LBB0_1 diff --git a/llvm/test/CodeGen/LoongArch/preferred-alignments.ll b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll new file mode 
100644 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/preferred-alignments.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 < %s | FileCheck --check-prefix=LA464 %s +; RUN: llc --mtriple=loongarch64 --mcpu=la464 < %s | FileCheck --check-prefix=LA464 %s + +define signext i32 @sum(ptr noalias nocapture noundef readonly %0, i32 noundef signext %1) { +; LA464-LABEL: sum: +; LA464: # %bb.0: +; LA464-NEXT: ori $a2, $zero, 1 +; LA464-NEXT: blt $a1, $a2, .LBB0_4 +; LA464-NEXT: # %bb.1: +; LA464-NEXT: bstrpick.d $a2, $a1, 31, 0 +; LA464-NEXT: move $a1, $zero +; LA464-NEXT: .p2align 4, , 16 +; LA464-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 +; LA464-NEXT: ld.w $a3, $a0, 0 +; LA464-NEXT: add.d $a1, $a3, $a1 +; LA464-NEXT: addi.d $a0, $a0, 4 +; LA464-NEXT: addi.d $a2, $a2, -1 +; LA464-NEXT: bnez $a2, .LBB0_2 +; LA464-NEXT: # %bb.3: +; LA464-NEXT: addi.w $a0, $a1, 0 +; LA464-NEXT: ret +; LA464-NEXT: .LBB0_4: +; LA464-NEXT: move $a1, $zero +; LA464-NEXT: addi.w $a0, $a1, 0 +; LA464-NEXT: ret + %3 = icmp sgt i32 %1, 0 + br i1 %3, label %4, label %6 + +4: ; preds = %2 + %5 = zext i32 %1 to i64 + br label %8 + +6: ; preds = %8, %2 + %7 = phi i32 [ 0, %2 ], [ %13, %8 ] + ret i32 %7 + +8: ; preds = %4, %8 + %9 = phi i64 [ 0, %4 ], [ %14, %8 ] + %10 = phi i32 [ 0, %4 ], [ %13, %8 ] + %11 = getelementptr inbounds i32, ptr %0, i64 %9 + %12 = load i32, ptr %11, align 4 + %13 = add nsw i32 %12, %10 + %14 = add nuw nsw i64 %9, 1 + %15 = icmp eq i64 %14, %5 + br i1 %15, label %6, label %8 +}