diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3697,12 +3697,14 @@ "to use the value implied by -march/-mcpu. Value will be reflected " "in __riscv_v_fixed_vlen preprocessor define (RISC-V only)">; -def munaligned_access : Flag<["-"], "munaligned-access">, Group<m_Group>, - HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64 only)">; -def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group<m_Group>, - HelpText<"Force all memory accesses to be aligned (AArch32/AArch64 only)">; +def munaligned_access : Flag<["-"], "munaligned-access">, Group<m_Group>, + HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64/LoongArch only)">; +def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group<m_Group>, + HelpText<"Force all memory accesses to be aligned (AArch32/AArch64/LoongArch only)">; def mstrict_align : Flag<["-"], "mstrict-align">, Alias<mno_unaligned_access>, Flags<[CC1Option,HelpHidden]>, HelpText<"Force all memory accesses to be aligned (same as mno-unaligned-access)">; +def mno_strict_align : Flag<["-"], "mno-strict-align">, Alias<munaligned_access>, Flags<[CC1Option,HelpHidden]>, + HelpText<"Allow memory accesses to be unaligned (same as munaligned-access)">; def mno_thumb : Flag<["-"], "mno-thumb">, Group<m_arm_Features_Group>; def mrestrict_it: Flag<["-"], "mrestrict-it">, Group<m_arm_Features_Group>, HelpText<"Disallow generation of complex IT blocks.">; diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "LoongArch.h" +#include "ToolChains/CommonArgs.h" #include "clang/Basic/DiagnosticDriver.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" @@ -133,4 +134,9 @@ 
D.Diag(diag::err_drv_loongarch_invalid_mfpu_EQ) << FPU; } } + + // Select the `ual` feature determined by -m[no-]unaligned-access + // or the alias -m[no-]strict-align. + AddTargetFeature(Args, Features, options::OPT_munaligned_access, + options::OPT_mno_unaligned_access, "ual"); } diff --git a/clang/test/Driver/loongarch-default-features.c b/clang/test/Driver/loongarch-default-features.c --- a/clang/test/Driver/loongarch-default-features.c +++ b/clang/test/Driver/loongarch-default-features.c @@ -2,7 +2,7 @@ // RUN: %clang --target=loongarch64 -S -emit-llvm %s -o - | FileCheck %s --check-prefix=LA64 // LA32: "target-features"="+32bit" -// LA64: "target-features"="+64bit,+d,+f" +// LA64: "target-features"="+64bit,+d,+f,+ual" int foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c --- a/clang/test/Driver/loongarch-march.c +++ b/clang/test/Driver/loongarch-march.c @@ -8,17 +8,17 @@ // RUN: FileCheck %s --check-prefix=IR-LA464 // CC1-LOONGARCH64-NOT: "-target-feature" -// CC1-LOONGARCH64: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" +// CC1-LOONGARCH64: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+ual" // CC1-LOONGARCH64-NOT: "-target-feature" // CC1-LOONGARCH64: "-target-abi" "lp64d" // CC1-LA464-NOT: "-target-feature" -// CC1-LA464: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" +// CC1-LA464: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" // CC1-LA464-NOT: "-target-feature" // CC1-LA464: "-target-abi" "lp64d" -// IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f" -// IR-LA464: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx" +// IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f,+ual" +// 
IR-LA464: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual" int foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-mdouble-float.c b/clang/test/Driver/loongarch-mdouble-float.c --- a/clang/test/Driver/loongarch-mdouble-float.c +++ b/clang/test/Driver/loongarch-mdouble-float.c @@ -8,12 +8,10 @@ // WARN: warning: argument unused during compilation: '-mfpu=0' // WARN: warning: argument unused during compilation: '-mabi=lp64s' -// CC1-NOT: "-target-feature" -// CC1: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" -// CC1-NOT: "-target-feature" +// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "+d" // CC1: "-target-abi" "lp64d" -// IR: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f" +// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+d,{{(.*,)?}}+f{{(,.*)?}}" int foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-mfpu.c b/clang/test/Driver/loongarch-mfpu.c --- a/clang/test/Driver/loongarch-mfpu.c +++ b/clang/test/Driver/loongarch-mfpu.c @@ -16,24 +16,18 @@ // RUN: %clang --target=loongarch64 -mfpu=none -S -emit-llvm %s -o - | \ // RUN: FileCheck %s --check-prefix=IR-FPU0 -// CC1-FPU64-NOT: "-target-feature" -// CC1-FPU64: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" -// CC1-FPU64-NOT: "-target-feature" +// CC1-FPU64: "-target-feature" "+f"{{.*}} "-target-feature" "+d" // CC1-FPU64: "-target-abi" "lp64d" -// CC1-FPU32-NOT: "-target-feature" -// CC1-FPU32: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "-d" -// CC1-FPU32-NOT: "-target-feature" +// CC1-FPU32: "-target-feature" "+f"{{.*}} "-target-feature" "-d" // CC1-FPU32: "-target-abi" "lp64f" -// CC1-FPU0-NOT: "-target-feature" -// CC1-FPU0: "-target-feature" "+64bit" "-target-feature" "-f" "-target-feature" "-d" -// CC1-FPU0-NOT: "-target-feature" +// CC1-FPU0: "-target-feature" "-f"{{.*}} "-target-feature" "-d" // CC1-FPU0: "-target-abi" "lp64s" -// IR-FPU64: attributes 
#[[#]] ={{.*}}"target-features"="+64bit,+d,+f" -// IR-FPU32: attributes #[[#]] ={{.*}}"target-features"="+64bit,+f,-d" -// IR-FPU0: attributes #[[#]] ={{.*}}"target-features"="+64bit,-d,-f" +// IR-FPU64: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+d,{{(.*,)?}}+f{{(,.*)?}}" +// IR-FPU32: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d{{(,.*)?}}" +// IR-FPU0: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f{{(,.*)?}}" int foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-msingle-float.c b/clang/test/Driver/loongarch-msingle-float.c --- a/clang/test/Driver/loongarch-msingle-float.c +++ b/clang/test/Driver/loongarch-msingle-float.c @@ -8,12 +8,10 @@ // WARN: warning: argument unused during compilation: '-mfpu=0' // WARN: warning: argument unused during compilation: '-mabi=lp64s' -// CC1-NOT: "-target-feature" -// CC1: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "-d" -// CC1-NOT: "-target-feature" +// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "-d" // CC1: "-target-abi" "lp64f" -// IR: attributes #[[#]] ={{.*}}"target-features"="+64bit,+f,-d" +// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d" int foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-msoft-float.c b/clang/test/Driver/loongarch-msoft-float.c --- a/clang/test/Driver/loongarch-msoft-float.c +++ b/clang/test/Driver/loongarch-msoft-float.c @@ -8,12 +8,10 @@ // WARN: warning: argument unused during compilation: '-mfpu=64' // WARN: warning: argument unused during compilation: '-mabi=lp64d' -// CC1-NOT: "-target-feature" -// CC1: "-target-feature" "+64bit" "-target-feature" "-f" "-target-feature" "-d" -// CC1-NOT: "-target-feature" +// CC1: "-target-feature" "-f"{{.*}} "-target-feature" "-d" // CC1: "-target-abi" "lp64s" -// IR: attributes #[[#]] ={{.*}}"target-features"="+64bit,-d,-f" +// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f{{(,.*)?}}" int 
foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-munaligned-access.c b/clang/test/Driver/loongarch-munaligned-access.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/loongarch-munaligned-access.c @@ -0,0 +1,61 @@ +/// Test -m[no-]unaligned-access and -m[no-]strict-align options. + +// RUN: %clang --target=loongarch64 -munaligned-access -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mstrict-align -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-strict-align -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +// RUN: %clang --target=loongarch64 -munaligned-access -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-unaligned-access -munaligned-access -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +// RUN: %clang --target=loongarch64 -mstrict-align -mno-strict-align -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-strict-align -mstrict-align -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -munaligned-access -mstrict-align -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mstrict-align -munaligned-access -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-unaligned-access -mno-strict-align -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED +// RUN: %clang --target=loongarch64 
-mno-strict-align -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED + +// RUN: %clang --target=loongarch64 -munaligned-access -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-unaligned-access -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mstrict-align -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-strict-align -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +// RUN: %clang --target=loongarch64 -munaligned-access -mno-unaligned-access -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-unaligned-access -munaligned-access -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +// RUN: %clang --target=loongarch64 -mstrict-align -mno-strict-align -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-strict-align -mstrict-align -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -munaligned-access -mstrict-align -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED +// RUN: %clang --target=loongarch64 -mstrict-align -munaligned-access -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-unaligned-access -mno-strict-align -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-UNALIGNED +// RUN: %clang --target=loongarch64 -mno-strict-align -mno-unaligned-access -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED + +// CC1-UNALIGNED: "-target-feature" "+ual" +// CC1-NO-UNALIGNED: "-target-feature" "-ual" + +// IR-UNALIGNED: attributes 
#[[#]] ={{.*}}"target-features"="{{(.*,)?}}+ual{{(,.*)?}}" +// IR-NO-UNALIGNED: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-ual{{(,.*)?}}" + +int foo(void) { + return 3; +} diff --git a/clang/test/Driver/munaligned-access-unused.c b/clang/test/Driver/munaligned-access-unused.c new file mode 100644 --- /dev/null +++ b/clang/test/Driver/munaligned-access-unused.c @@ -0,0 +1,8 @@ +/// Check -m[no-]unaligned-access and -m[no-]strict-align are warned unused on a target that does not support them. + +// RUN: %clang --target=x86_64 -munaligned-access -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=unaligned-access +// RUN: %clang --target=x86_64 -mno-unaligned-access -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=no-unaligned-access +// RUN: %clang --target=x86_64 -mstrict-align -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=strict-align +// RUN: %clang --target=x86_64 -mno-strict-align -fsyntax-only %s -### 2>&1 | FileCheck %s -DOPTION=no-strict-align + +// CHECK: clang: warning: argument unused during compilation: '-m[[OPTION]]' [-Wunused-command-line-argument] diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h --- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h @@ -46,6 +46,9 @@ // Loongson Virtualization Extension is available. FK_LVZ = 1 << 7, + + // Allow memory accesses to be unaligned. 
+ FK_UAL = 1 << 8, }; struct FeatureInfo { diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def --- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def @@ -11,6 +11,7 @@ LOONGARCH_FEATURE("+lasx", FK_LASX) LOONGARCH_FEATURE("+lbt", FK_LBT) LOONGARCH_FEATURE("+lvz", FK_LVZ) +LOONGARCH_FEATURE("+ual", FK_UAL) #undef LOONGARCH_FEATURE @@ -19,7 +20,7 @@ #endif LOONGARCH_ARCH("invalid", AK_INVALID, FK_INVALID) -LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64) -LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX) +LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL) +LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL) #undef LOONGARCH_ARCH diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td --- a/llvm/lib/Target/LoongArch/LoongArch.td +++ b/llvm/lib/Target/LoongArch/LoongArch.td @@ -115,6 +115,11 @@ AssemblerPredicate<(all_of LaLocalWithAbs), "Expand la.local as la.abs">; +// Unaligned memory access +def FeatureUAL + : SubtargetFeature<"ual", "HasUAL", "true", + "Allow memory accesses to be unaligned">; + //===----------------------------------------------------------------------===// // Registers, instruction descriptions ... //===----------------------------------------------------------------------===// @@ -128,13 +133,14 @@ //===----------------------------------------------------------------------===// def : ProcessorModel<"generic-la32", NoSchedModel, [Feature32Bit]>; -def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>; +def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, FeatureUAL]>; // Support generic for compatibility with other targets. The triple will be used // to change to the appropriate la32/la64 version. 
def : ProcessorModel<"generic", NoSchedModel, []>; def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit, + FeatureUAL, FeatureExtLASX, FeatureExtLVZ, FeatureExtLBT]>; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -191,6 +191,11 @@ bool convertSelectOfConstantsToMath(EVT VT) const override { return true; } + bool allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + unsigned *Fast = nullptr) const override; + private: /// Target-specific function used to lower LoongArch calling conventions. typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI, diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -1785,6 +1785,18 @@ } } +bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, + unsigned *Fast) const { + if (!Subtarget.hasUAL()) + return false; + + // TODO: set reasonable speed number. 
+ if (Fast) + *Fast = 1; + return true; +} + const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((LoongArchISD::NodeType)Opcode) { case LoongArchISD::FIRST_NUMBER: diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h @@ -42,6 +42,7 @@ bool HasLaGlobalWithPcrel = false; bool HasLaGlobalWithAbs = false; bool HasLaLocalWithAbs = false; + bool HasUAL = false; unsigned GRLen = 32; MVT GRLenVT = MVT::i32; LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; @@ -91,6 +92,7 @@ bool hasLaGlobalWithPcrel() const { return HasLaGlobalWithPcrel; } bool hasLaGlobalWithAbs() const { return HasLaGlobalWithAbs; } bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; } + bool hasUAL() const { return HasUAL; } MVT getGRLenVT() const { return GRLenVT; } unsigned getGRLen() const { return GRLen; } LoongArchABI::ABI getTargetABI() const { return TargetABI; } diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll --- a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll +++ b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll @@ -360,17 +360,13 @@ ; CHECK-LABEL: callee_large_struct_ret: ; CHECK: # %bb.0: ; CHECK-NEXT: ori $a1, $zero, 4 -; CHECK-NEXT: st.w $a1, $a0, 24 +; CHECK-NEXT: st.d $a1, $a0, 24 ; CHECK-NEXT: ori $a1, $zero, 3 -; CHECK-NEXT: st.w $a1, $a0, 16 +; CHECK-NEXT: st.d $a1, $a0, 16 ; CHECK-NEXT: ori $a1, $zero, 2 -; CHECK-NEXT: st.w $a1, $a0, 8 -; CHECK-NEXT: st.w $zero, $a0, 28 -; CHECK-NEXT: st.w $zero, $a0, 20 -; CHECK-NEXT: st.w $zero, $a0, 12 -; CHECK-NEXT: st.w $zero, $a0, 4 +; CHECK-NEXT: st.d $a1, $a0, 8 ; CHECK-NEXT: ori $a1, $zero, 1 -; CHECK-NEXT: st.w $a1, $a0, 0 +; CHECK-NEXT: st.d $a1, $a0, 0 ; CHECK-NEXT: ret %a = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 0 store i64 1, 
ptr %a, align 4 diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll @@ -315,10 +315,7 @@ ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill ; LA64F-NEXT: move $fp, $a0 -; LA64F-NEXT: ld.wu $a0, $a0, 0 -; LA64F-NEXT: ld.wu $a1, $fp, 4 -; LA64F-NEXT: slli.d $a1, $a1, 32 -; LA64F-NEXT: or $a0, $a1, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 ; LA64F-NEXT: ori $s0, $zero, 8 ; LA64F-NEXT: addi.d $s1, $sp, 8 ; LA64F-NEXT: addi.d $s2, $sp, 0 @@ -360,11 +357,7 @@ ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 -; LA64D-NEXT: ld.wu $a0, $a0, 0 -; LA64D-NEXT: ld.wu $a1, $fp, 4 -; LA64D-NEXT: slli.d $a1, $a1, 32 -; LA64D-NEXT: or $a0, $a1, $a0 -; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 ; LA64D-NEXT: addi.d $a0, $zero, 1 ; LA64D-NEXT: movgr2fr.d $fs0, $a0 ; LA64D-NEXT: ori $s0, $zero, 8 @@ -411,10 +404,7 @@ ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill ; LA64F-NEXT: move $fp, $a0 -; LA64F-NEXT: ld.wu $a0, $a0, 0 -; LA64F-NEXT: ld.wu $a1, $fp, 4 -; LA64F-NEXT: slli.d $a1, $a1, 32 -; LA64F-NEXT: or $a0, $a1, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 ; LA64F-NEXT: ori $s0, $zero, 8 ; LA64F-NEXT: addi.d $s1, $sp, 8 ; LA64F-NEXT: addi.d $s2, $sp, 0 @@ -456,11 +446,7 @@ ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 -; LA64D-NEXT: ld.wu $a0, $a0, 0 -; LA64D-NEXT: ld.wu $a1, $fp, 4 -; LA64D-NEXT: slli.d $a1, $a1, 32 -; LA64D-NEXT: or $a0, $a1, $a0 -; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 ; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) ; 
LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI5_0) ; LA64D-NEXT: fld.d $fs0, $a0, 0 @@ -507,10 +493,7 @@ ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill ; LA64F-NEXT: move $fp, $a0 -; LA64F-NEXT: ld.wu $a0, $a0, 0 -; LA64F-NEXT: ld.wu $a1, $fp, 4 -; LA64F-NEXT: slli.d $a1, $a1, 32 -; LA64F-NEXT: or $a0, $a1, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 ; LA64F-NEXT: ori $s0, $zero, 8 ; LA64F-NEXT: addi.d $s1, $sp, 8 ; LA64F-NEXT: addi.d $s2, $sp, 0 @@ -552,11 +535,7 @@ ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 -; LA64D-NEXT: ld.wu $a0, $a0, 0 -; LA64D-NEXT: ld.wu $a1, $fp, 4 -; LA64D-NEXT: slli.d $a1, $a1, 32 -; LA64D-NEXT: or $a0, $a1, $a0 -; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 ; LA64D-NEXT: addi.d $a0, $zero, 1 ; LA64D-NEXT: movgr2fr.d $fs0, $a0 ; LA64D-NEXT: ori $s0, $zero, 8 @@ -604,10 +583,7 @@ ; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill ; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill ; LA64F-NEXT: move $fp, $a0 -; LA64F-NEXT: ld.wu $a0, $a0, 0 -; LA64F-NEXT: ld.wu $a1, $fp, 4 -; LA64F-NEXT: slli.d $a1, $a1, 32 -; LA64F-NEXT: or $a0, $a1, $a0 +; LA64F-NEXT: ld.d $a0, $a0, 0 ; LA64F-NEXT: ori $s0, $zero, 8 ; LA64F-NEXT: addi.d $s1, $sp, 8 ; LA64F-NEXT: addi.d $s2, $sp, 0 @@ -649,11 +625,7 @@ ; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill ; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill ; LA64D-NEXT: move $fp, $a0 -; LA64D-NEXT: ld.wu $a0, $a0, 0 -; LA64D-NEXT: ld.wu $a1, $fp, 4 -; LA64D-NEXT: slli.d $a1, $a1, 32 -; LA64D-NEXT: or $a0, $a1, $a0 -; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: fld.d $fa0, $a0, 0 ; LA64D-NEXT: addi.d $a0, $zero, 1 ; LA64D-NEXT: movgr2fr.d $fs0, $a0 ; LA64D-NEXT: ori $s0, $zero, 8 diff --git a/llvm/test/CodeGen/LoongArch/tail-calls.ll b/llvm/test/CodeGen/LoongArch/tail-calls.ll --- a/llvm/test/CodeGen/LoongArch/tail-calls.ll +++ 
b/llvm/test/CodeGen/LoongArch/tail-calls.ll @@ -13,6 +13,7 @@ } ;; Perform tail call optimization for external symbol. +;; Bytes copied should be large enough, otherwise the memcpy call would be optimized to multiple ld/st insns. @dest = global [2 x i8] zeroinitializer declare void @llvm.memcpy.p0i8.p0i8.i32(ptr, ptr, i32, i1) define void @caller_extern(ptr %src) optsize { @@ -21,10 +22,10 @@ ; CHECK-NEXT: move $a1, $a0 ; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(dest) ; CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(dest) -; CHECK-NEXT: ori $a2, $zero, 7 +; CHECK-NEXT: ori $a2, $zero, 33 ; CHECK-NEXT: b %plt(memcpy) entry: - tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 7, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 33, i1 false) ret void } diff --git a/llvm/test/CodeGen/LoongArch/unaligned-access.ll b/llvm/test/CodeGen/LoongArch/unaligned-access.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/unaligned-access.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 + +;; Test the ual feature which is similar to AArch64/arm64-strict-align.ll. 
+ +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32-ALIGNED +; RUN: llc --mtriple=loongarch32 --mattr=+ual < %s | FileCheck %s --check-prefix=LA32-UNALIGNED +; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32-ALIGNED + +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64-UNALIGNED +; RUN: llc --mtriple=loongarch64 --mattr=+ual < %s | FileCheck %s --check-prefix=LA64-UNALIGNED +; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64-ALIGNED + +define i32 @f0(ptr %p) nounwind { +; LA32-ALIGNED-LABEL: f0: +; LA32-ALIGNED: # %bb.0: +; LA32-ALIGNED-NEXT: ld.hu $a1, $a0, 0 +; LA32-ALIGNED-NEXT: ld.hu $a0, $a0, 2 +; LA32-ALIGNED-NEXT: slli.w $a0, $a0, 16 +; LA32-ALIGNED-NEXT: or $a0, $a0, $a1 +; LA32-ALIGNED-NEXT: ret +; +; LA32-UNALIGNED-LABEL: f0: +; LA32-UNALIGNED: # %bb.0: +; LA32-UNALIGNED-NEXT: ld.w $a0, $a0, 0 +; LA32-UNALIGNED-NEXT: ret +; +; LA64-UNALIGNED-LABEL: f0: +; LA64-UNALIGNED: # %bb.0: +; LA64-UNALIGNED-NEXT: ld.w $a0, $a0, 0 +; LA64-UNALIGNED-NEXT: ret +; +; LA64-ALIGNED-LABEL: f0: +; LA64-ALIGNED: # %bb.0: +; LA64-ALIGNED-NEXT: ld.hu $a1, $a0, 0 +; LA64-ALIGNED-NEXT: ld.h $a0, $a0, 2 +; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 16 +; LA64-ALIGNED-NEXT: or $a0, $a0, $a1 +; LA64-ALIGNED-NEXT: ret + %tmp = load i32, ptr %p, align 2 + ret i32 %tmp +} + +define i64 @f1(ptr %p) nounwind { +; LA32-ALIGNED-LABEL: f1: +; LA32-ALIGNED: # %bb.0: +; LA32-ALIGNED-NEXT: ld.w $a2, $a0, 0 +; LA32-ALIGNED-NEXT: ld.w $a1, $a0, 4 +; LA32-ALIGNED-NEXT: move $a0, $a2 +; LA32-ALIGNED-NEXT: ret +; +; LA32-UNALIGNED-LABEL: f1: +; LA32-UNALIGNED: # %bb.0: +; LA32-UNALIGNED-NEXT: ld.w $a2, $a0, 0 +; LA32-UNALIGNED-NEXT: ld.w $a1, $a0, 4 +; LA32-UNALIGNED-NEXT: move $a0, $a2 +; LA32-UNALIGNED-NEXT: ret +; +; LA64-UNALIGNED-LABEL: f1: +; LA64-UNALIGNED: # %bb.0: +; LA64-UNALIGNED-NEXT: ld.d $a0, $a0, 0 +; LA64-UNALIGNED-NEXT: ret +; +; LA64-ALIGNED-LABEL: f1: +; LA64-ALIGNED: # 
%bb.0: +; LA64-ALIGNED-NEXT: ld.wu $a1, $a0, 0 +; LA64-ALIGNED-NEXT: ld.wu $a0, $a0, 4 +; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 32 +; LA64-ALIGNED-NEXT: or $a0, $a0, $a1 +; LA64-ALIGNED-NEXT: ret + %tmp = load i64, ptr %p, align 4 + ret i64 %tmp +} diff --git a/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll b/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 + +;; Test how memcpy is optimized when ual is turned off which is similar to AArch64/arm64-misaligned-memcpy-inline.ll. + +; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64 + +;; Small (16 bytes here) unaligned memcpy() should be a function call if +;; ual is turned off. +define void @t0(ptr %out, ptr %in) { +; LA32-LABEL: t0: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: ori $a2, $zero, 16 +; LA32-NEXT: bl %plt(memcpy) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: t0: +; LA64: # %bb.0: # %entry +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: ori $a2, $zero, 16 +; LA64-NEXT: bl %plt(memcpy) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret +entry: + call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false) + ret void +} + +;; Small (16 bytes here) aligned memcpy() should be inlined even if +;; ual is turned off. 
+define void @t1(ptr align 8 %out, ptr align 8 %in) { +; LA32-LABEL: t1: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.w $a2, $a1, 12 +; LA32-NEXT: st.w $a2, $a0, 12 +; LA32-NEXT: ld.w $a2, $a1, 8 +; LA32-NEXT: st.w $a2, $a0, 8 +; LA32-NEXT: ld.w $a2, $a1, 4 +; LA32-NEXT: st.w $a2, $a0, 4 +; LA32-NEXT: ld.w $a1, $a1, 0 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: t1: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.d $a2, $a1, 8 +; LA64-NEXT: st.d $a2, $a0, 8 +; LA64-NEXT: ld.d $a1, $a1, 0 +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret +entry: + call void @llvm.memcpy.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false) + ret void +} + +;; Tiny (4 bytes here) unaligned memcpy() should be inlined with byte sized +;; loads and stores if ual is turned off. +define void @t2(ptr %out, ptr %in) { +; LA32-LABEL: t2: +; LA32: # %bb.0: # %entry +; LA32-NEXT: ld.b $a2, $a1, 3 +; LA32-NEXT: st.b $a2, $a0, 3 +; LA32-NEXT: ld.b $a2, $a1, 2 +; LA32-NEXT: st.b $a2, $a0, 2 +; LA32-NEXT: ld.b $a2, $a1, 1 +; LA32-NEXT: st.b $a2, $a0, 1 +; LA32-NEXT: ld.b $a1, $a1, 0 +; LA32-NEXT: st.b $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: t2: +; LA64: # %bb.0: # %entry +; LA64-NEXT: ld.b $a2, $a1, 3 +; LA64-NEXT: st.b $a2, $a0, 3 +; LA64-NEXT: ld.b $a2, $a1, 2 +; LA64-NEXT: st.b $a2, $a0, 2 +; LA64-NEXT: ld.b $a2, $a1, 1 +; LA64-NEXT: st.b $a2, $a0, 1 +; LA64-NEXT: ld.b $a1, $a1, 0 +; LA64-NEXT: st.b $a1, $a0, 0 +; LA64-NEXT: ret +entry: + call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false) + ret void +} + +declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)