diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -166,6 +166,9 @@ "Enable Hardware Transactional Memory instructions">; def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true", "Implement mftb using the mfspr instruction">; +def FeatureUnalignedFloats : + SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess", + "true", "CPU does not trap on unaligned FP access">; def FeaturePPCPreRASched: SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true", "Use PowerPC pre-RA scheduling strategy">; @@ -252,7 +255,8 @@ FeatureExtDiv, FeatureMFTB, DeprecatedDST, - FeatureTwoConstNR]; + FeatureTwoConstNR, + FeatureUnalignedFloats]; list P7SpecificFeatures = []; list P7Features = !listconcat(P7InheritableFeatures, P7SpecificFeatures); @@ -454,7 +458,8 @@ FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */, FeatureMFTB]>; + Feature64Bit /*, Feature64BitRegs */, FeatureMFTB, + FeatureUnalignedFloats]>; def : ProcessorModel<"a2q", PPCA2Model, [DirectiveA2, FeatureICBT, FeatureBookE, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, @@ -463,7 +468,7 @@ FeatureFPRND, FeatureFPCVT, FeatureISEL, FeatureSlowPOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, FeatureQPX, - FeatureMFTB]>; + FeatureMFTB, FeatureUnalignedFloats]>; def : ProcessorModel<"pwr3", G5Model, [DirectivePwr3, FeatureAltivec, FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -15131,6 +15131,9 @@ if (!VT.isSimple()) return false; + if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess()) + return false; + if (VT.getSimpleVT().isVector()) { if (Subtarget.hasVSX()) { if (VT != MVT::v2f64 && VT != MVT::v2i64 && diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -124,6 +124,7 @@ bool IsPPC4xx; bool IsPPC6xx; bool FeatureMFTB; + bool AllowsUnalignedFPAccess; bool DeprecatedDST; bool HasLazyResolverStubs; bool IsLittleEndian; @@ -274,6 +275,7 @@ bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; } bool isE500() const { return IsE500; } bool isFeatureMFTB() const { return FeatureMFTB; } + bool allowsUnalignedFPAccess() const { return AllowsUnalignedFPAccess; } bool isDeprecatedDST() const { return DeprecatedDST; } bool hasICBT() const { return HasICBT; } bool hasInvariantFunctionDescriptors() const { diff --git a/llvm/test/CodeGen/PowerPC/unaligned-floats.ll b/llvm/test/CodeGen/PowerPC/unaligned-floats.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/unaligned-floats.ll @@ -0,0 +1,43 @@ +; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -mtriple=powerpc-- \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mattr=allow-unaligned-fp-access -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc-- -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -ppc-asm-full-reg-names -mtriple=powerpc-- \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=UNALIGN + +; Test case as provided by author in https://bugs.llvm.org/show_bug.cgi?id=40554 +%struct.anon = type { i32, [5 x i8] } + +@s = dso_local local_unnamed_addr global %struct.anon { i32 0, [5 x i8] c"\00B\F6\E9y" }, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%g\0A\00", align 1 +; Function Attrs: nofree nounwind +define dso_local i32 @main() local_unnamed_addr { +; CHECK-LABEL: main: +; CHECK: lfs f1, 5(r3) +; CHECK: blr +; +; UNALIGN-LABEL: main: +; UNALIGN: lfs f1, 12(r1) +; UNALIGN: blr +entry: + %0 = load i8, i8* getelementptr inbounds (%struct.anon, %struct.anon* @s, i32 0, i32 1, i32 1), align 1 + %conv = zext i8 %0 to i32 + %shl = shl nuw i32 %conv, 24 + %1 = load i8, i8* getelementptr inbounds (%struct.anon, %struct.anon* @s, i32 0, i32 1, i32 2), align 2 + %conv1 = zext i8 %1 to i32 + %shl2 = shl nuw nsw i32 %conv1, 16 + %add = or i32 %shl2, %shl + %2 = load i8, i8* getelementptr inbounds (%struct.anon, %struct.anon* @s, i32 0, i32 1, i32 3), align 1 + %conv3 = zext i8 %2 to i32 + %shl4 = shl nuw nsw i32 %conv3, 8 + %add5 = or i32 %add, %shl4 + %3 = load i8, i8* getelementptr inbounds (%struct.anon, %struct.anon* @s, i32 0, i32 1, i32 4), align 4 + %conv6 = zext i8 %3 to i32 + %add7 = or i32 %add5, %conv6 + %4 = bitcast i32 %add7 to float + %conv8 = fpext float %4 to double + %call = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double %conv8) + ret i32 0 +} +; Function Attrs: nofree nounwind +declare i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr