Index: llvm/trunk/lib/Target/ARM/ARM.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARM.td +++ llvm/trunk/lib/Target/ARM/ARM.td @@ -99,6 +99,8 @@ // Not to be confused with FeatureHasRetAddrStack (return address stack) def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", "Enable Reliability, Availability and Serviceability extensions">; +def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", + "Enable fast computation of positive address offsets">; // Cyclone has preferred instructions for zeroing VFP registers, which can @@ -773,13 +775,15 @@ FeatureHWDiv, FeatureHWDivARM, FeatureCrypto, - FeatureCRC]>; + FeatureCRC, + FeatureFPAO]>; def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57, FeatureHWDiv, FeatureHWDivARM, FeatureCrypto, - FeatureCRC]>; + FeatureCRC, + FeatureFPAO]>; def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, FeatureHWDiv, Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h @@ -291,6 +291,14 @@ /// by AM is legal for this target, for a load/store of the specified type. bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + + /// getScalingFactorCost - Return the cost of the scaling used in + /// the addressing mode represented by AM. + /// If the AM is supported, the return value must be >= 0. + /// If the AM is not supported, the return value must be negative. 
+ int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS) const override; + bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp @@ -11612,6 +11612,17 @@ return true; } +int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS) const { + if (isLegalAddressingMode(DL, AM, Ty, AS)) { + if (Subtarget->hasFPAO()) + return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster + return 0; + } + return -1; +} + static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.h =================================================================== --- llvm/trunk/lib/Target/ARM/ARMSubtarget.h +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h @@ -235,6 +235,9 @@ /// particularly effective at zeroing a VFP register. bool HasZeroCycleZeroing = false; + /// HasFPAO - if true, the processor computes positive address offsets faster + bool HasFPAO = false; + /// If true, if conversion may decide to leave some instructions unpredicated. 
bool IsProfitableToUnpredicate = false; @@ -453,6 +456,7 @@ bool hasTrustZone() const { return HasTrustZone; } bool has8MSecExt() const { return Has8MSecExt; } bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } + bool hasFPAO() const { return HasFPAO; } bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; } bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; } bool hasSlowVDUP32() const { return HasSlowVDUP32; } Index: llvm/trunk/test/CodeGen/ARM/lsr-scale-addr-mode.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/lsr-scale-addr-mode.ll +++ llvm/trunk/test/CodeGen/ARM/lsr-scale-addr-mode.ll @@ -1,6 +1,10 @@ ; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s ; Should use scaled addressing mode. +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a53 %s -o - | FileCheck %s -check-prefix CHECK-NONEGOFF-A53 +; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a57 %s -o - | FileCheck %s -check-prefix CHECK-NONEGOFF-A57 +; Should not generate negated register offset + define void @sintzero(i32* %a) nounwind { entry: store i32 0, i32* %a @@ -19,4 +23,6 @@ } ; CHECK: lsl{{.*}}#2] +; CHECK-NONEGOFF-A53: [{{r[0-9]+}}, {{r[0-9]+}}, lsl{{.*}}#2] +; CHECK-NONEGOFF-A57: [{{r[0-9]+}}, {{r[0-9]+}}, lsl{{.*}}#2]