Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -14191,8 +14191,9 @@ llvm::countr_zero(DL.getTypeStoreSizeInBits(IdxTy).getFixedValue()) - 3; // Is the constant foldable in the shift of the addressing mode? - // I.e., shift amount is between 1 and 4 inclusive. - if (ShiftAmt == 0 || ShiftAmt > 4) + // I.e., shift amount is between 1 and 3 inclusive. + if (!Subtarget->hasFeature(AArch64::FeatureAddrLSLFast) || + ShiftAmt == 0 || ShiftAmt > 3) return false; break; } Index: llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll +++ llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll @@ -1,6 +1,6 @@ -; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS -; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS -; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=DISABLE +; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -mattr=+addr-lsl-fast | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS +; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion -mattr=+addr-lsl-fast | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS +; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion -mattr=+addr-lsl-fast | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefix=DISABLE ; CodeGenPrepare should move the zext into the block with the load ; so that SelectionDAG can select it with the load. @@ -454,16 +454,16 @@ ; The input has one free zext and one free sext. If we would have promoted ; all the way through the load we would end up with a free zext and a ; non-free sext (of %b). -; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128 +; OPTALL-LABEL: @promoteFreeSExtFromAddrMode128 ; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, ptr %p ; ; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64 ; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64 ; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]] ; -; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32 -; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b -; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64 +; NONSTRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64 +; NONSTRESS-NEXT: [[SEXT64:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64 +; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXT64]] ; ; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32 ; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b @@ -472,7 +472,7 @@ ; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, ptr %addr, i64 [[IDX64]] ; OPTALL-NEXT: store i128 %stuff, ptr [[GEP]] ; OPTALL-NEXT: ret void -define void @doNotPromoteFreeSExtFromAddrMode128(ptr %p, i32 %b, ptr %addr, i128 %stuff) { +define void @promoteFreeSExtFromAddrMode128(ptr %p, i32 %b, ptr %addr, i128 %stuff) { entry: %t = load i8, ptr %p %zextt = zext i8 %t to i32 Index: llvm/test/Transforms/Inline/AArch64/ext.ll =================================================================== --- llvm/test/Transforms/Inline/AArch64/ext.ll +++ llvm/test/Transforms/Inline/AArch64/ext.ll @@ -11,7 +11,7 @@ ; sext can be folded into gep. ; CHECK: Analyzing call of inner1 -; CHECK: NumInstructionsSimplified: 3 +; CHECK: NumInstructionsSimplified: 2 ; CHECK: NumInstructions: 4 define i32 @inner1(ptr %ptr, i32 %i) { %E = sext i32 %i to i64 Index: llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll +++ llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64--linux-gnu < %s | FileCheck %s +; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64--linux-gnu -mattr=+addr-lsl-fast < %s | FileCheck %s target datalayout = "e-m:e-i32:64-i128:128-n32:64-S128"