diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -452,7 +452,7 @@ bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, SDValue &Offset, SDValue &SignExtend, SDValue &DoShift); - bool isWorthFolding(SDValue V) const; + bool isWorthFolding(SDValue V, bool FoldToBaseAddr) const; bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, SDValue &Offset, SDValue &SignExtend); @@ -631,7 +631,7 @@ /// Determine whether it is worth it to fold SHL into the addressing /// mode. -static bool isWorthFoldingSHL(SDValue V) { +static bool isWorthFoldingSHL(SDValue V, bool FoldToBaseAddr = false) { assert(V.getOpcode() == ISD::SHL && "invalid opcode"); // It is worth folding logical shift of up to three places. auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1)); @@ -644,32 +644,35 @@ // Check if this particular node is reused in any non-memory related // operation. If yes, do not try to fold this node into the address // computation, since the computation will be kept. - const SDNode *Node = V.getNode(); - for (SDNode *UI : Node->uses()) - if (!isa<MemSDNode>(*UI)) - for (SDNode *UII : UI->uses()) - if (!isa<MemSDNode>(*UII)) - return false; + if (!FoldToBaseAddr) { + const SDNode *Node = V.getNode(); + for (SDNode *UI : Node->uses()) + if (!isa<MemSDNode>(*UI)) + for (SDNode *UII : UI->uses()) + if (!isa<MemSDNode>(*UII)) + return false; + } return true; } /// Determine whether it is worth to fold V into an extended register. -bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { +bool AArch64DAGToDAGISel::isWorthFolding(SDValue V, bool FoldToBaseAddr = false) const { + bool AllowLSLFast = Subtarget->hasLSLFast() ? true : FoldToBaseAddr; // Trivial if we are optimizing for code size or if there is only // one use of the value. 
if (CurDAG->shouldOptForSize() || V.hasOneUse()) return true; // If a subtarget has a fastpath LSL we can fold a logical shift into // the addressing mode and save a cycle. - if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL && - isWorthFoldingSHL(V)) + if (AllowLSLFast && V.getOpcode() == ISD::SHL && + isWorthFoldingSHL(V, FoldToBaseAddr)) return true; - if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) { + if (AllowLSLFast && V.getOpcode() == ISD::ADD) { const SDValue LHS = V.getOperand(0); const SDValue RHS = V.getOperand(1); - if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) + if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS, FoldToBaseAddr)) return true; - if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS)) + if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS, FoldToBaseAddr)) return true; } @@ -1185,7 +1188,7 @@ } // Remember if it is worth folding N when it produces extended register. - bool IsExtendedRegisterWorthFolding = isWorthFolding(N); + bool IsExtendedRegisterWorthFolding = isWorthFolding(N, /*FoldToBaseAddr*/ true); // Try to match a shifted extend on the RHS. if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && @@ -1298,7 +1301,7 @@ } // Remember if it is worth folding N when it produces extended register. - bool IsExtendedRegisterWorthFolding = isWorthFolding(N); + bool IsExtendedRegisterWorthFolding = isWorthFolding(N, /*FoldToBaseAddr*/ true); // Try to match a shifted extend on the RHS. if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll --- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll @@ -8,36 +8,20 @@ declare void @foo() define i16 @halfword(ptr %ctx, i32 %xor72) nounwind { -; CHECK0-LABEL: halfword: -; CHECK0: // %bb.0: -; CHECK0-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK0-NEXT: ubfx x8, x1, #9, #8 -; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK0-NEXT: lsl x21, x8, #1 -; CHECK0-NEXT: mov x19, x0 -; CHECK0-NEXT: ldrh w20, [x0, x21] -; CHECK0-NEXT: bl foo -; CHECK0-NEXT: mov w0, w20 -; CHECK0-NEXT: strh w20, [x19, x21] -; CHECK0-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK0-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK0-NEXT: ret -; -; CHECK3-LABEL: halfword: -; CHECK3: // %bb.0: -; CHECK3-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill -; CHECK3-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK3-NEXT: ubfx x21, x1, #9, #8 -; CHECK3-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK3-NEXT: mov x19, x0 -; CHECK3-NEXT: ldrh w20, [x0, x21, lsl #1] -; CHECK3-NEXT: bl foo -; CHECK3-NEXT: mov w0, w20 -; CHECK3-NEXT: strh w20, [x19, x21, lsl #1] -; CHECK3-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK3-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK3-NEXT: ret +; CHECK-LABEL: halfword: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ubfx x21, x1, #9, #8 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: ldrh w20, [x0, x21, lsl #1] +; CHECK-NEXT: bl foo +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: strh w20, [x19, x21, lsl #1] +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -49,36 +33,20 @@ } define i32 @word(ptr %ctx, i32 %xor72) nounwind { -; CHECK0-LABEL: word: -; CHECK0: // %bb.0: -; CHECK0-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK0-NEXT: ubfx x8, x1, #9, #8 -; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK0-NEXT: lsl x21, x8, #2 -; CHECK0-NEXT: mov x19, x0 -; CHECK0-NEXT: ldr w20, [x0, x21] -; CHECK0-NEXT: bl foo -; CHECK0-NEXT: mov w0, w20 -; CHECK0-NEXT: str w20, [x19, x21] -; CHECK0-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK0-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK0-NEXT: ret -; -; CHECK3-LABEL: word: -; CHECK3: // %bb.0: -; CHECK3-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill -; CHECK3-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK3-NEXT: ubfx x21, x1, #9, #8 -; CHECK3-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK3-NEXT: mov x19, x0 -; CHECK3-NEXT: ldr w20, [x0, x21, lsl #2] -; CHECK3-NEXT: bl foo -; CHECK3-NEXT: mov w0, w20 -; CHECK3-NEXT: str w20, [x19, x21, lsl #2] -; CHECK3-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK3-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK3-NEXT: ret +; CHECK-LABEL: word: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ubfx x21, x1, #9, #8 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: ldr w20, [x0, x21, lsl #2] +; CHECK-NEXT: bl foo +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: str w20, [x19, x21, lsl #2] +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -90,36 +58,20 @@ } define i64 @doubleword(ptr %ctx, i32 %xor72) nounwind { -; CHECK0-LABEL: doubleword: -; CHECK0: // %bb.0: -; CHECK0-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill -; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK0-NEXT: ubfx x8, x1, #9, #8 -; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK0-NEXT: lsl x21, x8, #3 -; CHECK0-NEXT: mov x19, x0 -; CHECK0-NEXT: ldr x20, [x0, x21] -; CHECK0-NEXT: bl foo -; CHECK0-NEXT: mov x0, x20 -; CHECK0-NEXT: str x20, [x19, x21] -; CHECK0-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK0-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK0-NEXT: ret -; -; CHECK3-LABEL: doubleword: -; CHECK3: // %bb.0: -; CHECK3-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill -; CHECK3-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK3-NEXT: ubfx x21, x1, #9, #8 -; CHECK3-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK3-NEXT: mov x19, x0 -; CHECK3-NEXT: ldr x20, [x0, x21, lsl #3] -; CHECK3-NEXT: bl foo -; CHECK3-NEXT: mov x0, x20 -; CHECK3-NEXT: str x20, [x19, x21, lsl #3] -; CHECK3-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK3-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK3-NEXT: ret +; CHECK-LABEL: doubleword: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ubfx x21, x1, #9, #8 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: ldr x20, [x0, x21, lsl #3] +; CHECK-NEXT: bl foo +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: str x20, [x19, x21, lsl #3] +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -163,20 +115,12 @@ } define i64 @gep3(ptr %p, i64 %b) { -; CHECK0-LABEL: gep3: -; CHECK0: // %bb.0: -; CHECK0-NEXT: lsl x9, x1, #3 -; CHECK0-NEXT: mov x8, x0 -; CHECK0-NEXT: ldr x0, [x0, x9] -; CHECK0-NEXT: str x1, [x8, x9] -; CHECK0-NEXT: ret -; -; CHECK3-LABEL: gep3: -; CHECK3: // %bb.0: -; CHECK3-NEXT: mov x8, x0 -; CHECK3-NEXT: ldr x0, [x0, x1, lsl #3] -; CHECK3-NEXT: str x1, [x8, x1, lsl #3] -; CHECK3-NEXT: ret +; CHECK-LABEL: gep3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: ldr x0, [x0, x1, lsl #3] +; CHECK-NEXT: str x1, [x8, x1, lsl #3] +; CHECK-NEXT: ret %g = getelementptr inbounds i64, ptr %p, i64 %b %l = load i64, ptr %g store i64 %b, ptr %g @@ -225,3 +169,6 @@ %r = xor i64 %y, %z ret i64 %r } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK0: {{.*}} +; CHECK3: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll b/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll --- a/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll +++ b/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll @@ -1,10 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -O3 -mtriple arm64-apple-ios3 -aarch64-enable-gep-opt=false %s -o - | FileCheck %s ; @block = common global ptr null, align 8 define i32 @fct(i32 %i1, i32 %i2) { -; CHECK: @fct +; CHECK-LABEL: fct: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: Lloh0: +; CHECK-NEXT: adrp x10, _block@GOTPAGE +; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: sxtw x9, w1 +; CHECK-NEXT: Lloh1: +; CHECK-NEXT: ldr x10, [x10, _block@GOTPAGEOFF] +; CHECK-NEXT: Lloh2: +; CHECK-NEXT: ldr x10, [x10] +; CHECK-NEXT: ldrb w11, [x10, x8] +; CHECK-NEXT: ldrb w12, [x10, x9] +; CHECK-NEXT: cmp w11, w12 +; CHECK-NEXT: b.ne LBB0_3 +; CHECK-NEXT: ; %bb.1: ; %if.end +; CHECK-NEXT: add x8, x8, x10 +; CHECK-NEXT: add x9, x9, x10 +; CHECK-NEXT: ldrb w10, [x8, #1] +; CHECK-NEXT: ldrb w11, [x9, #1] +; CHECK-NEXT: cmp w10, w11 +; CHECK-NEXT: b.ne LBB0_3 +; CHECK-NEXT: ; %bb.2: ; %if.end23 +; CHECK-NEXT: ldrb w8, [x8, #2] +; CHECK-NEXT: ldrb w9, [x9, #2] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: mov w8, #1 ; =0x1 +; CHECK-NEXT: cset w9, hi +; CHECK-NEXT: csel w0, w8, w9, eq +; CHECK-NEXT: ret +; CHECK-NEXT: LBB0_3: ; %if.then18 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh0, Lloh1, Lloh2 ; Sign extension is used more than once, thus it should not be folded. ; CodeGenPrepare is not sharing sext across uses, thus this is folded because ; of that. 
@@ -65,10 +100,42 @@ } define i32 @fct1(i32 %i1, i32 %i2) optsize { -; CHECK: @fct1 +; CHECK-LABEL: fct1: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: Lloh3: +; CHECK-NEXT: adrp x8, _block@GOTPAGE +; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: Lloh4: +; CHECK-NEXT: ldr x8, [x8, _block@GOTPAGEOFF] +; CHECK-NEXT: Lloh5: +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: ldrb w9, [x8, w0, sxtw] +; CHECK-NEXT: ldrb w10, [x8, w1, sxtw] +; CHECK-NEXT: cmp w9, w10 +; CHECK-NEXT: b.ne LBB1_3 +; CHECK-NEXT: ; %bb.1: ; %if.end +; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: sxtw x10, w1 +; CHECK-NEXT: add x9, x9, x8 +; CHECK-NEXT: add x8, x10, x8 +; CHECK-NEXT: ldrb w10, [x9, #1] +; CHECK-NEXT: ldrb w11, [x8, #1] +; CHECK-NEXT: cmp w10, w11 +; CHECK-NEXT: b.ne LBB1_3 +; CHECK-NEXT: ; %bb.2: ; %if.end23 +; CHECK-NEXT: ldrb w9, [x9, #2] +; CHECK-NEXT: ldrb w8, [x8, #2] +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: mov w8, #1 ; =0x1 +; CHECK-NEXT: cset w9, hi +; CHECK-NEXT: csel w0, w8, w9, eq +; CHECK-NEXT: ret +; CHECK-NEXT: LBB1_3: ; %if.then +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh3, Lloh4, Lloh5 ; Addressing are folded when optimizing for code size. 
-; CHECK: , sxtw] -; CHECK: , sxtw] entry: %idxprom = sext i32 %i1 to i64 %0 = load ptr, ptr @block, align 8 @@ -127,6 +194,18 @@ ; CHECK: @test ; CHECK-NOT: , uxtw #2] define i32 @test(ptr %array, i8 zeroext %c, i32 %arg) { +; CHECK-LABEL: test: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: cmn w1, w2 +; CHECK-NEXT: b.ne LBB2_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB2_2: ; %if.then +; CHECK-NEXT: ldr w8, [x0, w1, uxtw #2] +; CHECK-NEXT: ldr w9, [x0, w1, uxtw #2] +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret entry: %conv = zext i8 %c to i32 %add = sub i32 0, %arg @@ -151,6 +230,18 @@ ; CHECK: , uxtw #2] ; CHECK: , uxtw #2] define i32 @test2(ptr %array, i8 zeroext %c, i32 %arg) optsize { +; CHECK-LABEL: test2: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: cmn w1, w2 +; CHECK-NEXT: b.ne LBB3_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: LBB3_2: ; %if.then +; CHECK-NEXT: ldr w8, [x0, w1, uxtw #2] +; CHECK-NEXT: ldr w9, [x0, w1, uxtw #2] +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret entry: %conv = zext i8 %c to i32 %add = sub i32 0, %arg diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-address.ll b/llvm/test/CodeGen/AArch64/arm64-fold-address.ll --- a/llvm/test/CodeGen/AArch64/arm64-fold-address.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fold-address.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -O2 -mtriple=arm64-apple-darwin | FileCheck %s %0 = type opaque @@ -9,11 +10,6 @@ define hidden %struct.CGRect @nofold(ptr nocapture %self, ptr nocapture %_cmd) nounwind readonly optsize ssp { entry: -; CHECK-LABEL: nofold: -; CHECK: add x[[REG:[0-9]+]], x0, x{{[0-9]+}} -; CHECK: ldp d0, d1, [x[[REG]]] -; CHECK: ldp d2, d3, [x[[REG]], #16] -; CHECK: ret %ivar = load i64, ptr @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4 %add.ptr = getelementptr inbounds i8, ptr %self, i64 %ivar %tmp11 = 
load double, ptr %add.ptr, align 8 @@ -37,10 +33,6 @@ define hidden %struct.CGRect @fold(ptr nocapture %self, ptr nocapture %_cmd) nounwind readonly optsize ssp { entry: -; CHECK-LABEL: fold: -; CHECK: ldr d0, [x0, x{{[0-9]+}}] -; CHECK-NOT: add x0, x0, x1 -; CHECK: ret %ivar = load i64, ptr @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4 %add.ptr = getelementptr inbounds i8, ptr %self, i64 %ivar %tmp11 = load double, ptr %add.ptr, align 8 @@ -67,3 +59,5 @@ !2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"} !3 = !{i32 4, !"Objective-C Garbage Collection", i32 0} !4 = !{} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-lsl.ll b/llvm/test/CodeGen/AArch64/arm64-fold-lsl.ll --- a/llvm/test/CodeGen/AArch64/arm64-fold-lsl.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fold-lsl.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s ; ; @@ -8,8 +9,11 @@ define i16 @load_halfword(ptr %ctx, i32 %xor72) nounwind { ; CHECK-LABEL: load_halfword: -; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8 -; CHECK: ldrh w0, [x0, [[REG]], lsl #1] +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ubfx x8, x1, #9, #8 +; CHECK-NEXT: ldrh w0, [x0, x8, lsl #1] +; CHECK-NEXT: ret %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -20,8 +24,11 @@ define i32 @load_word(ptr %ctx, i32 %xor72) nounwind { ; CHECK-LABEL: load_word: -; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8 -; CHECK: ldr w0, [x0, [[REG]], lsl #2] +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ubfx x8, x1, #9, #8 +; CHECK-NEXT: ldr w0, [x0, x8, lsl #2] +; CHECK-NEXT: ret %shr81 = lshr i32 %xor72, 9 %conv82 = 
zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -32,8 +39,11 @@ define i64 @load_doubleword(ptr %ctx, i32 %xor72) nounwind { ; CHECK-LABEL: load_doubleword: -; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8 -; CHECK: ldr x0, [x0, [[REG]], lsl #3] +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ubfx x8, x1, #9, #8 +; CHECK-NEXT: ldr x0, [x0, x8, lsl #3] +; CHECK-NEXT: ret %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -44,8 +54,11 @@ define void @store_halfword(ptr %ctx, i32 %xor72, i16 %val) nounwind { ; CHECK-LABEL: store_halfword: -; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8 -; CHECK: strh w2, [x0, [[REG]], lsl #1] +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ubfx x8, x1, #9, #8 +; CHECK-NEXT: strh w2, [x0, x8, lsl #1] +; CHECK-NEXT: ret %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -56,8 +69,11 @@ define void @store_word(ptr %ctx, i32 %xor72, i32 %val) nounwind { ; CHECK-LABEL: store_word: -; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8 -; CHECK: str w2, [x0, [[REG]], lsl #2] +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ubfx x8, x1, #9, #8 +; CHECK-NEXT: str w2, [x0, x8, lsl #2] +; CHECK-NEXT: ret %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -68,8 +84,11 @@ define void @store_doubleword(ptr %ctx, i32 %xor72, i64 %val) nounwind { ; CHECK-LABEL: store_doubleword: -; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8 -; CHECK: str x2, [x0, [[REG]], lsl #3] +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ubfx x8, x1, #9, #8 +; CHECK-NEXT: str x2, [x0, x8, lsl #3] +; CHECK-NEXT: ret %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -83,7 +102,10 @@ define i32 @load_doubleword_trunc_word(ptr %ptr, i64 %off) { ; CHECK-LABEL: 
load_doubleword_trunc_word: -; CHECK: ldr x0, [x0, x1, lsl #3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr x0, [x0, x1, lsl #3] +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i64, ptr %ptr, i64 %off %x = load i64, ptr %idx, align 8 @@ -93,7 +115,10 @@ define i16 @load_doubleword_trunc_halfword(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_doubleword_trunc_halfword: -; CHECK: ldr x0, [x0, x1, lsl #3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr x0, [x0, x1, lsl #3] +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i64, ptr %ptr, i64 %off %x = load i64, ptr %idx, align 8 @@ -103,7 +128,10 @@ define i8 @load_doubleword_trunc_byte(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_doubleword_trunc_byte: -; CHECK: ldr x0, [x0, x1, lsl #3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr x0, [x0, x1, lsl #3] +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i64, ptr %ptr, i64 %off %x = load i64, ptr %idx, align 8 @@ -112,9 +140,11 @@ } define i16 @load_word_trunc_halfword(ptr %ptr, i64 %off) { -entry: ; CHECK-LABEL: load_word_trunc_halfword: -; CHECK: ldr w0, [x0, x1, lsl #2] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w0, [x0, x1, lsl #2] +; CHECK-NEXT: ret +entry: %idx = getelementptr inbounds i32, ptr %ptr, i64 %off %x = load i32, ptr %idx, align 8 %trunc = trunc i32 %x to i16 @@ -123,7 +153,9 @@ define i8 @load_word_trunc_byte(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_word_trunc_byte: -; CHECK: ldr w0, [x0, x1, lsl #2] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w0, [x0, x1, lsl #2] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i32, ptr %ptr, i64 %off %x = load i32, ptr %idx, align 8 @@ -133,7 +165,9 @@ define i8 @load_halfword_trunc_byte(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_halfword_trunc_byte: -; CHECK: ldrh w0, [x0, x1, lsl #1] +; CHECK: // %bb.0: // %entry +; 
CHECK-NEXT: ldrh w0, [x0, x1, lsl #1] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i16, ptr %ptr, i64 %off %x = load i16, ptr %idx, align 8 @@ -146,8 +180,10 @@ define i64 @load_doubleword_trunc_word_zext(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_doubleword_trunc_word_zext: -; CHECK: lsl [[REG:x[0-9]+]], x1, #3 -; CHECK: ldr w0, [x0, [[REG]]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #3 +; CHECK-NEXT: ldr w0, [x0, x8] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i64, ptr %ptr, i64 %off %x = load i64, ptr %idx, align 8 @@ -158,8 +194,10 @@ define i64 @load_doubleword_trunc_halfword_zext(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_doubleword_trunc_halfword_zext: -; CHECK: lsl [[REG:x[0-9]+]], x1, #3 -; CHECK: ldrh w0, [x0, [[REG]]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #3 +; CHECK-NEXT: ldrh w0, [x0, x8] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i64, ptr %ptr, i64 %off %x = load i64, ptr %idx, align 8 @@ -170,8 +208,10 @@ define i64 @load_doubleword_trunc_byte_zext(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_doubleword_trunc_byte_zext: -; CHECK: lsl [[REG:x[0-9]+]], x1, #3 -; CHECK: ldrb w0, [x0, [[REG]]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #3 +; CHECK-NEXT: ldrb w0, [x0, x8] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i64, ptr %ptr, i64 %off %x = load i64, ptr %idx, align 8 @@ -182,8 +222,10 @@ define i64 @load_word_trunc_halfword_zext(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_word_trunc_halfword_zext: -; CHECK: lsl [[REG:x[0-9]+]], x1, #2 -; CHECK: ldrh w0, [x0, [[REG]]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #2 +; CHECK-NEXT: ldrh w0, [x0, x8] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i32, ptr %ptr, i64 %off %x = load i32, ptr %idx, align 8 @@ -194,8 +236,10 @@ define i64 @load_word_trunc_byte_zext(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_word_trunc_byte_zext: -; CHECK: lsl [[REG:x[0-9]+]], x1, #2 -; CHECK: ldrb w0, [x0, [[REG]]] +; 
CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #2 +; CHECK-NEXT: ldrb w0, [x0, x8] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i32, ptr %ptr, i64 %off %x = load i32, ptr %idx, align 8 @@ -206,8 +250,10 @@ define i64 @load_halfword_trunc_byte_zext(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_halfword_trunc_byte_zext: -; CHECK: lsl [[REG:x[0-9]+]], x1, #1 -; CHECK: ldrb w0, [x0, [[REG]]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #1 +; CHECK-NEXT: ldrb w0, [x0, x8] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i16, ptr %ptr, i64 %off %x = load i16, ptr %idx, align 8 @@ -221,8 +267,10 @@ define i64 @load_doubleword_trunc_word_sext(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_doubleword_trunc_word_sext: -; CHECK: lsl [[REG:x[0-9]+]], x1, #3 -; CHECK: ldrsw x0, [x0, [[REG]]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #3 +; CHECK-NEXT: ldrsw x0, [x0, x8] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i64, ptr %ptr, i64 %off %x = load i64, ptr %idx, align 8 @@ -233,8 +281,10 @@ define i64 @load_doubleword_trunc_halfword_sext(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_doubleword_trunc_halfword_sext: -; CHECK: lsl [[REG:x[0-9]+]], x1, #3 -; CHECK: ldrsh x0, [x0, [[REG]]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #3 +; CHECK-NEXT: ldrsh x0, [x0, x8] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i64, ptr %ptr, i64 %off %x = load i64, ptr %idx, align 8 @@ -245,8 +295,10 @@ define i64 @load_doubleword_trunc_byte_sext(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_doubleword_trunc_byte_sext: -; CHECK: lsl [[REG:x[0-9]+]], x1, #3 -; CHECK: ldrsb x0, [x0, [[REG]]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #3 +; CHECK-NEXT: ldrsb x0, [x0, x8] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i64, ptr %ptr, i64 %off %x = load i64, ptr %idx, align 8 @@ -257,8 +309,10 @@ define i64 @load_word_trunc_halfword_sext(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_word_trunc_halfword_sext: -; 
CHECK: lsl [[REG:x[0-9]+]], x1, #2 -; CHECK: ldrsh x0, [x0, [[REG]]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #2 +; CHECK-NEXT: ldrsh x0, [x0, x8] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i32, ptr %ptr, i64 %off %x = load i32, ptr %idx, align 8 @@ -269,8 +323,10 @@ define i64 @load_word_trunc_byte_sext(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_word_trunc_byte_sext: -; CHECK: lsl [[REG:x[0-9]+]], x1, #2 -; CHECK: ldrsb x0, [x0, [[REG]]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #2 +; CHECK-NEXT: ldrsb x0, [x0, x8] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i32, ptr %ptr, i64 %off %x = load i32, ptr %idx, align 8 @@ -281,8 +337,10 @@ define i64 @load_halfword_trunc_byte_sext(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_halfword_trunc_byte_sext: -; CHECK: lsl [[REG:x[0-9]+]], x1, #1 -; CHECK: ldrsb x0, [x0, [[REG]]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #1 +; CHECK-NEXT: ldrsb x0, [x0, x8] +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i16, ptr %ptr, i64 %off %x = load i16, ptr %idx, align 8 @@ -296,9 +354,11 @@ define i32 @load_doubleword_trunc_word_reuse_shift(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_doubleword_trunc_word_reuse_shift: -; CHECK: lsl x[[REG1:[0-9]+]], x1, #3 -; CHECK: ldr w[[REG2:[0-9]+]], [x0, x[[REG1]]] -; CHECK: add w0, w[[REG2]], w[[REG1]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #3 +; CHECK-NEXT: ldr w9, [x0, x8] +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i64, ptr %ptr, i64 %off %x = load i64, ptr %idx, align 8 @@ -311,9 +371,11 @@ define i16 @load_doubleword_trunc_halfword_reuse_shift(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_doubleword_trunc_halfword_reuse_shift: -; CHECK: lsl x[[REG1:[0-9]+]], x1, #3 -; CHECK: ldrh w[[REG2:[0-9]+]], [x0, x[[REG1]]] -; CHECK: add w0, w[[REG2]], w[[REG1]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #3 +; CHECK-NEXT: ldrh w9, [x0, x8] +; CHECK-NEXT: add w0, 
w9, w8 +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i64, ptr %ptr, i64 %off %x = load i64, ptr %idx, align 8 @@ -326,9 +388,11 @@ define i8 @load_doubleword_trunc_byte_reuse_shift(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_doubleword_trunc_byte_reuse_shift: -; CHECK: lsl x[[REG1:[0-9]+]], x1, #3 -; CHECK: ldrb w[[REG2:[0-9]+]], [x0, x[[REG1]]] -; CHECK: add w0, w[[REG2]], w[[REG1]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #3 +; CHECK-NEXT: ldrb w9, [x0, x8] +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i64, ptr %ptr, i64 %off %x = load i64, ptr %idx, align 8 @@ -340,11 +404,13 @@ } define i16 @load_word_trunc_halfword_reuse_shift(ptr %ptr, i64 %off) { -entry: ; CHECK-LABEL: load_word_trunc_halfword_reuse_shift: -; CHECK: lsl x[[REG1:[0-9]+]], x1, #2 -; CHECK: ldrh w[[REG2:[0-9]+]], [x0, x[[REG1]]] -; CHECK: add w0, w[[REG2]], w[[REG1]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #2 +; CHECK-NEXT: ldrh w9, [x0, x8] +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret +entry: %idx = getelementptr inbounds i32, ptr %ptr, i64 %off %x = load i32, ptr %idx, align 8 %trunc = trunc i32 %x to i16 @@ -356,9 +422,11 @@ define i8 @load_word_trunc_byte_reuse_shift(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_word_trunc_byte_reuse_shift: -; CHECK: lsl x[[REG1:[0-9]+]], x1, #2 -; CHECK: ldrb w[[REG2:[0-9]+]], [x0, x[[REG1]]] -; CHECK: add w0, w[[REG2]], w[[REG1]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl x8, x1, #2 +; CHECK-NEXT: ldrb w9, [x0, x8] +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i32, ptr %ptr, i64 %off %x = load i32, ptr %idx, align 8 @@ -371,9 +439,11 @@ define i8 @load_halfword_trunc_byte_reuse_shift(ptr %ptr, i64 %off) { ; CHECK-LABEL: load_halfword_trunc_byte_reuse_shift: -; CHECK: lsl x[[REG1:[0-9]+]], x1, #1 -; CHECK: ldrb w[[REG2:[0-9]+]], [x0, x[[REG1]]] -; CHECK: add w0, w[[REG2]], w[[REG1]] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsl 
x8, x1, #1 +; CHECK-NEXT: ldrb w9, [x0, x8] +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret entry: %idx = getelementptr inbounds i16, ptr %ptr, i64 %off %x = load i16, ptr %idx, align 8