Index: llvm/lib/Target/AArch64/AArch64SchedA55.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SchedA55.td +++ llvm/lib/Target/AArch64/AArch64SchedA55.td @@ -63,15 +63,15 @@ // These latencies are modeled without taking into account forwarding paths // (the software optimisation guide lists latencies taking into account // typical forwarding paths). -def : WriteRes { let Latency = 3; } // MOVN, MOVZ -def : WriteRes { let Latency = 3; } // ALU -def : WriteRes { let Latency = 3; } // ALU of Shifted-Reg -def : WriteRes { let Latency = 3; } // ALU of Extended-Reg -def : WriteRes { let Latency = 3; } // EXTR from a reg pair -def : WriteRes { let Latency = 3; } // Shift/Scale +def : WriteRes { let Latency = 1; } // MOVN, MOVZ +def : WriteRes { let Latency = 1; } // ALU +def : WriteRes { let Latency = 2; } // ALU of Shifted-Reg +def : WriteRes { let Latency = 2; } // ALU of Extended-Reg +def : WriteRes { let Latency = 2; } // EXTR from a reg pair +def : WriteRes { let Latency = 2; } // Shift/Scale // MAC -def : WriteRes { let Latency = 4; } // 32-bit Multiply +def : WriteRes { let Latency = 3; } // 32-bit Multiply def : WriteRes { let Latency = 4; } // 64-bit Multiply // Div @@ -208,56 +208,30 @@ // Subtarget-specific SchedRead types. def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; def : ReadAdvance; -// ALU - ALU input operands are generally needed in EX1. An operand produced in -// in say EX2 can be forwarded for consumption to ALU in EX1, thereby -// allowing back-to-back ALU operations such as add. If an operand requires -// a shift, it will, however, be required in ISS stage. -def : ReadAdvance; -// Shifted operand -def CortexA55ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI, - WriteISReg, WriteIEReg,WriteIS, - WriteID32,WriteID64, - WriteIM32,WriteIM64]>; -def CortexA55ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI, - WriteISReg, WriteIEReg,WriteIS, - WriteID32,WriteID64, - WriteIM32,WriteIM64]>; -def CortexA55ReadISReg : SchedReadVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; - -def CortexA55ReadIEReg : SchedReadVariant<[ - SchedVar, - SchedVar]>; -def : SchedAlias; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; // MUL -def : ReadAdvance; -def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; // Div -def : ReadAdvance; +def : ReadAdvance; //===----------------------------------------------------------------------===// // Subtarget-specific InstRWs. +def A55WriteISReg : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : InstRW<[A55WriteISReg], (instregex ".*rs$")>; +def : InstRW<[WriteIS], (instrs RBITWr, RBITXr)>; + //--- // Miscellaneous //--- Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -1139,14 +1139,14 @@ define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) #0 { ; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_8: ; CHECK-NOLSE-O1: ; %bb.0: -; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 -; CHECK-NOLSE-O1-NEXT: ldrb w9, [x0, #4095] -; CHECK-NOLSE-O1-NEXT: ldrb w10, [x0, w1, sxtw] -; CHECK-NOLSE-O1-NEXT: ldurb w11, [x0, #-256] -; CHECK-NOLSE-O1-NEXT: ldrb w8, [x8] -; CHECK-NOLSE-O1-NEXT: add w9, w9, w10 -; CHECK-NOLSE-O1-NEXT: add w9, w9, w11 -; CHECK-NOLSE-O1-NEXT: add w0, w9, w8 +; CHECK-NOLSE-O1-NEXT: ldrb w8, [x0, #4095] +; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: ldrb w9, [x0, w1, sxtw] +; CHECK-NOLSE-O1-NEXT: ldurb w10, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: ldrb w11, [x11] +; CHECK-NOLSE-O1-NEXT: add w8, w8, w9 +; CHECK-NOLSE-O1-NEXT: add w8, w8, w10 +; CHECK-NOLSE-O1-NEXT: add w0, w8, w11 ; CHECK-NOLSE-O1-NEXT: ret ; ; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_8: @@ -1191,14 +1191,14 @@ ; ; CHECK-LDAPR-O1-LABEL: atomic_load_relaxed_8: ; CHECK-LDAPR-O1: ; %bb.0: -; CHECK-LDAPR-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 -; CHECK-LDAPR-O1-NEXT: ldrb w9, [x0, #4095] -; CHECK-LDAPR-O1-NEXT: ldrb w10, [x0, w1, sxtw] -; CHECK-LDAPR-O1-NEXT: ldurb w11, [x0, #-256] -; CHECK-LDAPR-O1-NEXT: ldrb w8, [x8] -; CHECK-LDAPR-O1-NEXT: add w9, w9, w10 -; CHECK-LDAPR-O1-NEXT: add w9, w9, w11 -; CHECK-LDAPR-O1-NEXT: add w0, w9, w8 +; CHECK-LDAPR-O1-NEXT: ldrb w8, [x0, #4095] +; CHECK-LDAPR-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 +; CHECK-LDAPR-O1-NEXT: ldrb w9, [x0, w1, sxtw] +; CHECK-LDAPR-O1-NEXT: ldurb w10, [x0, #-256] +; CHECK-LDAPR-O1-NEXT: ldrb w11, [x11] +; CHECK-LDAPR-O1-NEXT: add w8, w8, w9 +; CHECK-LDAPR-O1-NEXT: add w8, w8, w10 +; CHECK-LDAPR-O1-NEXT: add w0, w8, w11 ; CHECK-LDAPR-O1-NEXT: ret ; ; CHECK-LDAPR-O0-LABEL: atomic_load_relaxed_8: @@ -1235,14 +1235,14 @@ define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) #0 { ; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_16: ; CHECK-NOLSE-O1: ; %bb.0: -; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 -; CHECK-NOLSE-O1-NEXT: ldrh w9, [x0, #8190] -; CHECK-NOLSE-O1-NEXT: ldrh w10, [x0, w1, sxtw #1] -; CHECK-NOLSE-O1-NEXT: ldurh w11, [x0, #-256] -; CHECK-NOLSE-O1-NEXT: ldrh w8, [x8] -; CHECK-NOLSE-O1-NEXT: add w9, w9, w10 -; CHECK-NOLSE-O1-NEXT: add w9, w9, w11 -; CHECK-NOLSE-O1-NEXT: add w0, w9, w8 +; CHECK-NOLSE-O1-NEXT: ldrh w8, [x0, #8190] +; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: ldrh w9, [x0, w1, sxtw #1] +; CHECK-NOLSE-O1-NEXT: ldurh w10, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: ldrh w11, [x11] +; CHECK-NOLSE-O1-NEXT: add w8, w8, w9 +; CHECK-NOLSE-O1-NEXT: add w8, w8, w10 +; CHECK-NOLSE-O1-NEXT: add w0, w8, w11 ; CHECK-NOLSE-O1-NEXT: ret ; ; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_16: @@ -1287,14 +1287,14 @@ ; ; CHECK-LDAPR-O1-LABEL: atomic_load_relaxed_16: ; CHECK-LDAPR-O1: ; %bb.0: -; CHECK-LDAPR-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 -; CHECK-LDAPR-O1-NEXT: ldrh w9, [x0, #8190] -; CHECK-LDAPR-O1-NEXT: ldrh w10, [x0, w1, sxtw #1] -; CHECK-LDAPR-O1-NEXT: ldurh w11, [x0, #-256] -; CHECK-LDAPR-O1-NEXT: ldrh w8, [x8] -; CHECK-LDAPR-O1-NEXT: add w9, w9, w10 -; CHECK-LDAPR-O1-NEXT: add w9, w9, w11 -; CHECK-LDAPR-O1-NEXT: add w0, w9, w8 +; CHECK-LDAPR-O1-NEXT: ldrh w8, [x0, #8190] +; CHECK-LDAPR-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 +; CHECK-LDAPR-O1-NEXT: ldrh w9, [x0, w1, sxtw #1] +; CHECK-LDAPR-O1-NEXT: ldurh w10, [x0, #-256] +; CHECK-LDAPR-O1-NEXT: ldrh w11, [x11] +; CHECK-LDAPR-O1-NEXT: add w8, w8, w9 +; CHECK-LDAPR-O1-NEXT: add w8, w8, w10 +; CHECK-LDAPR-O1-NEXT: add w0, w8, w11 ; CHECK-LDAPR-O1-NEXT: ret ; ; CHECK-LDAPR-O0-LABEL: atomic_load_relaxed_16: @@ -1331,14 +1331,14 @@ define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) #0 { ; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_32: ; CHECK-NOLSE-O1: ; %bb.0: -; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 -; CHECK-NOLSE-O1-NEXT: ldr w9, [x0, #16380] -; CHECK-NOLSE-O1-NEXT: ldr w10, [x0, w1, sxtw #2] -; CHECK-NOLSE-O1-NEXT: ldur w11, [x0, #-256] -; CHECK-NOLSE-O1-NEXT: ldr w8, [x8] -; CHECK-NOLSE-O1-NEXT: add w9, w9, w10 -; CHECK-NOLSE-O1-NEXT: add w9, w9, w11 -; CHECK-NOLSE-O1-NEXT: add w0, w9, w8 +; CHECK-NOLSE-O1-NEXT: ldr w8, [x0, #16380] +; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: ldr w9, [x0, w1, sxtw #2] +; CHECK-NOLSE-O1-NEXT: ldur w10, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: ldr w11, [x11] +; CHECK-NOLSE-O1-NEXT: add w8, w8, w9 +; CHECK-NOLSE-O1-NEXT: add w8, w8, w10 +; CHECK-NOLSE-O1-NEXT: add w0, w8, w11 ; CHECK-NOLSE-O1-NEXT: ret ; ; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_32: @@ -1379,14 +1379,14 @@ ; ; CHECK-LDAPR-O1-LABEL: atomic_load_relaxed_32: ; CHECK-LDAPR-O1: ; %bb.0: -; CHECK-LDAPR-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 -; CHECK-LDAPR-O1-NEXT: ldr w9, [x0, #16380] -; CHECK-LDAPR-O1-NEXT: ldr w10, [x0, w1, sxtw #2] -; CHECK-LDAPR-O1-NEXT: ldur w11, [x0, #-256] -; CHECK-LDAPR-O1-NEXT: ldr w8, [x8] -; CHECK-LDAPR-O1-NEXT: add w9, w9, w10 -; CHECK-LDAPR-O1-NEXT: add w9, w9, w11 -; CHECK-LDAPR-O1-NEXT: add w0, w9, w8 +; CHECK-LDAPR-O1-NEXT: ldr w8, [x0, #16380] +; CHECK-LDAPR-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 +; CHECK-LDAPR-O1-NEXT: ldr w9, [x0, w1, sxtw #2] +; CHECK-LDAPR-O1-NEXT: ldur w10, [x0, #-256] +; CHECK-LDAPR-O1-NEXT: ldr w11, [x11] +; CHECK-LDAPR-O1-NEXT: add w8, w8, w9 +; CHECK-LDAPR-O1-NEXT: add w8, w8, w10 +; CHECK-LDAPR-O1-NEXT: add w0, w8, w11 ; CHECK-LDAPR-O1-NEXT: ret ; ; CHECK-LDAPR-O0-LABEL: atomic_load_relaxed_32: @@ -1421,14 +1421,14 @@ define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) #0 { ; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_64: ; CHECK-NOLSE-O1: ; %bb.0: -; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 -; CHECK-NOLSE-O1-NEXT: ldr x9, [x0, #32760] -; CHECK-NOLSE-O1-NEXT: ldr x10, [x0, w1, sxtw #3] -; CHECK-NOLSE-O1-NEXT: ldur x11, [x0, #-256] -; CHECK-NOLSE-O1-NEXT: ldr x8, [x8] -; CHECK-NOLSE-O1-NEXT: add x9, x9, x10 -; CHECK-NOLSE-O1-NEXT: add x9, x9, x11 -; CHECK-NOLSE-O1-NEXT: add x0, x9, x8 +; CHECK-NOLSE-O1-NEXT: ldr x8, [x0, #32760] +; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: ldr x9, [x0, w1, sxtw #3] +; CHECK-NOLSE-O1-NEXT: ldur x10, [x0, #-256] +; CHECK-NOLSE-O1-NEXT: ldr x11, [x11] +; CHECK-NOLSE-O1-NEXT: add x8, x8, x9 +; CHECK-NOLSE-O1-NEXT: add x8, x8, x10 +; CHECK-NOLSE-O1-NEXT: add x0, x8, x11 ; CHECK-NOLSE-O1-NEXT: ret ; ; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_64: @@ -1469,14 +1469,14 @@ ; ; CHECK-LDAPR-O1-LABEL: atomic_load_relaxed_64: ; CHECK-LDAPR-O1: ; %bb.0: -; CHECK-LDAPR-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936 -; CHECK-LDAPR-O1-NEXT: ldr x9, [x0, #32760] -; CHECK-LDAPR-O1-NEXT: ldr x10, [x0, w1, sxtw #3] -; CHECK-LDAPR-O1-NEXT: ldur x11, [x0, #-256] -; CHECK-LDAPR-O1-NEXT: ldr x8, [x8] -; CHECK-LDAPR-O1-NEXT: add x9, x9, x10 -; CHECK-LDAPR-O1-NEXT: add x9, x9, x11 -; CHECK-LDAPR-O1-NEXT: add x0, x9, x8 +; CHECK-LDAPR-O1-NEXT: ldr x8, [x0, #32760] +; CHECK-LDAPR-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 +; CHECK-LDAPR-O1-NEXT: ldr x9, [x0, w1, sxtw #3] +; CHECK-LDAPR-O1-NEXT: ldur x10, [x0, #-256] +; CHECK-LDAPR-O1-NEXT: ldr x11, [x11] +; CHECK-LDAPR-O1-NEXT: add x8, x8, x9 +; CHECK-LDAPR-O1-NEXT: add x8, x8, x10 +; CHECK-LDAPR-O1-NEXT: add x0, x8, x11 ; CHECK-LDAPR-O1-NEXT: ret ; ; CHECK-LDAPR-O0-LABEL: atomic_load_relaxed_64: Index: llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll @@ -122,8 +122,9 @@ ; CHECK-NEXT: .cfi_offset w26, -80 ; CHECK-NEXT: .cfi_offset w27, -88 ; CHECK-NEXT: .cfi_offset w28, -96 -; CHECK-NEXT: add x9, sp, #128 -; CHECK-NEXT: add x10, sp, #256 +; CHECK-NEXT: mov x27, x8 +; CHECK-NEXT: add x8, sp, #128 +; CHECK-NEXT: add x9, sp, #256 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 ; CHECK-NEXT: mov x21, x2 @@ -133,11 +134,10 @@ ; CHECK-NEXT: mov x25, x6 ; CHECK-NEXT: mov x26, x7 ; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill -; CHECK-NEXT: mov x27, x8 ; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill ; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill ; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill -; CHECK-NEXT: str x10, [x9] +; CHECK-NEXT: str x9, [x8] ; CHECK-NEXT: bl _get_f ; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload ; CHECK-NEXT: mov x9, x0 @@ -195,9 +195,9 @@ ; CHECK-NEXT: Lloh2: ; CHECK-NEXT: adrp x10, _g@GOTPAGE ; CHECK-NEXT: ldr x9, [x0, #16] -; CHECK-NEXT: mov w11, #42 ; CHECK-NEXT: Lloh3: ; CHECK-NEXT: ldr x10, [x10, _g@GOTPAGEOFF] +; CHECK-NEXT: mov w11, #42 ; CHECK-NEXT: Lloh4: ; CHECK-NEXT: str w11, [x10] ; CHECK-NEXT: br x9 Index: llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll @@ -35,10 +35,10 @@ ; SDAG-LABEL: combine_vec_udiv_nonuniform: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI1_0 -; SDAG-NEXT: adrp x9, .LCPI1_1 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] +; SDAG-NEXT: adrp x8, .LCPI1_1 +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_1] ; SDAG-NEXT: adrp x8, .LCPI1_2 -; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI1_1] ; SDAG-NEXT: ushl v1.8h, v0.8h, v1.8h ; SDAG-NEXT: umull2 v3.4s, v1.8h, v2.8h ; SDAG-NEXT: umull v1.4s, v1.4h, v2.4h @@ -57,13 +57,11 @@ ; GISEL-LABEL: combine_vec_udiv_nonuniform: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI1_4 -; GISEL-NEXT: adrp x9, .LCPI1_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_4] ; GISEL-NEXT: adrp x8, .LCPI1_3 -; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI1_0] -; GISEL-NEXT: neg v1.8h, v1.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_3] ; GISEL-NEXT: adrp x8, .LCPI1_2 +; GISEL-NEXT: neg v1.8h, v1.8h ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h @@ -74,10 +72,12 @@ ; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h ; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_5] +; GISEL-NEXT: adrp x8, .LCPI1_0 +; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI1_0] ; GISEL-NEXT: adrp x8, .LCPI1_1 -; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h ; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI1_1] +; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: shl v3.8h, v3.8h, #15 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h ; GISEL-NEXT: neg v2.8h, v4.8h @@ -93,10 +93,10 @@ ; SDAG-LABEL: combine_vec_udiv_nonuniform2: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI2_0 -; SDAG-NEXT: adrp x9, .LCPI2_1 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] +; SDAG-NEXT: adrp x8, .LCPI2_1 +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI2_1] ; SDAG-NEXT: adrp x8, .LCPI2_2 -; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI2_1] ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h ; SDAG-NEXT: umull2 v1.4s, v0.8h, v2.8h ; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h @@ -108,21 +108,21 @@ ; GISEL-LABEL: combine_vec_udiv_nonuniform2: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI2_3 -; GISEL-NEXT: adrp x9, .LCPI2_4 -; GISEL-NEXT: adrp x10, .LCPI2_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_3] ; GISEL-NEXT: adrp x8, .LCPI2_2 -; GISEL-NEXT: ldr q4, [x10, :lo12:.LCPI2_0] -; GISEL-NEXT: neg v1.8h, v1.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_2] -; GISEL-NEXT: adrp x8, .LCPI2_1 +; GISEL-NEXT: adrp x8, .LCPI2_4 +; GISEL-NEXT: neg v1.8h, v1.8h ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h -; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_1] ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h -; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI2_4] -; GISEL-NEXT: cmeq v2.8h, v2.8h, v4.8h +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_4] +; GISEL-NEXT: adrp x8, .LCPI2_0 +; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI2_0] +; GISEL-NEXT: adrp x8, .LCPI2_1 +; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_1] ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h +; GISEL-NEXT: cmeq v2.8h, v2.8h, v4.8h ; GISEL-NEXT: neg v3.8h, v5.8h ; GISEL-NEXT: shl v2.8h, v2.8h, #15 ; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h @@ -151,20 +151,20 @@ ; GISEL-LABEL: combine_vec_udiv_nonuniform3: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI3_2 -; GISEL-NEXT: adrp x9, .LCPI3_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2] ; GISEL-NEXT: adrp x8, .LCPI3_3 -; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI3_0] ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3] +; GISEL-NEXT: adrp x8, .LCPI3_0 +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_0] ; GISEL-NEXT: adrp x8, .LCPI3_1 -; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h ; GISEL-NEXT: sub v4.8h, v0.8h, v1.8h +; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_1] -; GISEL-NEXT: shl v2.8h, v2.8h, #15 ; GISEL-NEXT: usra v1.8h, v4.8h, #1 +; GISEL-NEXT: shl v2.8h, v2.8h, #15 ; GISEL-NEXT: neg v3.8h, v3.8h ; GISEL-NEXT: sshr v2.8h, v2.8h, #15 ; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h @@ -178,14 +178,14 @@ ; SDAG-LABEL: combine_vec_udiv_nonuniform4: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI4_0 -; SDAG-NEXT: adrp x9, .LCPI4_2 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; SDAG-NEXT: adrp x8, .LCPI4_1 -; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI4_2] ; SDAG-NEXT: umull2 v2.8h, v0.16b, v1.16b ; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b ; SDAG-NEXT: uzp2 v1.16b, v1.16b, v2.16b ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_1] +; SDAG-NEXT: adrp x8, .LCPI4_2 +; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI4_2] ; SDAG-NEXT: adrp x8, .LCPI4_3 ; SDAG-NEXT: ushl v1.16b, v1.16b, v2.16b ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_3] @@ -197,18 +197,18 @@ ; GISEL-LABEL: combine_vec_udiv_nonuniform4: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI4_2 -; GISEL-NEXT: adrp x9, .LCPI4_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2] ; GISEL-NEXT: adrp x8, .LCPI4_3 -; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0] -; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_3] -; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b +; GISEL-NEXT: adrp x8, .LCPI4_0 +; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI4_0] ; GISEL-NEXT: adrp x8, .LCPI4_1 +; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b +; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b ; GISEL-NEXT: cmeq v3.16b, v3.16b, v4.16b +; GISEL-NEXT: shl v3.16b, v3.16b, #7 ; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_1] -; GISEL-NEXT: shl v3.16b, v3.16b, #7 ; GISEL-NEXT: neg v2.16b, v2.16b ; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b ; GISEL-NEXT: sshr v2.16b, v3.16b, #7 @@ -222,7 +222,6 @@ ; SDAG-LABEL: pr38477: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI5_0 -; SDAG-NEXT: adrp x9, .LCPI5_3 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] ; SDAG-NEXT: adrp x8, .LCPI5_1 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h @@ -234,9 +233,10 @@ ; SDAG-NEXT: umull2 v4.4s, v3.8h, v2.8h ; SDAG-NEXT: umull v2.4s, v3.4h, v2.4h ; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_2] -; SDAG-NEXT: adrp x8, .LCPI5_4 +; SDAG-NEXT: adrp x8, .LCPI5_3 ; SDAG-NEXT: uzp2 v2.8h, v2.8h, v4.8h -; SDAG-NEXT: ldr q4, [x9, :lo12:.LCPI5_3] +; SDAG-NEXT: ldr q4, [x8, :lo12:.LCPI5_3] +; SDAG-NEXT: adrp x8, .LCPI5_4 ; SDAG-NEXT: add v1.8h, v2.8h, v1.8h ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_4] ; SDAG-NEXT: ushl v1.8h, v1.8h, v3.8h @@ -248,10 +248,8 @@ ; GISEL-LABEL: pr38477: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI5_3 -; GISEL-NEXT: adrp x9, .LCPI5_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3] ; GISEL-NEXT: adrp x8, .LCPI5_2 -; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI5_0] ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h @@ -261,10 +259,12 @@ ; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h ; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_4] +; GISEL-NEXT: adrp x8, .LCPI5_0 +; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI5_0] ; GISEL-NEXT: adrp x8, .LCPI5_1 -; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h ; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI5_1] +; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: shl v3.8h, v3.8h, #15 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h ; GISEL-NEXT: neg v2.8h, v4.8h Index: llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll @@ -145,8 +145,8 @@ ; SDAG: ; %bb.0: ; %bb ; SDAG-NEXT: and x8, x1, #0x1 ; SDAG-NEXT: bfi x1, x0, #1, #63 -; SDAG-NEXT: mov x0, x1 ; SDAG-NEXT: str x8, [x2] +; SDAG-NEXT: mov x0, x1 ; SDAG-NEXT: ret bb: %tmp3 = shl i64 %in1, 1 Index: llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll @@ -84,12 +84,12 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w8, #4 ; CHECK-NEXT: mov w9, #7 -; CHECK-NEXT: mov w10, #5 -; CHECK-NEXT: mov w11, #8 ; CHECK-NEXT: strh w8, [x0] +; CHECK-NEXT: mov w8, #5 +; CHECK-NEXT: strh w8, [x0, #4] +; CHECK-NEXT: mov w8, #8 ; CHECK-NEXT: strh w9, [x0, #2] -; CHECK-NEXT: strh w10, [x0, #4] -; CHECK-NEXT: strh w11, [x0, #6] +; CHECK-NEXT: strh w8, [x0, #6] ; CHECK-NEXT: ret %addr1 = getelementptr <2 x i16>, <2 x i16> *%ptr, i64 0 store <2 x i16> , <2 x i16> *%addr1 @@ -119,12 +119,12 @@ define void @test_2x_2xs32(i32 *%ptr, i32 *%ptr2) { ; CHECK-LABEL: test_2x_2xs32: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov x10, #9 ; CHECK-NEXT: mov w8, #4 ; CHECK-NEXT: mov w9, #5 -; CHECK-NEXT: movk x10, #17, lsl #32 ; CHECK-NEXT: stp w8, w9, [x0] -; CHECK-NEXT: str x10, [x1] +; CHECK-NEXT: mov x8, #9 +; CHECK-NEXT: movk x8, #17, lsl #32 +; CHECK-NEXT: str x8, [x1] ; CHECK-NEXT: ret %addr1 = getelementptr i32, i32 *%ptr, i64 0 store i32 4, i32 *%addr1 @@ -183,13 +183,13 @@ ; CHECK-LABEL: test_alias_4xs16: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: mov w9, #9 ; CHECK-NEXT: movk w8, #5, lsl #16 -; CHECK-NEXT: mov w10, #14 -; CHECK-NEXT: strh w9, [x0, #4] ; CHECK-NEXT: str w8, [x0] +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: strh w8, [x0, #4] +; CHECK-NEXT: mov w8, #14 ; CHECK-NEXT: strh wzr, [x1] -; CHECK-NEXT: strh w10, [x0, #6] +; CHECK-NEXT: strh w8, [x0, #6] ; CHECK-NEXT: ret %addr1 = getelementptr i16, i16 *%ptr, i64 0 store i16 4, i16 *%addr1 @@ -208,12 +208,12 @@ ; CHECK-LABEL: test_alias2_4xs16: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: mov w9, #5 -; CHECK-NEXT: movk w9, #9, lsl #16 ; CHECK-NEXT: strh w8, [x0] -; CHECK-NEXT: mov w8, #14 +; CHECK-NEXT: mov w8, #5 +; CHECK-NEXT: movk w8, #9, lsl #16 ; CHECK-NEXT: strh wzr, [x2] -; CHECK-NEXT: stur w9, [x0, #2] +; CHECK-NEXT: stur w8, [x0, #2] +; CHECK-NEXT: mov w8, #14 ; CHECK-NEXT: strh wzr, [x1] ; CHECK-NEXT: strh w8, [x0, #6] ; CHECK-NEXT: ret @@ -235,16 +235,16 @@ ; CHECK-LABEL: test_alias3_4xs16: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: mov w9, #5 ; CHECK-NEXT: strh w8, [x0] -; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: mov w8, #5 ; CHECK-NEXT: strh wzr, [x2] -; CHECK-NEXT: strh w9, [x0, #2] -; CHECK-NEXT: mov w9, #14 +; CHECK-NEXT: strh w8, [x0, #2] +; CHECK-NEXT: mov w8, #9 ; CHECK-NEXT: strh wzr, [x3] ; CHECK-NEXT: strh w8, [x0, #4] +; CHECK-NEXT: mov w8, #14 ; CHECK-NEXT: strh wzr, [x1] -; CHECK-NEXT: strh w9, [x0, #6] +; CHECK-NEXT: strh w8, [x0, #6] ; CHECK-NEXT: ret %addr1 = getelementptr i16, i16 *%ptr, i64 0 store i16 4, i16 *%addr1 @@ -266,8 +266,8 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: mov x8, #4 ; CHECK-NEXT: ldr w0, [sp, #4] +; CHECK-NEXT: mov x8, #4 ; CHECK-NEXT: movk x8, #5, lsl #32 ; CHECK-NEXT: str x8, [sp, #8] ; CHECK-NEXT: add sp, sp, #32 @@ -302,9 +302,9 @@ ; CHECK-LABEL: test_atomic: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: add x9, x8, #4 ; CHECK-NEXT: stlr wzr, [x8] -; CHECK-NEXT: stlr wzr, [x9] +; CHECK-NEXT: add x8, x8, #4 +; CHECK-NEXT: stlr wzr, [x8] ; CHECK-NEXT: ret entry: %0 = load i32*, i32** %ptr, align 8 Index: llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll @@ -57,8 +57,8 @@ ; "caller2" is the caller of "foo", it calls "foo" inside a loop. define float @caller2(i8* %error_ref) { ; CHECK-LABEL: caller2: -; CHECK: mov [[ID:x[0-9]+]], x0 ; CHECK: fmov [[CMP:s[0-9]+]], #1.0 +; CHECK: mov [[ID:x[0-9]+]], x0 ; CHECK: mov x21, xzr ; CHECK: bl {{.*}}foo ; CHECK: cbnz x21 @@ -221,9 +221,9 @@ ; CHECK: mov w0, #16 ; CHECK: malloc ; CHECK: mov [[ID:w[0-9]+]], #1 -; CHECK: mov x21, x0 ; CHECK-NOT: x21 ; CHECK: strb [[ID]], [x0, #8] +; CHECK: mov x21, x0 ; CHECK-NOT: x21 ; First vararg @@ -346,6 +346,7 @@ ; CHECK: str xzr, [sp] ; CHECK: bl _params_in_reg2 ; Restore original arguments for next call. +; CHECK: ldr x8, [sp ; CHECK: mov x1, x20 ; CHECK: mov x2, x22 ; CHECK: mov x3, x23 @@ -355,7 +356,6 @@ ; CHECK: mov x7, x27 ; Restore original swiftself argument and swifterror %err. ; CHECK: mov x21, x28 -; CHECK: ldr x8, [sp ; CHECK: bl _params_in_reg2 ; Restore calle save registers but don't clober swifterror x21. ; CHECK-NOT: x21 @@ -383,7 +383,7 @@ ; CHECK-LABEL: params_and_return_in_reg ; Store callee saved registers. -; CHECK: stp x28, x21, [sp, #16 +; CHECK: stp x28, x0, [sp, #16 ; CHECK: stp x27, x26, [sp ; CHECK: stp x25, x24, [sp ; CHECK: stp x23, x22, [sp @@ -409,9 +409,9 @@ ; CHECK: mov x21, xzr ; CHECK: bl _params_in_reg2 ; Store swifterror %error_ptr_ref. -; CHECK: stp {{x[0-9]+}}, x21, [sp] ; Setup call arguments from original arguments. -; CHECK: mov x0, x19 +; CHECK: ldr x0, [sp, #24 +; CHECK: stp {{x[0-9]+}}, x21, [sp] ; CHECK: mov x1, x20 ; CHECK: mov x2, x22 ; CHECK: mov x3, x23 @@ -419,19 +419,20 @@ ; CHECK: mov x5, x25 ; CHECK: mov x6, x26 ; CHECK: mov x7, x27 -; CHECK: ldr x21, [sp, #24 +; CHECK: mov x21, x28 ; CHECK: bl _params_and_return_in_reg2 ; Store return values. -; CHECK: mov x19, x0 -; CHECK: mov x20, x1 -; CHECK: mov x22, x2 -; CHECK: mov x23, x3 -; CHECK: mov x24, x4 -; CHECK: mov x25, x5 -; CHECK: mov x26, x6 -; CHECK: mov x27, x7 +; CHECK: mov x19, x21 +; CHECK: ldr x21, [sp, #8] +; CHECK: mov x20, x0 +; CHECK: mov x22, x1 +; CHECK: mov x23, x2 +; CHECK: mov x24, x3 +; CHECK: mov x25, x4 +; CHECK: mov x26, x5 +; CHECK: mov x27, x6 ; Save swifterror %err. -; CHECK: mov x28, x21 +; CHECK: mov x28, x7 ; Setup call. ; CHECK: mov w0, #1 ; CHECK: mov w1, #2 @@ -442,19 +443,18 @@ ; CHECK: mov w6, #7 ; CHECK: mov w7, #8 ; ... setup call with swiferror %error_ptr_ref. -; CHECK: ldr x21, [sp, #8] ; CHECK: bl _params_in_reg2 ; Restore return values for return from this function. -; CHECK: mov x0, x19 -; CHECK: mov x1, x20 -; CHECK: mov x2, x22 -; CHECK: mov x3, x23 -; CHECK: mov x4, x24 -; CHECK: mov x5, x25 -; CHECK: mov x6, x26 -; CHECK: mov x7, x27 +; CHECK: mov x0, x20 +; CHECK: mov x1, x22 +; CHECK: mov x2, x23 +; CHECK: mov x3, x24 +; CHECK: mov x4, x25 +; CHECK: mov x5, x26 +; CHECK: mov x6, x27 +; CHECK: mov x21, x19 ; CHECK: ldp x29, x30, [sp -; CHECK: mov x21, x28 +; CHECK: mov x7, x28 ; Restore callee save registers. ; CHECK: ldp x20, x19, [sp ; CHECK: ldp x23, x22, [sp Index: llvm/test/CodeGen/AArch64/aarch64-be-bv.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-be-bv.ll +++ llvm/test/CodeGen/AArch64/aarch64-be-bv.ll @@ -8,8 +8,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -24,8 +24,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: movi v1.4s, #1, lsl #8 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: movi v1.4s, #1, lsl #8 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -40,8 +40,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: movi v1.4s, #1, lsl #16 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: movi v1.4s, #1, lsl #16 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -56,8 +56,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: movi v1.4s, #1, lsl #24 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: movi v1.4s, #1, lsl #24 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -72,8 +72,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: movi v1.8h, #1 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: movi v1.8h, #1 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -88,8 +88,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: movi v1.8h, #1, lsl #8 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: movi v1.8h, #1, lsl #8 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -104,8 +104,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: movi v1.4s, #1, msl #8 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: movi v1.4s, #1, msl #8 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -120,8 +120,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: movi v1.4s, #1, msl #16 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: movi v1.4s, #1, msl #16 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -136,8 +136,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -152,8 +152,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -168,8 +168,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: fmov v1.4s, #3.00000000 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: fmov v1.4s, #3.00000000 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -184,8 +184,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: fmov v1.2d, #0.17968750 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: fmov v1.2d, #0.17968750 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -200,8 +200,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: mvni v1.4s, #1 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: mvni v1.4s, #1 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -216,8 +216,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: mvni v1.4s, #1, lsl #8 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: mvni v1.4s, #1, lsl #8 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -232,8 +232,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: mvni v1.4s, #1, lsl #16 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: mvni v1.4s, #1, lsl #16 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -248,8 +248,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: mvni v1.4s, #1, lsl #24 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: mvni v1.4s, #1, lsl #24 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -264,8 +264,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: mvni v1.8h, #1 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: mvni v1.8h, #1 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -280,8 +280,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: mvni v1.8h, #1, lsl #8 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: mvni v1.8h, #1, lsl #8 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -296,8 +296,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: mvni v1.4s, #1, msl #8 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: mvni v1.4s, #1, msl #8 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret @@ -312,8 +312,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 -; CHECK-NEXT: mvni v1.4s, #1, msl #16 ; CHECK-NEXT: ld1 { v0.8h }, [x8] +; CHECK-NEXT: mvni v1.4s, #1, msl #16 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll +++ llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll @@ -100,8 +100,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-NEXT: dup v1.2s, w8 +; CHECK-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ret entry: @@ -118,14 +118,14 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi d1, #0x00ffff0000ffff ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: and x8, x0, #0xffff +; CHECK-NEXT: and x9, x0, #0xffff ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: mov x10, v0.d[1] -; CHECK-NEXT: mul x9, x8, x9 -; CHECK-NEXT: mul x8, x8, x10 -; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: mov x8, v0.d[1] +; CHECK-NEXT: mul x10, x9, x10 +; CHECK-NEXT: mul x8, x9, x8 +; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret entry: @@ -194,11 +194,11 @@ define void @typei1_orig(i64 %a, i8* %p, <8 x i16>* %q) { ; CHECK-LABEL: typei1_orig: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, #0 ; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: cmp x0, #0 ; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: neg v0.8h, v0.8h ; CHECK-NEXT: dup v1.8h, w8 +; CHECK-NEXT: neg v0.8h, v0.8h ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h ; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: cmtst v0.8h, v0.8h, v0.8h Index: llvm/test/CodeGen/AArch64/aarch64-load-ext.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-load-ext.ll +++ llvm/test/CodeGen/AArch64/aarch64-load-ext.ll @@ -106,18 +106,18 @@ ; CHECK-LE-LABEL: fsext_v2i32: ; CHECK-LE: // %bb.0: ; CHECK-LE-NEXT: ldrsb w8, [x0] +; CHECK-LE-NEXT: ldrsb w9, [x0, #1] ; CHECK-LE-NEXT: fmov s0, w8 -; CHECK-LE-NEXT: ldrsb w8, [x0, #1] -; CHECK-LE-NEXT: mov v0.s[1], w8 +; CHECK-LE-NEXT: mov v0.s[1], w9 ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: fsext_v2i32: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: ldrsb w8, [x0] +; CHECK-BE-NEXT: ldrsb w9, [x0, #1] ; CHECK-BE-NEXT: fmov s0, w8 -; CHECK-BE-NEXT: ldrsb w8, [x0, #1] -; CHECK-BE-NEXT: mov v0.s[1], w8 +; CHECK-BE-NEXT: mov v0.s[1], w9 ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s ; CHECK-BE-NEXT: ret %x = load <2 x i8>, <2 x i8>* %a @@ -251,18 +251,18 @@ ; CHECK-LE-LABEL: fsext_v2i16: ; CHECK-LE: // %bb.0: ; CHECK-LE-NEXT: ldrsb w8, [x0] +; CHECK-LE-NEXT: ldrsb w9, [x0, #1] ; CHECK-LE-NEXT: fmov s0, w8 -; CHECK-LE-NEXT: ldrsb w8, [x0, #1] -; CHECK-LE-NEXT: mov v0.s[1], w8 +; CHECK-LE-NEXT: mov v0.s[1], w9 ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: fsext_v2i16: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: ldrsb w8, [x0] +; CHECK-BE-NEXT: ldrsb w9, [x0, #1] ; CHECK-BE-NEXT: fmov s0, w8 -; CHECK-BE-NEXT: ldrsb w8, [x0, #1] -; CHECK-BE-NEXT: mov v0.s[1], w8 +; CHECK-BE-NEXT: mov v0.s[1], w9 ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s ; CHECK-BE-NEXT: ret %x = load <2 x i8>, <2 x i8>* %a Index: llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll +++ llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll @@ -193,18 +193,18 @@ ; CHECK-NEXT: b.lt .LBB3_8 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: mov w9, w3 ; CHECK-NEXT: cmp w3, #15 +; CHECK-NEXT: mov w9, w3 ; CHECK-NEXT: b.hi .LBB3_3 ; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: mov x10, xzr ; CHECK-NEXT: b .LBB3_6 ; CHECK-NEXT: .LBB3_3: // %vector.ph ; CHECK-NEXT: and x10, x9, #0xfffffff0 +; CHECK-NEXT: dup v0.8h, w8 ; CHECK-NEXT: add x11, x2, #32 ; CHECK-NEXT: add x12, x0, #16 ; CHECK-NEXT: mov x13, x10 -; CHECK-NEXT: dup v0.8h, w8 ; CHECK-NEXT: .LBB3_4: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp q1, q2, [x12, #-16] @@ -221,15 +221,15 @@ ; CHECK-NEXT: cmp x10, x9 ; CHECK-NEXT: b.eq .LBB3_8 ; CHECK-NEXT: .LBB3_6: // %for.body.preheader1 -; CHECK-NEXT: sub x9, x9, x10 ; CHECK-NEXT: add x11, x2, x10, lsl #2 -; CHECK-NEXT: add x10, x0, x10, lsl #1 +; CHECK-NEXT: add x12, x0, x10, lsl #1 +; CHECK-NEXT: sub x9, x9, x10 ; CHECK-NEXT: .LBB3_7: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrsh w12, [x10], #2 +; CHECK-NEXT: ldrsh w10, [x12], #2 ; CHECK-NEXT: subs x9, x9, #1 -; CHECK-NEXT: mul w12, w12, w8 -; CHECK-NEXT: str w12, [x11], #4 +; CHECK-NEXT: mul w10, w10, w8 +; CHECK-NEXT: str w10, [x11], #4 ; CHECK-NEXT: b.ne .LBB3_7 ; CHECK-NEXT: .LBB3_8: // %for.cond.cleanup ; CHECK-NEXT: ret @@ -304,19 +304,19 @@ ; CHECK-NEXT: cmp w3, #1 ; CHECK-NEXT: b.lt .LBB4_8 ; CHECK-NEXT: // %bb.1: // %for.body.preheader +; CHECK-NEXT: cmp w3, #15 ; CHECK-NEXT: and w8, w1, #0xffff ; CHECK-NEXT: mov w9, w3 -; CHECK-NEXT: cmp w3, #15 ; CHECK-NEXT: b.hi .LBB4_3 ; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: mov x10, xzr ; CHECK-NEXT: b .LBB4_6 ; CHECK-NEXT: .LBB4_3: // %vector.ph ; CHECK-NEXT: and x10, x9, #0xfffffff0 +; CHECK-NEXT: dup v0.8h, w8 ; CHECK-NEXT: add x11, x2, #32 ; CHECK-NEXT: add x12, x0, #16 ; CHECK-NEXT: mov x13, x10 -; CHECK-NEXT: dup v0.8h, w8 ; CHECK-NEXT: .LBB4_4: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp q1, q2, [x12, #-16] @@ -333,15 +333,15 @@ ; CHECK-NEXT: cmp x10, x9 ; CHECK-NEXT: b.eq .LBB4_8 ; CHECK-NEXT: .LBB4_6: // %for.body.preheader1 -; CHECK-NEXT: sub x9, x9, x10 ; CHECK-NEXT: add x11, x2, x10, lsl #2 -; CHECK-NEXT: add x10, x0, x10, lsl #1 +; CHECK-NEXT: add x12, x0, x10, lsl #1 +; CHECK-NEXT: sub x9, x9, x10 ; CHECK-NEXT: .LBB4_7: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrh w12, [x10], #2 +; CHECK-NEXT: ldrh w10, [x12], #2 ; CHECK-NEXT: subs x9, x9, #1 -; CHECK-NEXT: mul w12, w12, w8 -; CHECK-NEXT: str w12, [x11], #4 +; CHECK-NEXT: mul w10, w10, w8 +; CHECK-NEXT: str w10, [x11], #4 ; CHECK-NEXT: b.ne .LBB4_7 ; CHECK-NEXT: .LBB4_8: // %for.cond.cleanup ; CHECK-NEXT: ret @@ -416,8 +416,8 @@ ; CHECK-NEXT: cbz w2, .LBB5_3 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: sxtb w9, w1 -; CHECK-NEXT: mov w10, w2 ; CHECK-NEXT: cmp w2, #15 +; CHECK-NEXT: mov w10, w2 ; CHECK-NEXT: b.hi .LBB5_4 ; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: mov x11, xzr Index: llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll +++ llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll @@ -15,24 +15,24 @@ ; CHECK-MOPS-NEXT: .cfi_offset w29, -16 ; CHECK-MOPS-NEXT: sub sp, sp, #2016 ; CHECK-MOPS-NEXT: .cfi_def_cfa_offset 2032 +; CHECK-MOPS-NEXT: adrp x8, .LCPI0_0 +; CHECK-MOPS-NEXT: adrp x9, .LCPI0_1 +; CHECK-MOPS-NEXT: ldr q0, [x8, :lo12:.LCPI0_0] ; CHECK-MOPS-NEXT: mov w8, #1000 +; CHECK-MOPS-NEXT: ldr d1, [x9, :lo12:.LCPI0_1] ; CHECK-MOPS-NEXT: add x9, sp, #8 -; CHECK-MOPS-NEXT: adrp x10, .LCPI0_0 -; CHECK-MOPS-NEXT: adrp x11, .LCPI0_1 -; CHECK-MOPS-NEXT: mov w12, #6424 -; CHECK-MOPS-NEXT: mov w13, #7452 ; CHECK-MOPS-NEXT: setp [x9]!, x8!, xzr ; CHECK-MOPS-NEXT: setm [x9]!, x8!, xzr ; CHECK-MOPS-NEXT: sete [x9]!, x8!, xzr -; CHECK-MOPS-NEXT: movk w12, #6938, lsl #16 -; CHECK-MOPS-NEXT: ldr q0, [x10, :lo12:.LCPI0_0] -; CHECK-MOPS-NEXT: mov w8, #30 -; CHECK-MOPS-NEXT: ldr d1, [x11, :lo12:.LCPI0_1] ; CHECK-MOPS-NEXT: add x0, sp, #1008 +; CHECK-MOPS-NEXT: mov w8, #6424 ; CHECK-MOPS-NEXT: add x1, sp, #8 -; CHECK-MOPS-NEXT: str w12, [sp, #1032] -; CHECK-MOPS-NEXT: strh w13, [sp, #1036] +; CHECK-MOPS-NEXT: movk w8, #6938, lsl #16 ; CHECK-MOPS-NEXT: str q0, [sp, #1008] +; CHECK-MOPS-NEXT: str w8, [sp, #1032] +; CHECK-MOPS-NEXT: mov w8, #7452 +; CHECK-MOPS-NEXT: strh w8, [sp, #1036] +; CHECK-MOPS-NEXT: mov w8, #30 ; CHECK-MOPS-NEXT: str d1, [sp, #1024] ; CHECK-MOPS-NEXT: strb w8, [sp, #1038] ; CHECK-MOPS-NEXT: bl fn Index: llvm/test/CodeGen/AArch64/aarch64-smull.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -111,11 +111,11 @@ define <4 x i32> @amull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: amull_v4i16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff -; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B @@ -129,11 +129,11 @@ define <2 x i64> @amull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LABEL: amull_v2i32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: movi v0.2d, #0x000000ffffffff -; CHECK-NEXT: smull v1.2d, v1.2s, v2.2s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B @@ -275,12 +275,12 @@ define <4 x i32> @amlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ; CHECK-LABEL: amlal_v4i16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlal v1.4s, v0.4h, v2.4h ; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff -; CHECK-NEXT: smlal v2.4s, v1.4h, v3.4h -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B @@ -296,12 +296,12 @@ define <2 x i64> @amlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ; CHECK-LABEL: amlal_v2i32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlal v1.2d, v0.2s, v2.2s ; CHECK-NEXT: movi v0.2d, #0x000000ffffffff -; CHECK-NEXT: smlal v2.2d, v1.2s, v3.2s -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B @@ -445,12 +445,12 @@ define <4 x i32> @amlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ; CHECK-LABEL: amlsl_v4i16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlsl v1.4s, v0.4h, v2.4h ; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff -; CHECK-NEXT: smlsl v2.4s, v1.4h, v3.4h -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B @@ -466,12 +466,12 @@ define <2 x i64> @amlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ; CHECK-LABEL: amlsl_v2i32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlsl v1.2d, v0.2s, v2.2s ; CHECK-NEXT: movi v0.2d, #0x000000ffffffff -; CHECK-NEXT: smlsl v2.2d, v1.2s, v3.2s -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B @@ -599,9 +599,9 @@ ; CHECK-LABEL: amull_extvec_v4i16_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1234 +; CHECK-NEXT: dup v1.4h, w8 +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-NEXT: dup v2.4h, w8 -; CHECK-NEXT: smull v0.4s, v0.4h, v2.4h ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp3 = zext <4 x i16> %arg to <4 x i32> @@ -614,9 +614,9 @@ ; CHECK-LABEL: amull_extvec_v2i32_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1234 +; CHECK-NEXT: dup v1.2s, w8 +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-NEXT: dup v2.2s, w8 -; CHECK-NEXT: smull v0.2d, v0.2s, v2.2s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp3 = zext <2 x i32> %arg to <2 x i64> Index: llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll +++ llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll @@ -29,8 +29,8 @@ ; CHECK-O2-NEXT: .LBB0_3: // %if.end ; CHECK-O2-NEXT: adrp x9, global_int ; CHECK-O2-NEXT: add x2, x8, #16 -; CHECK-O2-NEXT: mov w0, #10 ; CHECK-O2-NEXT: ldr w1, [x9, :lo12:global_int] +; CHECK-O2-NEXT: mov w0, #10 ; CHECK-O2-NEXT: b externalfunc ; ; CHECK-O3-LABEL: testcase: @@ -44,15 +44,15 @@ ; CHECK-O3-NEXT: ldr x8, [x8, :lo12:global_ptr] ; CHECK-O3-NEXT: adrp x9, global_int ; CHECK-O3-NEXT: add x2, x8, #16 -; CHECK-O3-NEXT: mov w0, #10 ; CHECK-O3-NEXT: ldr w1, [x9, :lo12:global_int] +; CHECK-O3-NEXT: mov w0, #10 ; CHECK-O3-NEXT: b externalfunc ; CHECK-O3-NEXT: .LBB0_2: ; CHECK-O3-NEXT: mov x8, xzr ; CHECK-O3-NEXT: adrp x9, global_int ; CHECK-O3-NEXT: add x2, x8, #16 -; CHECK-O3-NEXT: mov w0, #10 ; CHECK-O3-NEXT: ldr w1, [x9, :lo12:global_int] +; CHECK-O3-NEXT: mov w0, #10 ; CHECK-O3-NEXT: b externalfunc entry: %0 = load %a*, %a** @global_ptr, align 8 Index: llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll =================================================================== --- llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll +++ llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll @@ -8,8 +8,8 @@ ; CHECK-NEXT: add x8, sp, #40 ; CHECK-NEXT: add x0, sp, #40 ; CHECK-NEXT: stp x30, x18, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x3, x4, [sp, #56] ; CHECK-NEXT: stp x1, x2, [sp, #40] +; CHECK-NEXT: stp x3, x4, [sp, #56] ; CHECK-NEXT: stp x5, x6, [sp, #72] ; CHECK-NEXT: str x7, [sp, #88] ; CHECK-NEXT: str x8, [sp, #8] Index: llvm/test/CodeGen/AArch64/active_lane_mask.ll =================================================================== --- llvm/test/CodeGen/AArch64/active_lane_mask.ll +++ llvm/test/CodeGen/AArch64/active_lane_mask.ll @@ -131,11 +131,11 @@ ; CHECK-LABEL: lane_mask_nxv2i1_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: index z0.d, #0, #1 -; CHECK-NEXT: mov z1.d, x0 +; CHECK-NEXT: mov z0.d, x0 +; CHECK-NEXT: index z1.d, #0, #1 ; CHECK-NEXT: and z0.d, z0.d, #0xff ; CHECK-NEXT: and z1.d, z1.d, #0xff -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: add z0.d, z1.d, z0.d ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: mov z2.d, x1 ; CHECK-NEXT: umin z0.d, z0.d, #255 @@ -160,39 +160,39 @@ ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: index z0.s, #0, #1 -; CHECK-NEXT: mov z3.s, w0 +; CHECK-NEXT: mov z2.s, w0 ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z4.s, w1 +; CHECK-NEXT: mov z3.s, w1 ; CHECK-NEXT: incw z1.s -; CHECK-NEXT: uqadd z5.s, z0.s, z3.s -; CHECK-NEXT: incw z2.s, all, mul #2 -; CHECK-NEXT: mov z6.d, z1.d -; CHECK-NEXT: cmphi p1.s, p0/z, z4.s, z5.s -; CHECK-NEXT: uqadd z5.s, z1.s, z3.s -; CHECK-NEXT: cmphi p2.s, p0/z, z4.s, z5.s -; CHECK-NEXT: uqadd z5.s, z2.s, z3.s -; CHECK-NEXT: incw z6.s, all, mul #2 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: uqadd z4.s, z1.s, z2.s +; CHECK-NEXT: uqadd z5.s, z0.s, z2.s +; CHECK-NEXT: cmphi p1.s, p0/z, z3.s, z4.s +; CHECK-NEXT: cmphi p2.s, p0/z, z3.s, z5.s +; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: mov z5.d, z1.d +; CHECK-NEXT: incw z4.s, all, mul #2 +; CHECK-NEXT: incw z5.s, all, mul #2 +; CHECK-NEXT: uqadd z6.s, z4.s, z2.s +; CHECK-NEXT: uqadd z7.s, z5.s, z2.s ; CHECK-NEXT: incw z0.s, all, mul #4 -; CHECK-NEXT: cmphi p3.s, p0/z, z4.s, z5.s -; CHECK-NEXT: uqadd z5.s, z6.s, z3.s ; CHECK-NEXT: incw z1.s, all, mul #4 ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: cmphi p4.s, p0/z, z4.s, z5.s -; CHECK-NEXT: uqadd z0.s, z0.s, z3.s -; CHECK-NEXT: uqadd z1.s, z1.s, z3.s -; CHECK-NEXT: incw z2.s, all, mul #4 -; CHECK-NEXT: incw z6.s, all, mul #4 -; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h +; CHECK-NEXT: cmphi p3.s, p0/z, z3.s, z6.s +; CHECK-NEXT: cmphi p4.s, p0/z, z3.s, z7.s +; CHECK-NEXT: uqadd z0.s, z0.s, z2.s +; CHECK-NEXT: uqadd z1.s, z1.s, z2.s +; CHECK-NEXT: incw z4.s, all, mul #4 +; CHECK-NEXT: incw z5.s, all, mul #4 +; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h ; CHECK-NEXT: uzp1 p2.h, p3.h, p4.h -; CHECK-NEXT: cmphi p3.s, p0/z, z4.s, z0.s -; CHECK-NEXT: cmphi p4.s, p0/z, z4.s, z1.s -; CHECK-NEXT: uqadd z0.s, z2.s, z3.s -; CHECK-NEXT: uqadd z1.s, z6.s, z3.s +; CHECK-NEXT: cmphi p3.s, p0/z, z3.s, z0.s +; CHECK-NEXT: cmphi p4.s, p0/z, z3.s, z1.s +; CHECK-NEXT: uqadd z0.s, z4.s, z2.s +; CHECK-NEXT: uqadd z1.s, z5.s, z2.s ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: cmphi p5.s, p0/z, z4.s, z0.s -; CHECK-NEXT: cmphi p0.s, p0/z, z4.s, z1.s +; CHECK-NEXT: cmphi p5.s, p0/z, z3.s, z0.s +; CHECK-NEXT: cmphi p0.s, p0/z, z3.s, z1.s ; CHECK-NEXT: uzp1 p3.h, p3.h, p4.h ; CHECK-NEXT: uzp1 p4.h, p5.h, p0.h ; CHECK-NEXT: uzp1 p0.b, p1.b, p2.b @@ -214,76 +214,76 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: index z0.d, #0, #1 -; CHECK-NEXT: mov z3.d, x0 -; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: index z2.d, #0, #1 +; CHECK-NEXT: mov z0.d, x0 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z1.d, x1 +; CHECK-NEXT: incd z3.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z4.d, x1 -; CHECK-NEXT: incd z1.d -; CHECK-NEXT: uqadd z5.d, z0.d, z3.d -; CHECK-NEXT: uqadd z6.d, z1.d, z3.d -; CHECK-NEXT: cmphi p1.d, p0/z, z4.d, z5.d -; CHECK-NEXT: mov z5.d, z1.d -; CHECK-NEXT: incd z2.d, all, mul #2 -; CHECK-NEXT: cmphi p2.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z2.d, z3.d -; CHECK-NEXT: mov z7.d, z0.d +; CHECK-NEXT: uqadd z4.d, z3.d, z0.d +; CHECK-NEXT: uqadd z5.d, z2.d, z0.d +; CHECK-NEXT: cmphi p1.d, p0/z, z1.d, z4.d +; CHECK-NEXT: mov z4.d, z2.d +; CHECK-NEXT: cmphi p2.d, p0/z, z1.d, z5.d +; CHECK-NEXT: incd z4.d, all, mul #2 +; CHECK-NEXT: mov z5.d, z3.d +; CHECK-NEXT: uqadd z6.d, z4.d, z0.d ; CHECK-NEXT: incd z5.d, all, mul #2 -; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s -; CHECK-NEXT: cmphi p2.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z5.d, z3.d -; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s +; CHECK-NEXT: cmphi p2.d, p0/z, z1.d, z6.d +; CHECK-NEXT: uqadd z6.d, z5.d, z0.d +; CHECK-NEXT: cmphi p3.d, p0/z, z1.d, z6.d +; CHECK-NEXT: mov z6.d, z2.d +; CHECK-NEXT: incd z6.d, all, mul #4 +; CHECK-NEXT: mov z7.d, z3.d +; CHECK-NEXT: uqadd z24.d, z6.d, z0.d ; CHECK-NEXT: incd z7.d, all, mul #4 -; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z7.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: incd z24.d, all, mul #4 ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z26.d, z5.d -; CHECK-NEXT: cmphi p4.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z24.d, z3.d -; CHECK-NEXT: incd z25.d, all, mul #4 +; CHECK-NEXT: cmphi p4.d, p0/z, z1.d, z24.d +; CHECK-NEXT: uqadd z24.d, z7.d, z0.d ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z25.d, z3.d -; CHECK-NEXT: incd z26.d, all, mul #4 +; CHECK-NEXT: cmphi p5.d, p0/z, z1.d, z24.d +; CHECK-NEXT: mov z24.d, z4.d +; CHECK-NEXT: incd z24.d, all, mul #4 +; CHECK-NEXT: mov z25.d, z5.d +; CHECK-NEXT: uqadd z26.d, z24.d, z0.d +; CHECK-NEXT: incd z25.d, all, mul #4 ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z6.d -; CHECK-NEXT: uqadd z6.d, z26.d, z3.d +; CHECK-NEXT: cmphi p6.d, p0/z, z1.d, z26.d +; CHECK-NEXT: uqadd z26.d, z25.d, z0.d ; CHECK-NEXT: uzp1 p2.s, p2.s, p3.s -; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z6.d -; CHECK-NEXT: incd z0.d, all, mul #8 -; CHECK-NEXT: incd z1.d, all, mul #8 +; CHECK-NEXT: cmphi p3.d, p0/z, z1.d, z26.d +; CHECK-NEXT: incd z2.d, all, mul #8 +; CHECK-NEXT: incd z3.d, all, mul #8 ; CHECK-NEXT: uzp1 p4.s, p4.s, p5.s ; CHECK-NEXT: uzp1 p3.s, p6.s, p3.s -; CHECK-NEXT: uqadd z0.d, z0.d, z3.d -; CHECK-NEXT: uqadd z1.d, z1.d, z3.d -; CHECK-NEXT: incd z2.d, all, mul #8 +; CHECK-NEXT: uqadd z2.d, z2.d, z0.d +; CHECK-NEXT: uqadd z3.d, z3.d, z0.d +; CHECK-NEXT: incd z4.d, all, mul #8 ; CHECK-NEXT: incd z5.d, all, mul #8 ; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h ; CHECK-NEXT: uzp1 p2.h, p4.h, p3.h -; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z0.d -; CHECK-NEXT: cmphi p4.d, p0/z, z4.d, z1.d -; CHECK-NEXT: uqadd z0.d, z2.d, z3.d -; CHECK-NEXT: uqadd z1.d, z5.d, z3.d +; CHECK-NEXT: cmphi p3.d, p0/z, z1.d, z2.d +; CHECK-NEXT: cmphi p4.d, p0/z, z1.d, z3.d +; CHECK-NEXT: uqadd z2.d, z4.d, z0.d +; CHECK-NEXT: uqadd z3.d, z5.d, z0.d +; CHECK-NEXT: incd z6.d, all, mul #8 +; CHECK-NEXT: cmphi p5.d, p0/z, z1.d, z2.d +; CHECK-NEXT: cmphi p6.d, p0/z, z1.d, z3.d ; CHECK-NEXT: incd z7.d, all, mul #8 +; CHECK-NEXT: uqadd z2.d, z6.d, z0.d ; CHECK-NEXT: incd z24.d, all, mul #8 -; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z0.d -; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z1.d -; CHECK-NEXT: uqadd z0.d, z7.d, z3.d -; CHECK-NEXT: uqadd z1.d, z24.d, z3.d ; CHECK-NEXT: incd z25.d, all, mul #8 -; CHECK-NEXT: incd z26.d, all, mul #8 ; CHECK-NEXT: uzp1 p3.s, p3.s, p4.s ; CHECK-NEXT: uzp1 p4.s, p5.s, p6.s -; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z0.d -; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z1.d -; CHECK-NEXT: uqadd z0.d, z25.d, z3.d -; CHECK-NEXT: uqadd z1.d, z26.d, z3.d +; CHECK-NEXT: uqadd z3.d, z7.d, z0.d +; CHECK-NEXT: cmphi p5.d, p0/z, z1.d, z2.d +; CHECK-NEXT: uqadd z2.d, z24.d, z0.d +; CHECK-NEXT: uqadd z0.d, z25.d, z0.d ; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: cmphi p7.d, p0/z, z4.d, z0.d -; CHECK-NEXT: cmphi p0.d, p0/z, z4.d, z1.d +; CHECK-NEXT: cmphi p6.d, p0/z, z1.d, z3.d +; CHECK-NEXT: cmphi p7.d, p0/z, z1.d, z2.d +; CHECK-NEXT: cmphi p0.d, p0/z, z1.d, z0.d ; CHECK-NEXT: uzp1 p5.s, p5.s, p6.s ; CHECK-NEXT: uzp1 p0.s, p7.s, p0.s ; CHECK-NEXT: uzp1 p3.h, p3.h, p4.h @@ -306,15 +306,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #1 ; CHECK-NEXT: index z0.b, #0, #1 -; CHECK-NEXT: mov z1.b, w8 -; CHECK-NEXT: mov z2.b, w0 -; CHECK-NEXT: add z1.b, z0.b, z1.b -; CHECK-NEXT: mov z3.b, w1 -; CHECK-NEXT: uqadd z0.b, z0.b, z2.b +; CHECK-NEXT: mov z1.b, w0 +; CHECK-NEXT: mov z3.b, w8 +; CHECK-NEXT: uqadd z2.b, z0.b, z1.b +; CHECK-NEXT: add z0.b, z0.b, z3.b +; CHECK-NEXT: mov z4.b, w1 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: uqadd z1.b, z1.b, z2.b -; CHECK-NEXT: cmphi p0.b, p1/z, z3.b, z0.b -; CHECK-NEXT: cmphi p1.b, p1/z, z3.b, z1.b +; CHECK-NEXT: uqadd z0.b, z0.b, z1.b +; CHECK-NEXT: cmphi p0.b, p1/z, z4.b, z2.b +; CHECK-NEXT: cmphi p1.b, p1/z, z4.b, z0.b ; CHECK-NEXT: ret %active.lane.mask = call @llvm.get.active.lane.mask.nxv32i1.i8(i8 %index, i8 %TC) ret %active.lane.mask @@ -430,8 +430,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: dup v0.8b, w0 -; CHECK-NEXT: dup v2.8b, w1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI24_0] +; CHECK-NEXT: dup v2.8b, w1 ; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b ; CHECK-NEXT: cmhi v0.8b, v2.8b, v0.8b ; CHECK-NEXT: ret @@ -442,14 +442,14 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) { ; CHECK-LABEL: lane_mask_v4i1_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI25_0 ; CHECK-NEXT: dup v0.4h, w0 +; CHECK-NEXT: adrp x8, .LCPI25_0 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI25_0] ; CHECK-NEXT: movi d2, #0xff00ff00ff00ff ; CHECK-NEXT: dup v3.4h, w1 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI25_0] ; CHECK-NEXT: bic v0.4h, #255, lsl #8 -; CHECK-NEXT: bic v3.4h, #255, lsl #8 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: bic v3.4h, #255, lsl #8 ; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h ; CHECK-NEXT: cmhi v0.4h, v3.4h, v0.4h ; CHECK-NEXT: ret @@ -460,9 +460,9 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) { ; CHECK-LABEL: lane_mask_v2i1_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI26_0 ; CHECK-NEXT: movi d0, #0x0000ff000000ff ; CHECK-NEXT: dup v1.2s, w0 +; CHECK-NEXT: adrp x8, .LCPI26_0 ; CHECK-NEXT: dup v3.2s, w1 ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI26_0] ; CHECK-NEXT: and v1.8b, v1.8b, v0.8b Index: llvm/test/CodeGen/AArch64/align-down.ll =================================================================== --- llvm/test/CodeGen/AArch64/align-down.ll +++ llvm/test/CodeGen/AArch64/align-down.ll @@ -84,8 +84,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sub w8, w1, #1 ; CHECK-NEXT: and w9, w0, w8 -; CHECK-NEXT: sub w0, w0, w9 ; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: sub w0, w0, w9 ; CHECK-NEXT: str w9, [x3] ; CHECK-NEXT: ret %mask = add i32 %alignment, -1 Index: llvm/test/CodeGen/AArch64/and-mask-removal.ll =================================================================== --- llvm/test/CodeGen/AArch64/and-mask-removal.ll +++ llvm/test/CodeGen/AArch64/and-mask-removal.ll @@ -21,8 +21,8 @@ ; CHECK-NEXT: adrp x9, _next_string@GOTPAGE ; CHECK-NEXT: adrp x10, _string_number@GOTPAGE ; CHECK-NEXT: ldr x9, [x9, _next_string@GOTPAGEOFF] -; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: ldr x10, [x10, _string_number@GOTPAGEOFF] +; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: str w9, [x10, x8, lsl #2] ; CHECK-NEXT: LBB0_2: ; %if.end ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/andorbrcompare.ll =================================================================== --- llvm/test/CodeGen/AArch64/andorbrcompare.ll +++ llvm/test/CodeGen/AArch64/andorbrcompare.ll @@ -8,15 +8,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp w2, w3 ; CHECK-NEXT: ccmp w0, w1, #0, ne -; CHECK-NEXT: ccmp w4, w5, #0, ne -; CHECK-NEXT: b.hs .LBB0_2 -; CHECK-NEXT: // %bb.1: // %if +; CHECK-NEXT: b.eq .LBB0_3 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: cmp w4, w5 +; CHECK-NEXT: b.lo .LBB0_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_3: // %if ; CHECK-NEXT: mov w0, #1 ; CHECK-NEXT: str w0, [x6] ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret entry: %c0 = icmp eq i32 %s0, %s1 %c1 = icmp ne i32 %s2, %s3 @@ -38,15 +40,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp w2, w3 ; CHECK-NEXT: ccmp w0, w1, #4, lo -; CHECK-NEXT: ccmp w4, w5, #0, eq -; CHECK-NEXT: b.hi .LBB1_2 -; CHECK-NEXT: // %bb.1: // %if +; CHECK-NEXT: b.ne .LBB1_3 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: cmp w4, w5 +; CHECK-NEXT: b.ls .LBB1_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_3: // %if ; CHECK-NEXT: mov w0, #1 ; CHECK-NEXT: str w0, [x6] ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret entry: %c0 = icmp ne i32 %s0, %s1 %c1 = icmp ult i32 %s2, %s3 @@ -68,15 +72,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp w2, w3 ; CHECK-NEXT: ccmp w0, w1, #2, ls -; CHECK-NEXT: ccmp w4, w5, #2, hs -; CHECK-NEXT: b.ls .LBB2_2 -; CHECK-NEXT: // %bb.1: // %if +; CHECK-NEXT: b.lo .LBB2_3 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: cmp w4, w5 +; CHECK-NEXT: b.hi .LBB2_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_3: // %if ; CHECK-NEXT: mov w0, #1 ; CHECK-NEXT: str w0, [x6] ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret entry: %c0 = icmp ult i32 %s0, %s1 %c1 = icmp ule i32 %s2, %s3 @@ -98,15 +104,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp w2, w3 ; CHECK-NEXT: ccmp w0, w1, #2, hi -; CHECK-NEXT: ccmp w4, w5, #2, hi -; CHECK-NEXT: b.lo .LBB3_2 -; CHECK-NEXT: // %bb.1: // %if +; CHECK-NEXT: b.ls .LBB3_3 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: cmp w4, w5 +; CHECK-NEXT: b.hs .LBB3_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_3: // %if ; CHECK-NEXT: mov w0, #1 ; CHECK-NEXT: str w0, [x6] ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB3_2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret entry: %c0 = icmp ule i32 %s0, %s1 %c1 = icmp ugt i32 %s2, %s3 @@ -128,15 +136,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp w2, w3 ; CHECK-NEXT: ccmp w0, w1, #0, hs -; CHECK-NEXT: ccmp w4, w5, #8, ls -; CHECK-NEXT: b.ge .LBB4_2 -; CHECK-NEXT: // %bb.1: // %if +; CHECK-NEXT: b.hi .LBB4_3 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: cmp w4, w5 +; CHECK-NEXT: b.lt .LBB4_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_3: // %if ; CHECK-NEXT: mov w0, #1 ; CHECK-NEXT: str w0, [x6] ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret entry: %c0 = icmp ugt i32 %s0, %s1 %c1 = icmp uge i32 %s2, %s3 @@ -158,15 +168,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp w2, w3 ; CHECK-NEXT: ccmp w0, w1, #0, lt -; CHECK-NEXT: ccmp w4, w5, #4, lo -; CHECK-NEXT: b.gt .LBB5_2 -; CHECK-NEXT: // %bb.1: // %if +; CHECK-NEXT: b.hs .LBB5_3 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: cmp w4, w5 +; CHECK-NEXT: b.le .LBB5_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_3: // %if ; CHECK-NEXT: mov w0, #1 ; CHECK-NEXT: str w0, [x6] ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB5_2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret entry: %c0 = icmp uge i32 %s0, %s1 %c1 = icmp slt i32 %s2, %s3 @@ -188,15 +200,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp w2, w3 ; CHECK-NEXT: ccmp w0, w1, #0, le -; CHECK-NEXT: ccmp w4, w5, #0, ge -; CHECK-NEXT: b.le .LBB6_2 -; CHECK-NEXT: // %bb.1: // %if +; CHECK-NEXT: b.lt .LBB6_3 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: cmp w4, w5 +; CHECK-NEXT: b.gt .LBB6_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB6_3: // %if ; CHECK-NEXT: mov w0, #1 ; CHECK-NEXT: str w0, [x6] ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret entry: %c0 = icmp slt i32 %s0, %s1 %c1 = icmp sle i32 %s2, %s3 @@ -218,15 +232,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp w2, w3 ; CHECK-NEXT: ccmp w0, w1, #0, gt -; CHECK-NEXT: ccmp w4, w5, #0, gt -; CHECK-NEXT: b.lt .LBB7_2 -; CHECK-NEXT: // %bb.1: // %if +; CHECK-NEXT: b.le .LBB7_3 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: cmp w4, w5 +; CHECK-NEXT: b.ge .LBB7_3 +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB7_3: // %if ; CHECK-NEXT: mov w0, #1 ; CHECK-NEXT: str w0, [x6] ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret entry: %c0 = icmp sle i32 %s0, %s1 %c1 = icmp sgt i32 %s2, %s3 Index: llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll +++ llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll @@ -5,19 +5,19 @@ define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: bar: ; CHECK: // %bb.0: -; CHECK-NEXT: add.2d v0, v0, v1 -; CHECK-NEXT: sub d2, d0, d1 -; CHECK-NEXT: add d0, d0, d1 -; CHECK-NEXT: fmov x8, d2 +; CHECK-NEXT: add.2d v2, v0, v1 +; CHECK-NEXT: add d0, d2, d1 +; CHECK-NEXT: sub d1, d2, d1 +; CHECK-NEXT: fmov x8, d1 ; CHECK-NEXT: mov.d v0[1], x8 ; CHECK-NEXT: ret ; ; GENERIC-LABEL: bar: ; GENERIC: // %bb.0: -; GENERIC-NEXT: add v0.2d, v0.2d, v1.2d -; GENERIC-NEXT: sub d2, d0, d1 -; GENERIC-NEXT: add d0, d0, d1 -; GENERIC-NEXT: fmov x8, d2 +; GENERIC-NEXT: add v2.2d, v0.2d, v1.2d +; GENERIC-NEXT: add d0, d2, d1 +; GENERIC-NEXT: sub d1, d2, d1 +; GENERIC-NEXT: fmov x8, d1 ; GENERIC-NEXT: mov v0.d[1], x8 ; GENERIC-NEXT: ret %add = add <2 x i64> %a, %b @@ -68,16 +68,16 @@ define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: add_sub_su64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d2, xzr ; CHECK-NEXT: add d0, d1, d0 -; CHECK-NEXT: sub d0, d2, d0 +; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: sub d0, d1, d0 ; CHECK-NEXT: ret ; ; GENERIC-LABEL: add_sub_su64: ; GENERIC: // %bb.0: -; GENERIC-NEXT: fmov d2, xzr ; GENERIC-NEXT: add d0, d1, d0 -; GENERIC-NEXT: sub d0, d2, d0 +; GENERIC-NEXT: fmov d1, xzr +; GENERIC-NEXT: sub d0, d1, d0 ; GENERIC-NEXT: ret %vecext = extractelement <2 x i64> %a, i32 0 %vecext1 = extractelement <2 x i64> %b, i32 0 Index: llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll +++ llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll @@ -10,28 +10,28 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) { ; CHECK-LABEL: fullGtU: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: adrp x10, _block@GOTPAGE +; CHECK-NEXT: adrp x8, _block@GOTPAGE ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: ldr x10, [x10, _block@GOTPAGEOFF] -; CHECK-NEXT: ldr x10, [x10] -; CHECK-NEXT: ldrb w11, [x10, x8] -; CHECK-NEXT: ldrb w12, [x10, x9] +; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: ldr x8, [x8, _block@GOTPAGEOFF] +; CHECK-NEXT: sxtw x10, w1 +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: ldrb w11, [x8, x9] +; CHECK-NEXT: ldrb w12, [x8, x10] ; CHECK-NEXT: cmp w11, w12 ; CHECK-NEXT: b.ne LBB0_3 ; CHECK-NEXT: ; %bb.1: ; %if.end -; CHECK-NEXT: add x8, x8, x10 -; CHECK-NEXT: add x9, x9, x10 -; CHECK-NEXT: ldrb w10, [x8, #1] -; CHECK-NEXT: ldrb w11, [x9, #1] +; CHECK-NEXT: add x9, x9, x8 +; CHECK-NEXT: add x8, x10, x8 +; CHECK-NEXT: ldrb w10, [x9, #1] +; CHECK-NEXT: ldrb w11, [x8, #1] ; CHECK-NEXT: cmp w10, w11 ; CHECK-NEXT: b.ne LBB0_3 ; CHECK-NEXT: ; %bb.2: ; %if.end25 -; CHECK-NEXT: ldrb w8, [x8, #2] ; CHECK-NEXT: ldrb w9, [x9, #2] -; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: ldrb w8, [x8, #2] +; CHECK-NEXT: cmp w9, w8 ; CHECK-NEXT: cset w8, hi ; CHECK-NEXT: csel w0, wzr, w8, eq ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/arm64-addrmode.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-addrmode.ll +++ llvm/test/CodeGen/AArch64/arm64-addrmode.ll @@ -67,9 +67,9 @@ define void @t6(i64 %a, i64* %object) { ; CHECK-LABEL: t6: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 -; CHECK-NEXT: add x9, x1, x0, lsl #3 -; CHECK-NEXT: ldr xzr, [x9, x8] +; CHECK-NEXT: add x8, x1, x0, lsl #3 +; CHECK-NEXT: mov w9, #32768 +; CHECK-NEXT: ldr xzr, [x8, x9] ; CHECK-NEXT: ret %tmp1 = getelementptr inbounds i64, i64* %object, i64 %a %incdec.ptr = getelementptr inbounds i64, i64* %tmp1, i64 4096 Index: llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll +++ llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll @@ -896,18 +896,18 @@ ; LLC-LABEL: fct20: ; LLC: // %bb.0: // %entry ; LLC-NEXT: mov x12, #11776 -; LLC-NEXT: extr x9, x1, x0, #18 +; LLC-NEXT: extr x8, x1, x0, #18 ; LLC-NEXT: movk x12, #25856, lsl #16 -; LLC-NEXT: lsr x8, x1, #18 +; LLC-NEXT: lsr x9, x1, #18 ; LLC-NEXT: movk x12, #11077, lsl #32 ; LLC-NEXT: orr x10, x2, x3 ; LLC-NEXT: mov w11, #26220 ; LLC-NEXT: movk x12, #45, lsl #48 -; LLC-NEXT: and x11, x8, x11 -; LLC-NEXT: and x12, x9, x12 +; LLC-NEXT: and x11, x9, x11 +; LLC-NEXT: and x12, x8, x12 ; LLC-NEXT: cmp x10, #0 -; LLC-NEXT: csel x0, x12, x9, eq -; LLC-NEXT: csel x1, x11, x8, eq +; LLC-NEXT: csel x0, x12, x8, eq +; LLC-NEXT: csel x1, x11, x9, eq ; LLC-NEXT: ret ; OPT-LABEL: @fct20( ; OPT-NEXT: entry: Index: llvm/test/CodeGen/AArch64/arm64-cse.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-cse.ll +++ llvm/test/CodeGen/AArch64/arm64-cse.ll @@ -15,8 +15,8 @@ ; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: ret ; CHECK-NEXT: LBB0_2: ; %if.end -; CHECK-NEXT: sub w9, w9, w8 ; CHECK-NEXT: add x0, x0, w8, sxtw +; CHECK-NEXT: sub w9, w9, w8 ; CHECK-NEXT: str w9, [x1] ; CHECK-NEXT: ret entry: Index: llvm/test/CodeGen/AArch64/arm64-fcopysign.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-fcopysign.ll +++ llvm/test/CodeGen/AArch64/arm64-fcopysign.ll @@ -36,11 +36,11 @@ define double @test3(double %a, float %b, float %c) nounwind { ; CHECK-LABEL: test3: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.2d v3, #0xffffffffffffffff -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fadd s1, s1, s2 -; CHECK-NEXT: fneg.2d v2, v3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: movi.2d v2, #0xffffffffffffffff ; CHECK-NEXT: fcvt d1, s1 +; CHECK-NEXT: fneg.2d v2, v2 ; CHECK-NEXT: bif.16b v0, v1, v2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/arm64-fmadd.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-fmadd.ll +++ llvm/test/CodeGen/AArch64/arm64-fmadd.ll @@ -225,8 +225,8 @@ ; CHECK-NEXT: mov w8, #-1037565952 ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov w8, #1109917696 -; CHECK-NEXT: fmul s1, s0, s1 ; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: fmul s1, s0, s1 ; CHECK-NEXT: fmadd s0, s0, s2, s1 ; CHECK-NEXT: ret %m = fmul float %x, 42.0 Index: llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll +++ llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll @@ -20,12 +20,12 @@ ; CHECK-NEXT: .cfi_offset b10, -56 ; CHECK-NEXT: .cfi_offset b11, -64 ; CHECK-NEXT: fmov s3, #1.00000000 -; CHECK-NEXT: scvtf s4, w0 ; CHECK-NEXT: sub w19, w0, #1 ; CHECK-NEXT: fadd s8, s0, s3 ; CHECK-NEXT: fadd s0, s8, s1 +; CHECK-NEXT: scvtf s1, w0 ; CHECK-NEXT: fadd s0, s0, s2 -; CHECK-NEXT: fsub s9, s0, s4 +; CHECK-NEXT: fsub s9, s0, s1 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __Z3goof ; CHECK-NEXT: fmov s10, s0 @@ -59,12 +59,12 @@ ; CHECK-LINUX-NEXT: .cfi_offset b10, -56 ; CHECK-LINUX-NEXT: .cfi_offset b11, -64 ; CHECK-LINUX-NEXT: fmov s3, #1.00000000 -; CHECK-LINUX-NEXT: scvtf s4, w0 ; CHECK-LINUX-NEXT: sub w19, w0, #1 ; CHECK-LINUX-NEXT: fadd s8, s0, s3 ; CHECK-LINUX-NEXT: fadd s0, s8, s1 +; CHECK-LINUX-NEXT: scvtf s1, w0 ; CHECK-LINUX-NEXT: fadd s0, s0, s2 -; CHECK-LINUX-NEXT: fsub s9, s0, s4 +; CHECK-LINUX-NEXT: fsub s9, s0, s1 ; CHECK-LINUX-NEXT: fmov s0, s8 ; CHECK-LINUX-NEXT: bl _Z3goof ; CHECK-LINUX-NEXT: fmov s10, s0 Index: llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -3909,8 +3909,8 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-LABEL: test_v8i16_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.h { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -3941,8 +3941,8 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-LABEL: test_v4i16_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.h { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -3973,8 +3973,8 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-LABEL: test_v4i32_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4005,8 +4005,8 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-LABEL: test_v2i32_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4037,8 +4037,8 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-LABEL: test_v2i64_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4069,8 +4069,8 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-LABEL: test_v1i64_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4101,8 +4101,8 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-LABEL: test_v4f32_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4133,8 +4133,8 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-LABEL: test_v2f32_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4165,8 +4165,8 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-LABEL: test_v2f64_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4197,8 +4197,8 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-LABEL: test_v1f64_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -5456,8 +5456,8 @@ define i16* @test_v8i16_post_reg_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v8i16_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.8h { v0, v1, v2 }, [x0], x8 @@ -5486,8 +5486,8 @@ define i16* @test_v4i16_post_reg_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4i16_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st3.4h { v0, v1, v2 }, [x0], x8 @@ -5516,8 +5516,8 @@ define i32* @test_v4i32_post_reg_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4i32_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 @@ -5546,8 +5546,8 @@ define i32* @test_v2i32_post_reg_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2i32_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 @@ -5576,8 +5576,8 @@ define i64* @test_v2i64_post_reg_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2i64_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 @@ -5606,8 +5606,8 @@ define i64* @test_v1i64_post_reg_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v1i64_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 @@ -5636,8 +5636,8 @@ define float* @test_v4f32_post_reg_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4f32_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 @@ -5666,8 +5666,8 @@ define float* @test_v2f32_post_reg_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2f32_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 @@ -5696,8 +5696,8 @@ define double* @test_v2f64_post_reg_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2f64_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 @@ -5726,8 +5726,8 @@ define double* @test_v1f64_post_reg_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v1f64_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 @@ -6530,8 +6530,8 @@ define i16* @test_v8i16_post_reg_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v8i16_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st1.8h { v0, v1, v2 }, [x0], x8 @@ -6560,8 +6560,8 @@ define i16* @test_v4i16_post_reg_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4i16_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.4h { v0, v1, v2 }, [x0], x8 @@ -6590,8 +6590,8 @@ define i32* @test_v4i32_post_reg_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4i32_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 @@ -6620,8 +6620,8 @@ define i32* @test_v2i32_post_reg_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2i32_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 @@ -6650,8 +6650,8 @@ define i64* @test_v2i64_post_reg_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2i64_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 @@ -6680,8 +6680,8 @@ define i64* @test_v1i64_post_reg_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v1i64_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 @@ -6710,8 +6710,8 @@ define float* @test_v4f32_post_reg_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4f32_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 @@ -6740,8 +6740,8 @@ define float* @test_v2f32_post_reg_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2f32_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 @@ -6770,8 +6770,8 @@ define double* @test_v2f64_post_reg_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2f64_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 @@ -6800,8 +6800,8 @@ define double* @test_v1f64_post_reg_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v1f64_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 @@ -7603,8 +7603,8 @@ define i16* @test_v8i16_post_reg_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v8i16_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 @@ -7633,8 +7633,8 @@ define i16* @test_v4i16_post_reg_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4i16_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 @@ -7663,8 +7663,8 @@ define i32* @test_v4i32_post_reg_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4i32_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 @@ -7693,8 +7693,8 @@ define i32* @test_v2i32_post_reg_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2i32_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 @@ -7723,8 +7723,8 @@ define i64* @test_v2i64_post_reg_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2i64_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 @@ -7753,8 +7753,8 @@ define i64* @test_v1i64_post_reg_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v1i64_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 @@ -7783,8 +7783,8 @@ define float* @test_v4f32_post_reg_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4f32_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 @@ -7813,8 +7813,8 @@ define float* @test_v2f32_post_reg_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2f32_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 @@ -7843,8 +7843,8 @@ define double* @test_v2f64_post_reg_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2f64_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 @@ -7873,8 +7873,8 @@ define double* @test_v1f64_post_reg_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v1f64_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 @@ -8912,8 +8912,8 @@ ; CHECK-NEXT: str q0, [x3] ; CHECK-NEXT: ldr q0, [x4] ; CHECK-NEXT: add x8, x0, x2, lsl #2 -; CHECK-NEXT: mov.s v0[1], v1[0] ; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: mov.s v0[1], v1[0] ; CHECK-NEXT: ret %tmp1 = load float, float* %bar store <4 x float> %vec, <4 x float>* %dep_ptr_1, align 16 @@ -8960,11 +8960,11 @@ define void @test_ld1lane_build(i32* %ptr0, i32* %ptr1, i32* %ptr2, i32* %ptr3, <2 x i32>* %out) { ; CHECK-LABEL: test_ld1lane_build: ; CHECK: ; %bb.0: -; CHECK-NEXT: ldr s0, [x2] -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: ld1.s { v0 }[1], [x3] -; CHECK-NEXT: ld1.s { v1 }[1], [x1] -; CHECK-NEXT: sub.2s v0, v1, v0 +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x2] +; CHECK-NEXT: ld1.s { v0 }[1], [x1] +; CHECK-NEXT: ld1.s { v1 }[1], [x3] +; CHECK-NEXT: sub.2s v0, v0, v1 ; CHECK-NEXT: str d0, [x4] ; CHECK-NEXT: ret %load0 = load i32, i32* %ptr0, align 4 @@ -9071,10 +9071,10 @@ ; CHECK-LABEL: test_inc_cycle: ; CHECK: ; %bb.0: ; CHECK-NEXT: ld1.s { v0 }[0], [x0] -; CHECK-NEXT: adrp x8, _var@PAGE -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: add x9, x0, x9, lsl #2 -; CHECK-NEXT: str x9, [x8, _var@PAGEOFF] +; CHECK-NEXT: adrp x9, _var@PAGE +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x8, x0, x8, lsl #2 +; CHECK-NEXT: str x8, [x9, _var@PAGEOFF] ; CHECK-NEXT: ret %elt = load i32, i32* %in %newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0 @@ -9096,8 +9096,8 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: bfxil x8, x1, #0, #4 ; CHECK-NEXT: str q0, [sp] @@ -9114,9 +9114,9 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: and x8, x1, #0x7 -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: bfi x9, x8, #1, #3 ; CHECK-NEXT: str q0, [sp] @@ -9144,10 +9144,10 @@ ; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align: ; CHECK: ; %bb.0: ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: mov w8, #2 -; CHECK-NEXT: cmp x9, #2 -; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: cmp x8, #2 +; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: ldr w0, [x0, x8, lsl #2] ; CHECK-NEXT: ret %lv = load <3 x i32>, <3 x i32>* %A, align 2 @@ -9159,10 +9159,10 @@ ; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align: ; CHECK: ; %bb.0: ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: mov w8, #2 -; CHECK-NEXT: cmp x9, #2 -; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: cmp x8, #2 +; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: ldr w0, [x0, x8, lsl #2] ; CHECK-NEXT: ret %lv = load <3 x i32>, <3 x i32>* %A Index: llvm/test/CodeGen/AArch64/arm64-inline-asm.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-inline-asm.ll +++ llvm/test/CodeGen/AArch64/arm64-inline-asm.ll @@ -368,10 +368,10 @@ define void @test_zero_reg(i32* %addr) { ; CHECK-LABEL: test_zero_reg: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #1 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: USE(xzr) ; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: mov w8, #1 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: USE(wzr) ; CHECK-NEXT: ; InlineAsm End Index: llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll +++ llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll @@ -36,9 +36,9 @@ ; CHECK-NEXT: b.eq LBB0_2 ; CHECK-NEXT: ; %bb.1: ; %else ; CHECK-NEXT: mul w9, w0, w1 -; CHECK-NEXT: mov w10, #10 ; CHECK-NEXT: mul w0, w9, w1 -; CHECK-NEXT: str w10, [x8] +; CHECK-NEXT: mov w9, #10 +; CHECK-NEXT: str w9, [x8] ; CHECK-NEXT: LBB0_2: ; %common.ret ; CHECK-NEXT: ; kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/arm64-ldp.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-ldp.ll +++ llvm/test/CodeGen/AArch64/arm64-ldp.ll @@ -312,10 +312,10 @@ ; are used---just check that there isn't an ldp before the add ; CHECK-LABEL: pairUpBarelyOut: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #264 -; CHECK-NEXT: ldur x9, [x0, #-256] -; CHECK-NEXT: ldr x8, [x8] -; CHECK-NEXT: add x0, x9, x8 +; CHECK-NEXT: sub x9, x0, #264 +; CHECK-NEXT: ldur x8, [x0, #-256] +; CHECK-NEXT: ldr x9, [x9] +; CHECK-NEXT: add x0, x8, x9 ; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %a, i64 -32 %tmp1 = load i64, i64* %p1, align 2 @@ -330,10 +330,10 @@ ; are used---just check that there isn't an ldp before the add ; CHECK-LABEL: pairUpBarelyOutSext: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #260 -; CHECK-NEXT: ldursw x9, [x0, #-256] -; CHECK-NEXT: ldrsw x8, [x8] -; CHECK-NEXT: add x0, x9, x8 +; CHECK-NEXT: sub x9, x0, #260 +; CHECK-NEXT: ldursw x8, [x0, #-256] +; CHECK-NEXT: ldrsw x9, [x9] +; CHECK-NEXT: add x0, x8, x9 ; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -64 %tmp1 = load i32, i32* %p1, align 2 @@ -422,10 +422,8 @@ ; CHECK-LABEL: ldp_sext_int_post: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill -; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ldpsw x19, x20, [x0] -; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ldpsw x19, x20, [x0], #8 ; CHECK-NEXT: bl "use-ptr" ; CHECK-NEXT: add x0, x20, x19 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload Index: llvm/test/CodeGen/AArch64/arm64-neon-copy.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -1225,8 +1225,8 @@ ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: and x8, x0, #0x7 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: str q0, [sp] ; CHECK-NEXT: bfi x9, x8, #1, #3 +; CHECK-NEXT: str q0, [sp] ; CHECK-NEXT: ldr h1, [x9] ; CHECK-NEXT: mov v1.h[1], v0.h[1] ; CHECK-NEXT: mov v1.h[2], v0.h[2] @@ -1251,10 +1251,10 @@ ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: and x8, x0, #0x3 -; CHECK-NEXT: add x9, sp, #8 -; CHECK-NEXT: bfi x9, x8, #1, #2 -; CHECK-NEXT: str h0, [x9] +; CHECK-NEXT: add x8, sp, #8 +; CHECK-NEXT: and x9, x0, #0x3 +; CHECK-NEXT: bfi x8, x9, #1, #2 +; CHECK-NEXT: str h0, [x8] ; CHECK-NEXT: ldr d1, [sp, #8] ; CHECK-NEXT: mov v1.h[1], v0.h[1] ; CHECK-NEXT: mov v1.h[2], v0.h[2] Index: llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll +++ llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll @@ -73,13 +73,13 @@ define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) { ; CHECK-LABEL: mul2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: fmov x10, d1 +; CHECK-NEXT: fmov x11, d0 ; CHECK-NEXT: mov x8, v1.d[1] -; CHECK-NEXT: mov x11, v0.d[1] -; CHECK-NEXT: mul x9, x10, x9 -; CHECK-NEXT: mul x8, x11, x8 -; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: mov x9, v0.d[1] +; CHECK-NEXT: mul x10, x11, x10 +; CHECK-NEXT: mul x8, x9, x8 +; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %tmp3 = mul <2 x i64> %A, %B; @@ -164,6 +164,7 @@ ; CHECK-NEXT: smov w11, v0.b[2] ; CHECK-NEXT: smov w12, v0.b[3] ; CHECK-NEXT: smov w13, v0.b[4] +; CHECK-NEXT: smov w14, v0.b[5] ; CHECK-NEXT: sdiv w8, w9, w8 ; CHECK-NEXT: smov w9, v1.b[0] ; CHECK-NEXT: sdiv w9, w10, w9 @@ -171,18 +172,17 @@ ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: smov w9, v1.b[5] +; CHECK-NEXT: smov w9, v1.b[6] ; CHECK-NEXT: mov v2.b[1], w8 ; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: smov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 ; CHECK-NEXT: smov w10, v0.b[6] ; CHECK-NEXT: sdiv w12, w13, w12 -; CHECK-NEXT: smov w13, v0.b[5] +; CHECK-NEXT: smov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 ; CHECK-NEXT: smov w11, v0.b[7] -; CHECK-NEXT: sdiv w8, w13, w9 -; CHECK-NEXT: smov w9, v1.b[6] +; CHECK-NEXT: sdiv w8, w14, w13 ; CHECK-NEXT: mov v2.b[4], w12 ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.b[7] @@ -211,12 +211,13 @@ ; CHECK-NEXT: smov w9, v1.b[0] ; CHECK-NEXT: smov w16, v0.b[7] ; CHECK-NEXT: smov w17, v0.b[8] +; CHECK-NEXT: smov w18, v0.b[9] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.b[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: smov w9, v1.b[9] +; CHECK-NEXT: smov w9, v1.b[10] ; CHECK-NEXT: mov v2.b[1], w8 ; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: smov w12, v1.b[4] @@ -238,10 +239,9 @@ ; CHECK-NEXT: smov w16, v1.b[8] ; CHECK-NEXT: mov v2.b[6], w14 ; CHECK-NEXT: sdiv w16, w17, w16 -; CHECK-NEXT: smov w17, v0.b[9] +; CHECK-NEXT: smov w17, v1.b[9] ; CHECK-NEXT: mov v2.b[7], w15 -; CHECK-NEXT: sdiv w8, w17, w9 -; CHECK-NEXT: smov w9, v1.b[10] +; CHECK-NEXT: sdiv w8, w18, w17 ; CHECK-NEXT: mov v2.b[8], w16 ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.b[11] @@ -319,6 +319,7 @@ ; CHECK-NEXT: smov w11, v0.h[2] ; CHECK-NEXT: smov w12, v0.h[3] ; CHECK-NEXT: smov w13, v0.h[4] +; CHECK-NEXT: smov w14, v0.h[5] ; CHECK-NEXT: sdiv w8, w9, w8 ; CHECK-NEXT: smov w9, v1.h[0] ; CHECK-NEXT: sdiv w9, w10, w9 @@ -326,18 +327,17 @@ ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.h[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: smov w9, v1.h[5] +; CHECK-NEXT: smov w9, v1.h[6] ; CHECK-NEXT: mov v2.h[1], w8 ; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: smov w12, v1.h[4] ; CHECK-NEXT: mov v2.h[2], w10 ; CHECK-NEXT: smov w10, v0.h[6] ; CHECK-NEXT: sdiv w12, w13, w12 -; CHECK-NEXT: smov w13, v0.h[5] +; CHECK-NEXT: smov w13, v1.h[5] ; CHECK-NEXT: mov v2.h[3], w11 ; CHECK-NEXT: smov w11, v0.h[7] -; CHECK-NEXT: sdiv w8, w13, w9 -; CHECK-NEXT: smov w9, v1.h[6] +; CHECK-NEXT: sdiv w8, w14, w13 ; CHECK-NEXT: mov v2.h[4], w12 ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.h[7] @@ -463,6 +463,7 @@ ; CHECK-NEXT: umov w11, v0.b[2] ; CHECK-NEXT: umov w12, v0.b[3] ; CHECK-NEXT: umov w13, v0.b[4] +; CHECK-NEXT: umov w14, v0.b[5] ; CHECK-NEXT: udiv w8, w9, w8 ; CHECK-NEXT: umov w9, v1.b[0] ; CHECK-NEXT: udiv w9, w10, w9 @@ -470,18 +471,17 @@ ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: umov w9, v1.b[5] +; CHECK-NEXT: umov w9, v1.b[6] ; CHECK-NEXT: mov v2.b[1], w8 ; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: umov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 ; CHECK-NEXT: umov w10, v0.b[6] ; CHECK-NEXT: udiv w12, w13, w12 -; CHECK-NEXT: umov w13, v0.b[5] +; CHECK-NEXT: umov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 ; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: udiv w8, w13, w9 -; CHECK-NEXT: umov w9, v1.b[6] +; CHECK-NEXT: udiv w8, w14, w13 ; CHECK-NEXT: mov v2.b[4], w12 ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.b[7] @@ -510,12 +510,13 @@ ; CHECK-NEXT: umov w9, v1.b[0] ; CHECK-NEXT: umov w16, v0.b[7] ; CHECK-NEXT: umov w17, v0.b[8] +; CHECK-NEXT: umov w18, v0.b[9] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.b[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: umov w9, v1.b[9] +; CHECK-NEXT: umov w9, v1.b[10] ; CHECK-NEXT: mov v2.b[1], w8 ; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: umov w12, v1.b[4] @@ -537,10 +538,9 @@ ; CHECK-NEXT: umov w16, v1.b[8] ; CHECK-NEXT: mov v2.b[6], w14 ; CHECK-NEXT: udiv w16, w17, w16 -; CHECK-NEXT: umov w17, v0.b[9] +; CHECK-NEXT: umov w17, v1.b[9] ; CHECK-NEXT: mov v2.b[7], w15 -; CHECK-NEXT: udiv w8, w17, w9 -; CHECK-NEXT: umov w9, v1.b[10] +; CHECK-NEXT: udiv w8, w18, w17 ; CHECK-NEXT: mov v2.b[8], w16 ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.b[11] @@ -618,6 +618,7 @@ ; CHECK-NEXT: umov w11, v0.h[2] ; CHECK-NEXT: umov w12, v0.h[3] ; CHECK-NEXT: umov w13, v0.h[4] +; CHECK-NEXT: umov w14, v0.h[5] ; CHECK-NEXT: udiv w8, w9, w8 ; CHECK-NEXT: umov w9, v1.h[0] ; CHECK-NEXT: udiv w9, w10, w9 @@ -625,18 +626,17 @@ ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.h[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: umov w9, v1.h[5] +; CHECK-NEXT: umov w9, v1.h[6] ; CHECK-NEXT: mov v2.h[1], w8 ; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: umov w12, v1.h[4] ; CHECK-NEXT: mov v2.h[2], w10 ; CHECK-NEXT: umov w10, v0.h[6] ; CHECK-NEXT: udiv w12, w13, w12 -; CHECK-NEXT: umov w13, v0.h[5] +; CHECK-NEXT: umov w13, v1.h[5] ; CHECK-NEXT: mov v2.h[3], w11 ; CHECK-NEXT: umov w11, v0.h[7] -; CHECK-NEXT: udiv w8, w13, w9 -; CHECK-NEXT: umov w9, v1.h[6] +; CHECK-NEXT: udiv w8, w14, w13 ; CHECK-NEXT: mov v2.h[4], w12 ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.h[7] @@ -768,34 +768,34 @@ ; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w1, v1.b[4] ; CHECK-NEXT: smov w2, v0.b[4] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.b[5] +; CHECK-NEXT: smov w4, v1.b[5] +; CHECK-NEXT: smov w5, v0.b[5] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.b[5] +; CHECK-NEXT: msub w11, w13, w11, w12 +; CHECK-NEXT: smov w13, v1.b[7] ; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: smov w11, v0.b[6] +; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: smov w10, v1.b[6] -; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: smov w15, v1.b[7] ; CHECK-NEXT: sdiv w0, w18, w17 -; CHECK-NEXT: smov w16, v0.b[7] +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: smov w14, v0.b[7] ; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 ; CHECK-NEXT: sdiv w3, w2, w1 -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: sdiv w9, w13, w12 -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: sdiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[5], w9 -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: sdiv w12, w16, w15 +; CHECK-NEXT: msub w8, w0, w17, w18 +; CHECK-NEXT: mov v2.b[3], w8 +; CHECK-NEXT: sdiv w9, w5, w4 +; CHECK-NEXT: msub w8, w3, w1, w2 +; CHECK-NEXT: mov v2.b[4], w8 +; CHECK-NEXT: sdiv w12, w11, w10 +; CHECK-NEXT: msub w8, w9, w4, w5 +; CHECK-NEXT: mov v2.b[5], w8 +; CHECK-NEXT: sdiv w9, w14, w13 +; CHECK-NEXT: msub w8, w12, w10, w11 ; CHECK-NEXT: mov v2.b[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 +; CHECK-NEXT: msub w8, w9, w13, w14 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret @@ -806,11 +806,12 @@ define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) { ; CHECK-LABEL: srem16x8: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x26, x25, [sp, #-64]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #-80]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -819,6 +820,8 @@ ; CHECK-NEXT: .cfi_offset w24, -48 ; CHECK-NEXT: .cfi_offset w25, -56 ; CHECK-NEXT: .cfi_offset w26, -64 +; CHECK-NEXT: .cfi_offset w27, -72 +; CHECK-NEXT: .cfi_offset w28, -80 ; CHECK-NEXT: smov w11, v1.b[0] ; CHECK-NEXT: smov w12, v0.b[0] ; CHECK-NEXT: smov w8, v1.b[1] @@ -838,72 +841,73 @@ ; CHECK-NEXT: smov w22, v0.b[7] ; CHECK-NEXT: smov w24, v1.b[8] ; CHECK-NEXT: smov w25, v0.b[8] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.b[9] +; CHECK-NEXT: smov w27, v1.b[9] +; CHECK-NEXT: smov w28, v0.b[9] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.b[9] +; CHECK-NEXT: msub w11, w13, w11, w12 +; CHECK-NEXT: smov w13, v1.b[11] ; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: smov w11, v0.b[10] +; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: smov w10, v1.b[10] -; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: smov w15, v1.b[11] ; CHECK-NEXT: sdiv w0, w18, w17 -; CHECK-NEXT: smov w16, v0.b[11] +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: smov w14, v0.b[11] +; CHECK-NEXT: smov w16, v1.b[12] ; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 -; CHECK-NEXT: smov w18, v1.b[12] ; CHECK-NEXT: sdiv w3, w2, w1 -; CHECK-NEXT: smov w0, v0.b[12] -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: smov w2, v1.b[13] +; CHECK-NEXT: msub w8, w0, w17, w18 +; CHECK-NEXT: smov w17, v0.b[12] +; CHECK-NEXT: smov w0, v1.b[13] +; CHECK-NEXT: mov v2.b[3], w8 ; CHECK-NEXT: sdiv w6, w5, w4 -; CHECK-NEXT: smov w3, v0.b[13] -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w17, w6, w4, w5 +; CHECK-NEXT: msub w8, w3, w1, w2 +; CHECK-NEXT: smov w1, v0.b[13] +; CHECK-NEXT: mov v2.b[4], w8 ; CHECK-NEXT: sdiv w20, w19, w7 -; CHECK-NEXT: mov v2.b[5], w17 -; CHECK-NEXT: msub w17, w20, w7, w19 +; CHECK-NEXT: msub w8, w6, w4, w5 +; CHECK-NEXT: mov v2.b[5], w8 ; CHECK-NEXT: sdiv w23, w22, w21 -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[6], w17 -; CHECK-NEXT: msub w1, w23, w21, w22 +; CHECK-NEXT: msub w8, w20, w7, w19 +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[6], w8 ; CHECK-NEXT: sdiv w26, w25, w24 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[7], w1 -; CHECK-NEXT: msub w1, w26, w24, w25 -; CHECK-NEXT: sdiv w9, w13, w12 -; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[8], w1 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: smov w13, v1.b[15] -; CHECK-NEXT: sdiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[9], w9 -; CHECK-NEXT: smov w9, v1.b[14] -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: smov w10, v0.b[14] -; CHECK-NEXT: sdiv w14, w16, w15 +; CHECK-NEXT: msub w8, w23, w21, w22 +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[7], w8 +; CHECK-NEXT: sdiv w9, w28, w27 +; CHECK-NEXT: msub w8, w26, w24, w25 +; CHECK-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[8], w8 +; CHECK-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: sdiv w12, w11, w10 +; CHECK-NEXT: msub w8, w9, w27, w28 +; CHECK-NEXT: mov v2.b[9], w8 +; CHECK-NEXT: sdiv w15, w14, w13 +; CHECK-NEXT: msub w8, w12, w10, w11 +; CHECK-NEXT: smov w10, v1.b[14] +; CHECK-NEXT: smov w11, v0.b[14] ; CHECK-NEXT: mov v2.b[10], w8 -; CHECK-NEXT: msub w11, w14, w15, w16 +; CHECK-NEXT: sdiv w18, w17, w16 +; CHECK-NEXT: msub w8, w15, w13, w14 +; CHECK-NEXT: smov w13, v1.b[15] ; CHECK-NEXT: smov w14, v0.b[15] -; CHECK-NEXT: sdiv w17, w0, w18 -; CHECK-NEXT: mov v2.b[11], w11 -; CHECK-NEXT: msub w11, w17, w18, w0 -; CHECK-NEXT: sdiv w12, w3, w2 -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: msub w12, w12, w2, w3 -; CHECK-NEXT: sdiv w8, w10, w9 -; CHECK-NEXT: mov v2.b[13], w12 -; CHECK-NEXT: msub w8, w8, w9, w10 -; CHECK-NEXT: sdiv w11, w14, w13 +; CHECK-NEXT: mov v2.b[11], w8 +; CHECK-NEXT: sdiv w9, w1, w0 +; CHECK-NEXT: msub w8, w18, w16, w17 +; CHECK-NEXT: mov v2.b[12], w8 +; CHECK-NEXT: sdiv w12, w11, w10 +; CHECK-NEXT: msub w8, w9, w0, w1 +; CHECK-NEXT: mov v2.b[13], w8 +; CHECK-NEXT: sdiv w9, w14, w13 +; CHECK-NEXT: msub w8, w12, w10, w11 ; CHECK-NEXT: mov v2.b[14], w8 -; CHECK-NEXT: msub w8, w11, w13, w14 +; CHECK-NEXT: msub w8, w9, w13, w14 ; CHECK-NEXT: mov v2.b[15], w8 ; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: ldp x28, x27, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = srem <16 x i8> %A, %B; ret <16 x i8> %tmp3 @@ -935,19 +939,19 @@ ; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: smov w14, v1.h[2] ; CHECK-NEXT: smov w15, v0.h[2] +; CHECK-NEXT: smov w17, v1.h[3] +; CHECK-NEXT: smov w18, v0.h[3] ; CHECK-NEXT: sdiv w13, w12, w11 -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.h[3] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.h[3] +; CHECK-NEXT: msub w11, w13, w11, w12 ; CHECK-NEXT: fmov s0, w11 -; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: sdiv w16, w15, w14 +; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: sdiv w9, w13, w12 -; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: msub w8, w9, w12, w13 +; CHECK-NEXT: sdiv w9, w18, w17 +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: msub w8, w9, w17, w18 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -969,34 +973,34 @@ ; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w1, v1.h[4] ; CHECK-NEXT: smov w2, v0.h[4] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.h[5] +; CHECK-NEXT: smov w4, v1.h[5] +; CHECK-NEXT: smov w5, v0.h[5] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.h[5] +; CHECK-NEXT: msub w11, w13, w11, w12 +; CHECK-NEXT: smov w13, v1.h[7] ; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: smov w11, v0.h[6] +; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: smov w10, v1.h[6] -; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v2.h[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: smov w15, v1.h[7] ; CHECK-NEXT: sdiv w0, w18, w17 -; CHECK-NEXT: smov w16, v0.h[7] +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: smov w14, v0.h[7] ; CHECK-NEXT: mov v2.h[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 ; CHECK-NEXT: sdiv w3, w2, w1 -; CHECK-NEXT: mov v2.h[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: sdiv w9, w13, w12 -; CHECK-NEXT: mov v2.h[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: sdiv w8, w11, w10 -; CHECK-NEXT: mov v2.h[5], w9 -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: sdiv w12, w16, w15 +; CHECK-NEXT: msub w8, w0, w17, w18 +; CHECK-NEXT: mov v2.h[3], w8 +; CHECK-NEXT: sdiv w9, w5, w4 +; CHECK-NEXT: msub w8, w3, w1, w2 +; CHECK-NEXT: mov v2.h[4], w8 +; CHECK-NEXT: sdiv w12, w11, w10 +; CHECK-NEXT: msub w8, w9, w4, w5 +; CHECK-NEXT: mov v2.h[5], w8 +; CHECK-NEXT: sdiv w9, w14, w13 +; CHECK-NEXT: msub w8, w12, w10, w11 ; CHECK-NEXT: mov v2.h[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 +; CHECK-NEXT: msub w8, w9, w13, w14 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret @@ -1029,8 +1033,8 @@ ; CHECK-NEXT: mov w11, v1.s[1] ; CHECK-NEXT: mov w12, v0.s[1] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: sdiv w13, w12, w11 +; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: msub w9, w13, w11, w12 ; CHECK-NEXT: mov v0.s[1], w9 @@ -1052,15 +1056,15 @@ ; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: mov w17, v1.s[3] ; CHECK-NEXT: mov w18, v0.s[3] -; CHECK-NEXT: msub w11, w13, w11, w12 ; CHECK-NEXT: sdiv w10, w9, w8 +; CHECK-NEXT: msub w11, w13, w11, w12 ; CHECK-NEXT: fmov s0, w11 -; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: sdiv w16, w15, w14 +; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 ; CHECK-NEXT: sdiv w9, w18, w17 -; CHECK-NEXT: mov v0.s[2], w10 +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: msub w8, w9, w17, w18 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret @@ -1091,8 +1095,8 @@ ; CHECK-NEXT: mov x11, v1.d[1] ; CHECK-NEXT: mov x12, v0.d[1] ; CHECK-NEXT: sdiv x10, x9, x8 -; CHECK-NEXT: msub x8, x10, x8, x9 ; CHECK-NEXT: sdiv x13, x12, x11 +; CHECK-NEXT: msub x8, x10, x8, x9 ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: msub x9, x13, x11, x12 ; CHECK-NEXT: mov v0.d[1], x9 @@ -1132,34 +1136,34 @@ ; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w1, v1.b[4] ; CHECK-NEXT: umov w2, v0.b[4] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.b[5] +; CHECK-NEXT: umov w4, v1.b[5] +; CHECK-NEXT: umov w5, v0.b[5] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.b[5] +; CHECK-NEXT: msub w11, w13, w11, w12 +; CHECK-NEXT: umov w13, v1.b[7] ; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: umov w11, v0.b[6] +; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: umov w10, v1.b[6] -; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.b[7] ; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.b[7] +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: umov w14, v0.b[7] ; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 ; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[5], w9 -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: udiv w12, w16, w15 +; CHECK-NEXT: msub w8, w0, w17, w18 +; CHECK-NEXT: mov v2.b[3], w8 +; CHECK-NEXT: udiv w9, w5, w4 +; CHECK-NEXT: msub w8, w3, w1, w2 +; CHECK-NEXT: mov v2.b[4], w8 +; CHECK-NEXT: udiv w12, w11, w10 +; CHECK-NEXT: msub w8, w9, w4, w5 +; CHECK-NEXT: mov v2.b[5], w8 +; CHECK-NEXT: udiv w9, w14, w13 +; CHECK-NEXT: msub w8, w12, w10, w11 ; CHECK-NEXT: mov v2.b[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 +; CHECK-NEXT: msub w8, w9, w13, w14 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret @@ -1170,11 +1174,12 @@ define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) { ; CHECK-LABEL: urem16x8: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x26, x25, [sp, #-64]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #-80]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -1183,6 +1188,8 @@ ; CHECK-NEXT: .cfi_offset w24, -48 ; CHECK-NEXT: .cfi_offset w25, -56 ; CHECK-NEXT: .cfi_offset w26, -64 +; CHECK-NEXT: .cfi_offset w27, -72 +; CHECK-NEXT: .cfi_offset w28, -80 ; CHECK-NEXT: umov w11, v1.b[0] ; CHECK-NEXT: umov w12, v0.b[0] ; CHECK-NEXT: umov w8, v1.b[1] @@ -1202,72 +1209,73 @@ ; CHECK-NEXT: umov w22, v0.b[7] ; CHECK-NEXT: umov w24, v1.b[8] ; CHECK-NEXT: umov w25, v0.b[8] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.b[9] +; CHECK-NEXT: umov w27, v1.b[9] +; CHECK-NEXT: umov w28, v0.b[9] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.b[9] +; CHECK-NEXT: msub w11, w13, w11, w12 +; CHECK-NEXT: umov w13, v1.b[11] ; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: umov w11, v0.b[10] +; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: umov w10, v1.b[10] -; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.b[11] ; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.b[11] +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: umov w14, v0.b[11] +; CHECK-NEXT: umov w16, v1.b[12] ; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 -; CHECK-NEXT: umov w18, v1.b[12] ; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: umov w0, v0.b[12] -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: umov w2, v1.b[13] +; CHECK-NEXT: msub w8, w0, w17, w18 +; CHECK-NEXT: umov w17, v0.b[12] +; CHECK-NEXT: umov w0, v1.b[13] +; CHECK-NEXT: mov v2.b[3], w8 ; CHECK-NEXT: udiv w6, w5, w4 -; CHECK-NEXT: umov w3, v0.b[13] -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w17, w6, w4, w5 +; CHECK-NEXT: msub w8, w3, w1, w2 +; CHECK-NEXT: umov w1, v0.b[13] +; CHECK-NEXT: mov v2.b[4], w8 ; CHECK-NEXT: udiv w20, w19, w7 -; CHECK-NEXT: mov v2.b[5], w17 -; CHECK-NEXT: msub w17, w20, w7, w19 +; CHECK-NEXT: msub w8, w6, w4, w5 +; CHECK-NEXT: mov v2.b[5], w8 ; CHECK-NEXT: udiv w23, w22, w21 -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[6], w17 -; CHECK-NEXT: msub w1, w23, w21, w22 +; CHECK-NEXT: msub w8, w20, w7, w19 +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[6], w8 ; CHECK-NEXT: udiv w26, w25, w24 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[7], w1 -; CHECK-NEXT: msub w1, w26, w24, w25 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[8], w1 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: umov w13, v1.b[15] -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[9], w9 -; CHECK-NEXT: umov w9, v1.b[14] -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: umov w10, v0.b[14] -; CHECK-NEXT: udiv w14, w16, w15 +; CHECK-NEXT: msub w8, w23, w21, w22 +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[7], w8 +; CHECK-NEXT: udiv w9, w28, w27 +; CHECK-NEXT: msub w8, w26, w24, w25 +; CHECK-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[8], w8 +; CHECK-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: udiv w12, w11, w10 +; CHECK-NEXT: msub w8, w9, w27, w28 +; CHECK-NEXT: mov v2.b[9], w8 +; CHECK-NEXT: udiv w15, w14, w13 +; CHECK-NEXT: msub w8, w12, w10, w11 +; CHECK-NEXT: umov w10, v1.b[14] +; CHECK-NEXT: umov w11, v0.b[14] ; CHECK-NEXT: mov v2.b[10], w8 -; CHECK-NEXT: msub w11, w14, w15, w16 +; CHECK-NEXT: udiv w18, w17, w16 +; CHECK-NEXT: msub w8, w15, w13, w14 +; CHECK-NEXT: umov w13, v1.b[15] ; CHECK-NEXT: umov w14, v0.b[15] -; CHECK-NEXT: udiv w17, w0, w18 -; CHECK-NEXT: mov v2.b[11], w11 -; CHECK-NEXT: msub w11, w17, w18, w0 -; CHECK-NEXT: udiv w12, w3, w2 -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: msub w12, w12, w2, w3 -; CHECK-NEXT: udiv w8, w10, w9 -; CHECK-NEXT: mov v2.b[13], w12 -; CHECK-NEXT: msub w8, w8, w9, w10 -; CHECK-NEXT: udiv w11, w14, w13 +; CHECK-NEXT: mov v2.b[11], w8 +; CHECK-NEXT: udiv w9, w1, w0 +; CHECK-NEXT: msub w8, w18, w16, w17 +; CHECK-NEXT: mov v2.b[12], w8 +; CHECK-NEXT: udiv w12, w11, w10 +; CHECK-NEXT: msub w8, w9, w0, w1 +; CHECK-NEXT: mov v2.b[13], w8 +; CHECK-NEXT: udiv w9, w14, w13 +; CHECK-NEXT: msub w8, w12, w10, w11 ; CHECK-NEXT: mov v2.b[14], w8 -; CHECK-NEXT: msub w8, w11, w13, w14 +; CHECK-NEXT: msub w8, w9, w13, w14 ; CHECK-NEXT: mov v2.b[15], w8 ; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: ldp x28, x27, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = urem <16 x i8> %A, %B; ret <16 x i8> %tmp3 @@ -1299,19 +1307,19 @@ ; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w14, v1.h[2] ; CHECK-NEXT: umov w15, v0.h[2] +; CHECK-NEXT: umov w17, v1.h[3] +; CHECK-NEXT: umov w18, v0.h[3] ; CHECK-NEXT: udiv w13, w12, w11 -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.h[3] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.h[3] +; CHECK-NEXT: msub w11, w13, w11, w12 ; CHECK-NEXT: fmov s0, w11 -; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w16, w15, w14 +; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: msub w8, w9, w12, w13 +; CHECK-NEXT: udiv w9, w18, w17 +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: msub w8, w9, w17, w18 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -1333,34 +1341,34 @@ ; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w1, v1.h[4] ; CHECK-NEXT: umov w2, v0.h[4] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.h[5] +; CHECK-NEXT: umov w4, v1.h[5] +; CHECK-NEXT: umov w5, v0.h[5] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.h[5] +; CHECK-NEXT: msub w11, w13, w11, w12 +; CHECK-NEXT: umov w13, v1.h[7] ; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: umov w11, v0.h[6] +; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: umov w10, v1.h[6] -; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v2.h[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.h[7] ; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.h[7] +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: umov w14, v0.h[7] ; CHECK-NEXT: mov v2.h[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 ; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: mov v2.h[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: mov v2.h[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.h[5], w9 -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: udiv w12, w16, w15 +; CHECK-NEXT: msub w8, w0, w17, w18 +; CHECK-NEXT: mov v2.h[3], w8 +; CHECK-NEXT: udiv w9, w5, w4 +; CHECK-NEXT: msub w8, w3, w1, w2 +; CHECK-NEXT: mov v2.h[4], w8 +; CHECK-NEXT: udiv w12, w11, w10 +; CHECK-NEXT: msub w8, w9, w4, w5 +; CHECK-NEXT: mov v2.h[5], w8 +; CHECK-NEXT: udiv w9, w14, w13 +; CHECK-NEXT: msub w8, w12, w10, w11 ; CHECK-NEXT: mov v2.h[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 +; CHECK-NEXT: msub w8, w9, w13, w14 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret @@ -1393,8 +1401,8 @@ ; CHECK-NEXT: mov w11, v1.s[1] ; CHECK-NEXT: mov w12, v0.s[1] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w13, w12, w11 +; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: msub w9, w13, w11, w12 ; CHECK-NEXT: mov v0.s[1], w9 @@ -1416,15 +1424,15 @@ ; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: mov w17, v1.s[3] ; CHECK-NEXT: mov w18, v0.s[3] -; CHECK-NEXT: msub w11, w13, w11, w12 ; CHECK-NEXT: udiv w10, w9, w8 +; CHECK-NEXT: msub w11, w13, w11, w12 ; CHECK-NEXT: fmov s0, w11 -; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w16, w15, w14 +; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 ; CHECK-NEXT: udiv w9, w18, w17 -; CHECK-NEXT: mov v0.s[2], w10 +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: msub w8, w9, w17, w18 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret @@ -1455,8 +1463,8 @@ ; CHECK-NEXT: mov x11, v1.d[1] ; CHECK-NEXT: mov x12, v0.d[1] ; CHECK-NEXT: udiv x10, x9, x8 -; CHECK-NEXT: msub x8, x10, x8, x9 ; CHECK-NEXT: udiv x13, x12, x11 +; CHECK-NEXT: msub x8, x10, x8, x9 ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: msub x9, x13, x11, x12 ; CHECK-NEXT: mov v0.d[1], x9 Index: llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll +++ llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll @@ -5,9 +5,9 @@ ; CHECK-LABEL: test: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: mov w9, #1 ; CHECK-NEXT: str w8, [x0] -; CHECK-NEXT: str w9, [x1] +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %tmp = shufflevector <1 x i32> , <1 x i32> undef, <3 x i32> %tmp2 = shufflevector <3 x i32> , <3 x i32> %tmp, <3 x i32> Index: llvm/test/CodeGen/AArch64/arm64-nvcast.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-nvcast.ll +++ llvm/test/CodeGen/AArch64/arm64-nvcast.ll @@ -6,13 +6,13 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: and x8, x1, #0x3 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: fmov.2d v0, #2.00000000 -; CHECK-NEXT: bfi x9, x8, #2, #2 +; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: and x9, x1, #0x3 +; CHECK-NEXT: bfi x8, x9, #2, #2 ; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: ldr s0, [x9] +; CHECK-NEXT: ldr s0, [x8] ; CHECK-NEXT: str s0, [x0] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret @@ -27,13 +27,13 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: and x8, x1, #0x3 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: movi.16b v0, #63 -; CHECK-NEXT: bfi x9, x8, #2, #2 +; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: and x9, x1, #0x3 +; CHECK-NEXT: bfi x8, x9, #2, #2 ; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: ldr s0, [x9] +; CHECK-NEXT: ldr s0, [x8] ; CHECK-NEXT: str s0, [x0] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret @@ -53,8 +53,8 @@ define internal void @nvcast_f32_v8i8() { ; CHECK-LABEL: nvcast_f32_v8i8: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: adrp x8, __gv@GOTPAGE ; CHECK-NEXT: movi.8b v0, #254 +; CHECK-NEXT: adrp x8, __gv@GOTPAGE ; CHECK-NEXT: ldr x8, [x8, __gv@GOTPAGEOFF] ; CHECK-NEXT: str d0, [x8] ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/arm64-rev.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-rev.ll +++ llvm/test/CodeGen/AArch64/arm64-rev.ll @@ -530,16 +530,16 @@ define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp { ; CHECK-LABEL: test_vrev64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: add x8, x1, #2 ; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: add x8, x1, #2 ; CHECK-NEXT: st1.h { v0 }[5], [x8] ; CHECK-NEXT: st1.h { v0 }[6], [x1] ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_vrev64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: add x8, x1, #2 ; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: add x8, x1, #2 ; GISEL-NEXT: st1.h { v0 }[6], [x1] ; GISEL-NEXT: st1.h { v0 }[5], [x8] ; GISEL-NEXT: ret @@ -567,8 +567,8 @@ ; ; GISEL-LABEL: float_vrev64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: adrp x8, .LCPI28_0 ; GISEL-NEXT: movi d0, #0000000000000000 +; GISEL-NEXT: adrp x8, .LCPI28_0 ; GISEL-NEXT: ldr q1, [x0] ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI28_0] ; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2 @@ -706,19 +706,19 @@ define i64 @test_rev16_x_hwbyteswaps_complex1(i64 %a) nounwind { ; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: lsr x8, x0, #48 -; CHECK-NEXT: lsr x9, x0, #8 -; CHECK-NEXT: lsr x10, x0, #32 -; CHECK-NEXT: and x11, x9, #0xff000000000000 -; CHECK-NEXT: lsr x12, x0, #16 -; CHECK-NEXT: bfi x11, x8, #56, #8 -; CHECK-NEXT: and x8, x9, #0xff00000000 -; CHECK-NEXT: orr x8, x11, x8 -; CHECK-NEXT: and x9, x9, #0xff0000 -; CHECK-NEXT: bfi x8, x10, #40, #8 -; CHECK-NEXT: orr x8, x8, x9 +; CHECK-NEXT: lsr x8, x0, #8 +; CHECK-NEXT: lsr x9, x0, #48 +; CHECK-NEXT: and x10, x8, #0xff000000000000 +; CHECK-NEXT: and x11, x8, #0xff00000000 +; CHECK-NEXT: bfi x10, x9, #56, #8 +; CHECK-NEXT: lsr x9, x0, #32 +; CHECK-NEXT: and x8, x8, #0xff0000 +; CHECK-NEXT: orr x10, x10, x11 +; CHECK-NEXT: bfi x10, x9, #40, #8 +; CHECK-NEXT: lsr x9, x0, #16 +; CHECK-NEXT: orr x8, x10, x8 +; CHECK-NEXT: bfi x8, x9, #24, #8 ; CHECK-NEXT: ubfiz x9, x0, #8, #8 -; CHECK-NEXT: bfi x8, x12, #24, #8 ; CHECK-NEXT: bfxil x8, x0, #8, #8 ; CHECK-NEXT: orr x0, x8, x9 ; CHECK-NEXT: ret @@ -767,14 +767,14 @@ define i64 @test_rev16_x_hwbyteswaps_complex2(i64 %a) nounwind { ; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex2: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsr x8, x0, #8 ; CHECK-NEXT: lsr x9, x0, #48 ; CHECK-NEXT: lsr x10, x0, #32 -; CHECK-NEXT: lsr x8, x0, #8 -; CHECK-NEXT: lsr x11, x0, #16 ; CHECK-NEXT: and x8, x8, #0xff00ff00ff00ff ; CHECK-NEXT: bfi x8, x9, #56, #8 +; CHECK-NEXT: lsr x9, x0, #16 ; CHECK-NEXT: bfi x8, x10, #40, #8 -; CHECK-NEXT: bfi x8, x11, #24, #8 +; CHECK-NEXT: bfi x8, x9, #24, #8 ; CHECK-NEXT: bfi x8, x0, #8, #8 ; CHECK-NEXT: mov x0, x8 ; CHECK-NEXT: ret @@ -824,19 +824,19 @@ define i64 @test_rev16_x_hwbyteswaps_complex3(i64 %a) nounwind { ; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: lsr x8, x0, #48 -; CHECK-NEXT: lsr x9, x0, #8 -; CHECK-NEXT: lsr x10, x0, #32 -; CHECK-NEXT: and x11, x9, #0xff000000000000 -; CHECK-NEXT: lsr x12, x0, #16 -; CHECK-NEXT: bfi x11, x8, #56, #8 -; CHECK-NEXT: and x8, x9, #0xff00000000 -; CHECK-NEXT: orr x8, x8, x11 -; CHECK-NEXT: and x9, x9, #0xff0000 -; CHECK-NEXT: bfi x8, x10, #40, #8 -; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: lsr x8, x0, #8 +; CHECK-NEXT: lsr x9, x0, #48 +; CHECK-NEXT: and x10, x8, #0xff000000000000 +; CHECK-NEXT: and x11, x8, #0xff00000000 +; CHECK-NEXT: bfi x10, x9, #56, #8 +; CHECK-NEXT: lsr x9, x0, #32 +; CHECK-NEXT: and x8, x8, #0xff0000 +; CHECK-NEXT: orr x10, x11, x10 +; CHECK-NEXT: bfi x10, x9, #40, #8 +; CHECK-NEXT: lsr x9, x0, #16 +; CHECK-NEXT: orr x8, x8, x10 +; CHECK-NEXT: bfi x8, x9, #24, #8 ; CHECK-NEXT: ubfiz x9, x0, #8, #8 -; CHECK-NEXT: bfi x8, x12, #24, #8 ; CHECK-NEXT: bfxil x8, x0, #8, #8 ; CHECK-NEXT: orr x0, x9, x8 ; CHECK-NEXT: ret @@ -885,11 +885,11 @@ define i64 @test_or_and_combine1(i64 %a) nounwind { ; CHECK-LABEL: test_or_and_combine1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: lsr x8, x0, #24 -; CHECK-NEXT: lsr x9, x0, #8 -; CHECK-NEXT: and x10, x9, #0xff000000000000 -; CHECK-NEXT: bfi x10, x8, #32, #8 -; CHECK-NEXT: and x8, x9, #0xff0000 +; CHECK-NEXT: lsr x8, x0, #8 +; CHECK-NEXT: lsr x9, x0, #24 +; CHECK-NEXT: and x10, x8, #0xff000000000000 +; CHECK-NEXT: and x8, x8, #0xff0000 +; CHECK-NEXT: bfi x10, x9, #32, #8 ; CHECK-NEXT: orr x0, x10, x8 ; CHECK-NEXT: ret ; @@ -918,24 +918,24 @@ ; CHECK-LABEL: test_or_and_combine2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: lsr x8, x0, #8 -; CHECK-NEXT: lsl x10, x0, #8 -; CHECK-NEXT: and x9, x8, #0xff000000000000 +; CHECK-NEXT: lsl x9, x0, #8 +; CHECK-NEXT: and x10, x8, #0xff000000000000 ; CHECK-NEXT: and x8, x8, #0xff0000 -; CHECK-NEXT: orr x9, x9, x10 -; CHECK-NEXT: and x10, x10, #0xff00000000 -; CHECK-NEXT: orr x9, x9, x10 +; CHECK-NEXT: orr x10, x10, x9 +; CHECK-NEXT: and x9, x9, #0xff00000000 +; CHECK-NEXT: orr x9, x10, x9 ; CHECK-NEXT: orr x0, x9, x8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_or_and_combine2: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: lsr x8, x0, #8 -; GISEL-NEXT: lsl x10, x0, #8 -; GISEL-NEXT: and x9, x8, #0xff000000000000 +; GISEL-NEXT: lsl x9, x0, #8 +; GISEL-NEXT: and x10, x8, #0xff000000000000 ; GISEL-NEXT: and x8, x8, #0xff0000 -; GISEL-NEXT: orr x9, x9, x10 -; GISEL-NEXT: and x10, x10, #0xff00000000 -; GISEL-NEXT: orr x9, x9, x10 +; GISEL-NEXT: orr x10, x10, x9 +; GISEL-NEXT: and x9, x9, #0xff00000000 +; GISEL-NEXT: orr x9, x10, x9 ; GISEL-NEXT: orr x0, x9, x8 ; GISEL-NEXT: ret entry: Index: llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll +++ llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll @@ -4,9 +4,9 @@ define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind { ; CHECK-LABEL: foo: ; CHECK: ; %bb.0: -; CHECK-NEXT: fmov.4s v2, #1.00000000 ; CHECK-NEXT: fcmeq.4s v0, v0, v1 -; CHECK-NEXT: and.16b v0, v0, v2 +; CHECK-NEXT: fmov.4s v1, #1.00000000 +; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: ret %cmp = fcmp oeq <4 x float> %val, %test %ext = zext <4 x i1> %cmp to <4 x i32> @@ -19,9 +19,9 @@ define void @foo1(<4 x float> %val, <4 x float> %test, <4 x double>* %p) nounwind { ; CHECK-LABEL: foo1: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.4s v2, #1 ; CHECK-NEXT: fcmeq.4s v0, v0, v1 -; CHECK-NEXT: and.16b v0, v0, v2 +; CHECK-NEXT: movi.4s v1, #1 +; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: ushll2.2d v1, v0, #0 ; CHECK-NEXT: ushll.2d v0, v0, #0 ; CHECK-NEXT: scvtf.2d v1, v1 @@ -40,9 +40,9 @@ define <4 x float> @foo2(<4 x float> %val, <4 x float> %test) nounwind { ; CHECK-LABEL: foo2: ; CHECK: ; %bb.0: +; CHECK-NEXT: fcmeq.4s v0, v0, v1 ; CHECK-NEXT: Lloh0: ; CHECK-NEXT: adrp x8, lCPI2_0@PAGE -; CHECK-NEXT: fcmeq.4s v0, v0, v1 ; CHECK-NEXT: Lloh1: ; CHECK-NEXT: ldr q1, [x8, lCPI2_0@PAGEOFF] ; CHECK-NEXT: and.16b v0, v0, v1 Index: llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -473,8 +473,8 @@ ; ENABLE-NEXT: add x9, x8, #8 ; ENABLE-NEXT: str x9, [sp, #8] ; ENABLE-NEXT: ldr w8, [x8] -; ENABLE-NEXT: add w0, w0, w8 ; ENABLE-NEXT: subs w1, w1, #1 +; ENABLE-NEXT: add w0, w0, w8 ; ENABLE-NEXT: b.ne LBB6_2 ; ENABLE-NEXT: LBB6_3: ; %for.end ; ENABLE-NEXT: add sp, sp, #16 @@ -501,8 +501,8 @@ ; DISABLE-NEXT: add x9, x8, #8 ; DISABLE-NEXT: str x9, [sp, #8] ; DISABLE-NEXT: ldr w8, [x8] -; DISABLE-NEXT: add w0, w0, w8 ; DISABLE-NEXT: subs w1, w1, #1 +; DISABLE-NEXT: add w0, w0, w8 ; DISABLE-NEXT: b.ne LBB6_2 ; DISABLE-NEXT: b LBB6_4 ; DISABLE-NEXT: LBB6_3: ; %if.else @@ -638,8 +638,8 @@ ; ENABLE-NEXT: .cfi_offset w29, -16 ; ENABLE-NEXT: stp x1, x1, [sp, #32] ; ENABLE-NEXT: stp x1, x1, [sp, #16] -; ENABLE-NEXT: mov w0, w1 ; ENABLE-NEXT: stp x1, x1, [sp] +; ENABLE-NEXT: mov w0, w1 ; ENABLE-NEXT: bl _someVariadicFunc ; ENABLE-NEXT: lsl w0, w0, #3 ; ENABLE-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload @@ -666,8 +666,8 @@ ; DISABLE-NEXT: ; %bb.1: ; %if.then ; DISABLE-NEXT: stp x1, x1, [sp, #32] ; DISABLE-NEXT: stp x1, x1, [sp, #16] -; DISABLE-NEXT: mov w0, w1 ; DISABLE-NEXT: stp x1, x1, [sp] +; DISABLE-NEXT: mov w0, w1 ; DISABLE-NEXT: bl _someVariadicFunc ; DISABLE-NEXT: lsl w0, w0, #3 ; DISABLE-NEXT: b LBB8_3 @@ -1015,15 +1015,15 @@ ; ENABLE-NEXT: .cfi_offset w29, -16 ; ENABLE-NEXT: sub x9, sp, #16 ; ENABLE-NEXT: and sp, x9, #0xffffffffffffffe0 -; ENABLE-NEXT: lsl w8, w0, w1 -; ENABLE-NEXT: lsl w9, w1, w0 +; ENABLE-NEXT: lsl w9, w0, w1 +; ENABLE-NEXT: lsl w8, w1, w0 ; ENABLE-NEXT: cmp w0, w1 ; ENABLE-NEXT: b.ge LBB13_2 ; ENABLE-NEXT: ; %bb.1: ; %true ; ENABLE-NEXT: str w0, [sp] ; ENABLE-NEXT: LBB13_2: ; %false -; ENABLE-NEXT: str w8, [x2] -; ENABLE-NEXT: str w9, [x3] +; ENABLE-NEXT: str w9, [x2] +; ENABLE-NEXT: str w8, [x3] ; ENABLE-NEXT: mov sp, x29 ; ENABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; ENABLE-NEXT: ret @@ -1038,15 +1038,15 @@ ; DISABLE-NEXT: .cfi_offset w29, -16 ; DISABLE-NEXT: sub x9, sp, #16 ; DISABLE-NEXT: and sp, x9, #0xffffffffffffffe0 -; DISABLE-NEXT: lsl w8, w0, w1 -; DISABLE-NEXT: lsl w9, w1, w0 +; DISABLE-NEXT: lsl w9, w0, w1 +; DISABLE-NEXT: lsl w8, w1, w0 ; DISABLE-NEXT: cmp w0, w1 ; DISABLE-NEXT: b.ge LBB13_2 ; DISABLE-NEXT: ; %bb.1: ; %true ; DISABLE-NEXT: str w0, [sp] ; DISABLE-NEXT: LBB13_2: ; %false -; DISABLE-NEXT: str w8, [x2] -; DISABLE-NEXT: str w9, [x3] +; DISABLE-NEXT: str w9, [x2] +; DISABLE-NEXT: str w8, [x3] ; DISABLE-NEXT: mov sp, x29 ; DISABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; DISABLE-NEXT: ret @@ -1099,16 +1099,16 @@ ; ENABLE-NEXT: .cfi_offset w28, -96 ; ENABLE-NEXT: sub x9, sp, #32 ; ENABLE-NEXT: and sp, x9, #0xffffffffffffffe0 -; ENABLE-NEXT: add w8, w1, w0 -; ENABLE-NEXT: lsl w9, w0, w1 -; ENABLE-NEXT: lsl w10, w1, w0 -; ENABLE-NEXT: lsr w12, w0, w1 -; ENABLE-NEXT: lsr w13, w1, w0 -; ENABLE-NEXT: sub w11, w10, w12 +; ENABLE-NEXT: lsl w8, w1, w0 +; ENABLE-NEXT: lsr w10, w0, w1 +; ENABLE-NEXT: lsl w16, w0, w1 +; ENABLE-NEXT: lsr w11, w1, w0 +; ENABLE-NEXT: add w14, w1, w0 +; ENABLE-NEXT: sub w9, w8, w10 ; ENABLE-NEXT: subs w17, w1, w0 -; ENABLE-NEXT: add w16, w9, w10 -; ENABLE-NEXT: add w14, w12, w13 -; ENABLE-NEXT: add w15, w13, w8 +; ENABLE-NEXT: add w15, w16, w8 +; ENABLE-NEXT: add w12, w10, w11 +; ENABLE-NEXT: add w13, w11, w14 ; ENABLE-NEXT: b.le LBB14_2 ; ENABLE-NEXT: ; %bb.1: ; %true ; ENABLE-NEXT: str w0, [sp] @@ -1116,15 +1116,15 @@ ; ENABLE-NEXT: nop ; ENABLE-NEXT: ; InlineAsm End ; ENABLE-NEXT: LBB14_2: ; %false -; ENABLE-NEXT: str w9, [x2] -; ENABLE-NEXT: str w10, [x3] -; ENABLE-NEXT: str w12, [x4] -; ENABLE-NEXT: str w13, [x5] -; ENABLE-NEXT: str w8, [x6] +; ENABLE-NEXT: str w16, [x2] +; ENABLE-NEXT: str w8, [x3] +; ENABLE-NEXT: str w10, [x4] +; ENABLE-NEXT: str w11, [x5] +; ENABLE-NEXT: str w14, [x6] ; ENABLE-NEXT: str w17, [x7] ; ENABLE-NEXT: stp w0, w1, [x2, #4] -; ENABLE-NEXT: stp w16, w11, [x2, #12] -; ENABLE-NEXT: stp w14, w15, [x2, #20] +; ENABLE-NEXT: stp w15, w9, [x2, #12] +; ENABLE-NEXT: stp w12, w13, [x2, #20] ; ENABLE-NEXT: sub sp, x29, #80 ; ENABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload ; ENABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload @@ -1159,16 +1159,16 @@ ; DISABLE-NEXT: .cfi_offset w28, -96 ; DISABLE-NEXT: sub x9, sp, #32 ; DISABLE-NEXT: and sp, x9, #0xffffffffffffffe0 -; DISABLE-NEXT: add w8, w1, w0 -; DISABLE-NEXT: lsl w9, w0, w1 -; DISABLE-NEXT: lsl w10, w1, w0 -; DISABLE-NEXT: lsr w12, w0, w1 -; DISABLE-NEXT: lsr w13, w1, w0 -; DISABLE-NEXT: sub w11, w10, w12 +; DISABLE-NEXT: lsl w8, w1, w0 +; DISABLE-NEXT: lsr w10, w0, w1 +; DISABLE-NEXT: lsl w16, w0, w1 +; DISABLE-NEXT: lsr w11, w1, w0 +; DISABLE-NEXT: add w14, w1, w0 +; DISABLE-NEXT: sub w9, w8, w10 ; DISABLE-NEXT: subs w17, w1, w0 -; DISABLE-NEXT: add w16, w9, w10 -; DISABLE-NEXT: add w14, w12, w13 -; DISABLE-NEXT: add w15, w13, w8 +; DISABLE-NEXT: add w15, w16, w8 +; DISABLE-NEXT: add w12, w10, w11 +; DISABLE-NEXT: add w13, w11, w14 ; DISABLE-NEXT: b.le LBB14_2 ; DISABLE-NEXT: ; %bb.1: ; %true ; DISABLE-NEXT: str w0, [sp] @@ -1176,15 +1176,15 @@ ; DISABLE-NEXT: nop ; DISABLE-NEXT: ; InlineAsm End ; DISABLE-NEXT: LBB14_2: ; %false -; DISABLE-NEXT: str w9, [x2] -; DISABLE-NEXT: str w10, [x3] -; DISABLE-NEXT: str w12, [x4] -; DISABLE-NEXT: str w13, [x5] -; DISABLE-NEXT: str w8, [x6] +; DISABLE-NEXT: str w16, [x2] +; DISABLE-NEXT: str w8, [x3] +; DISABLE-NEXT: str w10, [x4] +; DISABLE-NEXT: str w11, [x5] +; DISABLE-NEXT: str w14, [x6] ; DISABLE-NEXT: str w17, [x7] ; DISABLE-NEXT: stp w0, w1, [x2, #4] -; DISABLE-NEXT: stp w16, w11, [x2, #12] -; DISABLE-NEXT: stp w14, w15, [x2, #20] +; DISABLE-NEXT: stp w15, w9, [x2, #12] +; DISABLE-NEXT: stp w12, w13, [x2, #20] ; DISABLE-NEXT: sub sp, x29, #80 ; DISABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload ; DISABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload Index: llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -225,14 +225,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #64] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp] -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: ldr w9, [sp, #72] ; CHECK-NEXT: movi.16b v2, #1 -; CHECK-NEXT: mov.b v1[1], w8 +; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: ldr w8, [sp, #80] +; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: mov.b v1[1], w9 +; CHECK-NEXT: ldr w9, [sp] ; CHECK-NEXT: mov.b v0[2], w2 ; CHECK-NEXT: mov.b v1[2], w8 ; CHECK-NEXT: ldr w8, [sp, #88] @@ -252,33 +251,34 @@ ; CHECK-NEXT: mov.b v1[7], w8 ; CHECK-NEXT: ldr w8, [sp, #128] ; CHECK-NEXT: mov.b v0[8], w9 -; CHECK-NEXT: ldr w9, [sp, #16] +; CHECK-NEXT: ldr w9, [sp, #8] ; CHECK-NEXT: mov.b v1[8], w8 ; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #24] +; CHECK-NEXT: mov.b v0[9], w9 +; CHECK-NEXT: ldr w9, [sp, #16] ; CHECK-NEXT: mov.b v1[9], w8 ; CHECK-NEXT: ldr w8, [sp, #144] ; CHECK-NEXT: mov.b v0[10], w9 -; CHECK-NEXT: ldr w9, [sp, #32] +; CHECK-NEXT: ldr w9, [sp, #24] ; CHECK-NEXT: mov.b v1[10], w8 ; CHECK-NEXT: ldr w8, [sp, #152] -; CHECK-NEXT: mov.b v0[11], w10 -; CHECK-NEXT: ldr w10, [sp, #40] +; CHECK-NEXT: mov.b v0[11], w9 +; CHECK-NEXT: ldr w9, [sp, #32] ; CHECK-NEXT: mov.b v1[11], w8 ; CHECK-NEXT: ldr w8, [sp, #160] ; CHECK-NEXT: mov.b v0[12], w9 -; CHECK-NEXT: ldr w9, [sp, #48] +; CHECK-NEXT: ldr w9, [sp, #40] ; CHECK-NEXT: mov.b v1[12], w8 ; CHECK-NEXT: ldr w8, [sp, #168] -; CHECK-NEXT: mov.b v0[13], w10 -; CHECK-NEXT: ldr w10, [sp, #56] +; CHECK-NEXT: mov.b v0[13], w9 +; CHECK-NEXT: ldr w9, [sp, #48] ; CHECK-NEXT: mov.b v1[13], w8 ; CHECK-NEXT: ldr w8, [sp, #176] ; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: ldr w9, [sp, #56] ; CHECK-NEXT: mov.b v1[14], w8 ; CHECK-NEXT: ldr w8, [sp, #184] -; CHECK-NEXT: mov.b v0[15], w10 +; CHECK-NEXT: mov.b v0[15], w9 ; CHECK-NEXT: mov.b v1[15], w8 ; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: and.16b v1, v1, v2 @@ -291,65 +291,65 @@ ; CHECK-LABEL: sext_v32i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #64] -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp] -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: mov.b v1[1], w8 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: ldr w8, [sp, #80] -; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: mov.b v1[2], w8 +; CHECK-NEXT: mov.b v1[1], w1 +; CHECK-NEXT: mov.b v0[1], w9 +; CHECK-NEXT: ldr w9, [sp, #128] +; CHECK-NEXT: mov.b v1[2], w2 +; CHECK-NEXT: mov.b v0[2], w8 ; CHECK-NEXT: ldr w8, [sp, #88] -; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: mov.b v1[3], w8 +; CHECK-NEXT: mov.b v1[3], w3 +; CHECK-NEXT: mov.b v0[3], w8 ; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: mov.b v1[4], w8 +; CHECK-NEXT: mov.b v1[4], w4 +; CHECK-NEXT: mov.b v0[4], w8 ; CHECK-NEXT: ldr w8, [sp, #104] -; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: mov.b v1[5], w8 +; CHECK-NEXT: mov.b v1[5], w5 +; CHECK-NEXT: mov.b v0[5], w8 ; CHECK-NEXT: ldr w8, [sp, #112] -; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w8 +; CHECK-NEXT: mov.b v1[6], w6 +; CHECK-NEXT: mov.b v0[6], w8 ; CHECK-NEXT: ldr w8, [sp, #120] -; CHECK-NEXT: mov.b v0[7], w7 -; CHECK-NEXT: mov.b v1[7], w8 -; CHECK-NEXT: ldr w8, [sp, #128] -; CHECK-NEXT: mov.b v0[8], w9 -; CHECK-NEXT: ldr w9, [sp, #16] +; CHECK-NEXT: mov.b v1[7], w7 +; CHECK-NEXT: mov.b v0[7], w8 +; CHECK-NEXT: ldr w8, [sp] ; CHECK-NEXT: mov.b v1[8], w8 -; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #24] +; CHECK-NEXT: ldr w8, [sp, #8] +; CHECK-NEXT: mov.b v0[8], w9 +; CHECK-NEXT: ldr w9, [sp, #136] ; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v0[10], w9 -; CHECK-NEXT: ldr w9, [sp, #32] +; CHECK-NEXT: ldr w8, [sp, #16] +; CHECK-NEXT: mov.b v0[9], w9 +; CHECK-NEXT: ldr w9, [sp, #144] ; CHECK-NEXT: mov.b v1[10], w8 -; CHECK-NEXT: ldr w8, [sp, #152] -; CHECK-NEXT: mov.b v0[11], w10 -; CHECK-NEXT: ldr w10, [sp, #40] +; CHECK-NEXT: ldr w8, [sp, #24] +; CHECK-NEXT: mov.b v0[10], w9 +; CHECK-NEXT: ldr w9, [sp, #152] ; CHECK-NEXT: mov.b v1[11], w8 -; CHECK-NEXT: ldr w8, [sp, #160] -; CHECK-NEXT: mov.b v0[12], w9 -; CHECK-NEXT: ldr w9, [sp, #48] +; CHECK-NEXT: ldr w8, [sp, #32] +; CHECK-NEXT: mov.b v0[11], w9 +; CHECK-NEXT: ldr w9, [sp, #160] ; CHECK-NEXT: mov.b v1[12], w8 -; CHECK-NEXT: ldr w8, [sp, #168] -; CHECK-NEXT: mov.b v0[13], w10 -; CHECK-NEXT: ldr w10, [sp, #56] +; CHECK-NEXT: ldr w8, [sp, #40] +; CHECK-NEXT: mov.b v0[12], w9 +; CHECK-NEXT: ldr w9, [sp, #168] ; CHECK-NEXT: mov.b v1[13], w8 -; CHECK-NEXT: ldr w8, [sp, #176] -; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: ldr w8, [sp, #48] +; CHECK-NEXT: mov.b v0[13], w9 +; CHECK-NEXT: ldr w9, [sp, #176] ; CHECK-NEXT: mov.b v1[14], w8 -; CHECK-NEXT: ldr w8, [sp, #184] -; CHECK-NEXT: mov.b v0[15], w10 +; CHECK-NEXT: ldr w8, [sp, #56] +; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: ldr w9, [sp, #184] ; CHECK-NEXT: mov.b v1[15], w8 -; CHECK-NEXT: shl.16b v0, v0, #7 +; CHECK-NEXT: mov.b v0[15], w9 ; CHECK-NEXT: shl.16b v1, v1, #7 -; CHECK-NEXT: cmlt.16b v0, v0, #0 -; CHECK-NEXT: cmlt.16b v1, v1, #0 +; CHECK-NEXT: shl.16b v2, v0, #7 +; CHECK-NEXT: cmlt.16b v0, v1, #0 +; CHECK-NEXT: cmlt.16b v1, v2, #0 ; CHECK-NEXT: ret %res = sext <32 x i1> %arg to <32 x i8> ret <32 x i8> %res @@ -360,129 +360,129 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #320] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp, #64] +; CHECK-NEXT: ldr w9, [sp, #328] ; CHECK-NEXT: ldr w10, [sp, #192] ; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: ldr w8, [sp, #328] -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldr w9, [sp, #200] -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: ldr w10, [sp, #336] -; CHECK-NEXT: mov.b v3[1], w8 -; CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: ldr w8, [sp, #64] ; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: ldr w11, [sp, #352] -; CHECK-NEXT: mov.b v2[1], w9 -; CHECK-NEXT: ldr w9, [sp, #80] -; CHECK-NEXT: mov.b v1[1], w8 -; CHECK-NEXT: ldr w8, [sp, #344] -; CHECK-NEXT: mov.b v3[2], w10 -; CHECK-NEXT: ldr w10, [sp, #208] +; CHECK-NEXT: ldr w11, [sp, #208] +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: ldr w10, [sp, #200] +; CHECK-NEXT: mov.b v3[1], w9 +; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: ldr w8, [sp, #336] +; CHECK-NEXT: mov.b v2[1], w10 +; CHECK-NEXT: ldr w10, [sp, #80] ; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: ldr w12, [sp, #368] -; CHECK-NEXT: ldr w13, [sp, #384] -; CHECK-NEXT: mov.b v1[2], w9 -; CHECK-NEXT: ldr w9, [sp, #360] -; CHECK-NEXT: mov.b v2[2], w10 -; CHECK-NEXT: ldr w10, [sp, #88] -; CHECK-NEXT: mov.b v3[3], w8 -; CHECK-NEXT: ldr w8, [sp, #216] +; CHECK-NEXT: ldr w12, [sp, #216] +; CHECK-NEXT: mov.b v1[1], w9 +; CHECK-NEXT: ldr w9, [sp, #344] +; CHECK-NEXT: mov.b v3[2], w8 +; CHECK-NEXT: ldr w8, [sp, #352] +; CHECK-NEXT: mov.b v2[2], w11 +; CHECK-NEXT: ldr w11, [sp, #88] ; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: ldr w14, [sp, #400] -; CHECK-NEXT: mov.b v1[3], w10 -; CHECK-NEXT: ldr w10, [sp, #376] -; CHECK-NEXT: mov.b v2[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v3[4], w11 -; CHECK-NEXT: ldr w11, [sp, #224] +; CHECK-NEXT: ldr w13, [sp, #224] +; CHECK-NEXT: mov.b v1[2], w10 +; CHECK-NEXT: ldr w10, [sp, #360] +; CHECK-NEXT: mov.b v3[3], w9 +; CHECK-NEXT: ldr w14, [sp, #232] +; CHECK-NEXT: mov.b v2[3], w12 +; CHECK-NEXT: ldr w12, [sp, #96] ; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: ldr w15, [sp, #416] -; CHECK-NEXT: mov.b v1[4], w8 -; CHECK-NEXT: ldr w8, [sp, #392] -; CHECK-NEXT: mov.b v2[4], w11 -; CHECK-NEXT: ldr w11, [sp, #104] -; CHECK-NEXT: mov.b v3[5], w9 -; CHECK-NEXT: ldr w9, [sp, #232] +; CHECK-NEXT: ldr w9, [sp, #368] +; CHECK-NEXT: mov.b v1[3], w11 +; CHECK-NEXT: ldr w15, [sp, #240] +; CHECK-NEXT: mov.b v3[4], w8 +; CHECK-NEXT: ldr w11, [sp, #376] +; CHECK-NEXT: mov.b v2[4], w13 +; CHECK-NEXT: ldr w13, [sp, #104] ; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: ldr w16, [sp, #432] -; CHECK-NEXT: mov.b v1[5], w11 -; CHECK-NEXT: ldr w11, [sp, #408] -; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w9, [sp, #112] -; CHECK-NEXT: mov.b v3[6], w12 -; CHECK-NEXT: ldr w12, [sp, #240] +; CHECK-NEXT: ldr w16, [sp, #248] +; CHECK-NEXT: mov.b v1[4], w12 +; CHECK-NEXT: ldr w8, [sp, #384] +; CHECK-NEXT: mov.b v3[5], w10 +; CHECK-NEXT: ldr w12, [sp, #392] +; CHECK-NEXT: mov.b v2[5], w14 +; CHECK-NEXT: ldr w14, [sp, #112] ; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w9 -; CHECK-NEXT: ldr w9, [sp, #424] -; CHECK-NEXT: mov.b v2[6], w12 -; CHECK-NEXT: ldr w12, [sp, #120] -; CHECK-NEXT: mov.b v3[7], w10 -; CHECK-NEXT: ldr w10, [sp, #248] +; CHECK-NEXT: ldr w10, [sp, #400] +; CHECK-NEXT: mov.b v1[5], w13 +; CHECK-NEXT: ldr w13, [sp, #408] +; CHECK-NEXT: mov.b v3[6], w9 +; CHECK-NEXT: ldr w9, [sp, #416] +; CHECK-NEXT: mov.b v2[6], w15 +; CHECK-NEXT: ldr w15, [sp, #120] ; CHECK-NEXT: mov.b v0[7], w7 -; CHECK-NEXT: mov.b v1[7], w12 -; CHECK-NEXT: ldr w12, [sp] -; CHECK-NEXT: mov.b v2[7], w10 -; CHECK-NEXT: ldr w10, [sp, #128] -; CHECK-NEXT: mov.b v3[8], w13 -; CHECK-NEXT: ldr w13, [sp, #256] -; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #440] -; CHECK-NEXT: mov.b v1[8], w10 -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: mov.b v2[8], w13 -; CHECK-NEXT: ldr w13, [sp, #136] -; CHECK-NEXT: mov.b v3[9], w8 +; CHECK-NEXT: mov.b v1[6], w14 +; CHECK-NEXT: ldr w14, [sp, #424] +; CHECK-NEXT: mov.b v3[7], w11 +; CHECK-NEXT: ldr w11, [sp] +; CHECK-NEXT: mov.b v2[7], w16 +; CHECK-NEXT: ldr w16, [sp, #128] +; CHECK-NEXT: movi.16b v4, #1 +; CHECK-NEXT: mov.b v1[7], w15 +; CHECK-NEXT: ldr w15, [sp, #432] +; CHECK-NEXT: mov.b v0[8], w11 +; CHECK-NEXT: ldr w11, [sp, #256] +; CHECK-NEXT: mov.b v3[8], w8 +; CHECK-NEXT: ldr w8, [sp, #8] +; CHECK-NEXT: mov.b v1[8], w16 +; CHECK-NEXT: ldr w16, [sp, #440] +; CHECK-NEXT: mov.b v2[8], w11 +; CHECK-NEXT: ldr w11, [sp, #136] +; CHECK-NEXT: mov.b v0[9], w8 ; CHECK-NEXT: ldr w8, [sp, #264] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #272] -; CHECK-NEXT: mov.b v1[9], w13 -; CHECK-NEXT: ldr w13, [sp, #16] +; CHECK-NEXT: mov.b v3[9], w12 +; CHECK-NEXT: ldr w12, [sp, #16] +; CHECK-NEXT: mov.b v1[9], w11 +; CHECK-NEXT: ldr w11, [sp, #272] ; CHECK-NEXT: mov.b v2[9], w8 ; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v3[10], w14 -; CHECK-NEXT: ldr w14, [sp, #280] -; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #296] +; CHECK-NEXT: mov.b v0[10], w12 +; CHECK-NEXT: ldr w12, [sp, #280] +; CHECK-NEXT: mov.b v3[10], w10 +; CHECK-NEXT: ldr w10, [sp, #24] ; CHECK-NEXT: mov.b v1[10], w8 -; CHECK-NEXT: ldr w8, [sp, #24] -; CHECK-NEXT: mov.b v2[10], w10 -; CHECK-NEXT: ldr w10, [sp, #152] -; CHECK-NEXT: mov.b v3[11], w11 -; CHECK-NEXT: ldr w11, [sp, #288] -; CHECK-NEXT: mov.b v0[11], w8 -; CHECK-NEXT: ldr w8, [sp, #32] -; CHECK-NEXT: mov.b v1[11], w10 -; CHECK-NEXT: ldr w10, [sp, #160] -; CHECK-NEXT: mov.b v2[11], w14 -; CHECK-NEXT: mov.b v3[12], w15 -; CHECK-NEXT: mov.b v0[12], w8 -; CHECK-NEXT: ldr w8, [sp, #40] -; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #168] -; CHECK-NEXT: mov.b v2[12], w11 -; CHECK-NEXT: ldr w11, [sp, #312] -; CHECK-NEXT: mov.b v3[13], w9 -; CHECK-NEXT: ldr w9, [sp, #304] -; CHECK-NEXT: mov.b v0[13], w8 -; CHECK-NEXT: ldr w8, [sp, #48] -; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #176] -; CHECK-NEXT: mov.b v2[13], w13 -; CHECK-NEXT: mov.b v3[14], w16 -; CHECK-NEXT: mov.b v0[14], w8 -; CHECK-NEXT: ldr w8, [sp, #56] -; CHECK-NEXT: mov.b v1[14], w10 -; CHECK-NEXT: mov.b v2[14], w9 -; CHECK-NEXT: ldr w9, [sp, #184] -; CHECK-NEXT: movi.16b v4, #1 -; CHECK-NEXT: mov.b v0[15], w8 -; CHECK-NEXT: mov.b v1[15], w9 -; CHECK-NEXT: mov.b v2[15], w11 -; CHECK-NEXT: mov.b v3[15], w12 +; CHECK-NEXT: ldr w8, [sp, #288] +; CHECK-NEXT: mov.b v2[10], w11 +; CHECK-NEXT: ldr w11, [sp, #152] +; CHECK-NEXT: mov.b v0[11], w10 +; CHECK-NEXT: ldr w10, [sp, #296] +; CHECK-NEXT: mov.b v3[11], w13 +; CHECK-NEXT: ldr w13, [sp, #32] +; CHECK-NEXT: mov.b v1[11], w11 +; CHECK-NEXT: ldr w11, [sp, #304] +; CHECK-NEXT: mov.b v2[11], w12 +; CHECK-NEXT: ldr w12, [sp, #160] +; CHECK-NEXT: mov.b v0[12], w13 +; CHECK-NEXT: ldr w13, [sp, #312] +; CHECK-NEXT: mov.b v3[12], w9 +; CHECK-NEXT: ldr w9, [sp, #40] +; CHECK-NEXT: mov.b v1[12], w12 +; CHECK-NEXT: mov.b v2[12], w8 +; CHECK-NEXT: ldr w8, [sp, #168] +; CHECK-NEXT: mov.b v0[13], w9 +; CHECK-NEXT: ldr w9, [sp, #48] +; CHECK-NEXT: mov.b v3[13], w14 +; CHECK-NEXT: mov.b v1[13], w8 +; CHECK-NEXT: ldr w8, [sp, #176] +; CHECK-NEXT: mov.b v2[13], w10 +; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: ldr w9, [sp, #56] +; CHECK-NEXT: mov.b v3[14], w15 +; CHECK-NEXT: mov.b v1[14], w8 +; CHECK-NEXT: ldr w8, [sp, #184] +; CHECK-NEXT: mov.b v2[14], w11 +; CHECK-NEXT: mov.b v0[15], w9 +; CHECK-NEXT: mov.b v3[15], w16 +; CHECK-NEXT: mov.b v1[15], w8 +; CHECK-NEXT: mov.b v2[15], w13 ; CHECK-NEXT: and.16b v0, v0, v4 +; CHECK-NEXT: and.16b v3, v3, v4 ; CHECK-NEXT: and.16b v1, v1, v4 ; CHECK-NEXT: and.16b v2, v2, v4 -; CHECK-NEXT: and.16b v3, v3, v4 ; CHECK-NEXT: ret %res = zext <64 x i1> %arg to <64 x i8> ret <64 x i8> %res @@ -492,131 +492,131 @@ ; CHECK-LABEL: sext_v64i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #320] -; CHECK-NEXT: fmov s3, w0 -; CHECK-NEXT: ldr w9, [sp, #64] +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ldr w9, [sp, #328] ; CHECK-NEXT: ldr w10, [sp, #192] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldr w8, [sp, #328] -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: ldr w8, [sp, #64] +; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: ldr w11, [sp, #208] ; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: ldr w10, [sp, #80] -; CHECK-NEXT: mov.b v0[1], w8 -; CHECK-NEXT: ldr w8, [sp, #200] +; CHECK-NEXT: ldr w10, [sp, #200] ; CHECK-NEXT: mov.b v1[1], w9 -; CHECK-NEXT: ldr w9, [sp, #336] -; CHECK-NEXT: mov.b v3[1], w1 +; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: ldr w8, [sp, #336] +; CHECK-NEXT: mov.b v2[1], w10 +; CHECK-NEXT: ldr w10, [sp, #80] +; CHECK-NEXT: mov.b v0[2], w2 +; CHECK-NEXT: ldr w12, [sp, #216] +; CHECK-NEXT: mov.b v3[1], w9 +; CHECK-NEXT: ldr w9, [sp, #344] +; CHECK-NEXT: mov.b v1[2], w8 +; CHECK-NEXT: ldr w8, [sp, #352] +; CHECK-NEXT: mov.b v2[2], w11 ; CHECK-NEXT: ldr w11, [sp, #88] -; CHECK-NEXT: mov.b v2[1], w8 -; CHECK-NEXT: ldr w8, [sp, #344] -; CHECK-NEXT: mov.b v0[2], w9 -; CHECK-NEXT: ldr w9, [sp, #208] -; CHECK-NEXT: mov.b v1[2], w10 -; CHECK-NEXT: ldr w10, [sp, #352] -; CHECK-NEXT: mov.b v3[2], w2 +; CHECK-NEXT: mov.b v0[3], w3 +; CHECK-NEXT: ldr w13, [sp, #224] +; CHECK-NEXT: mov.b v3[2], w10 +; CHECK-NEXT: ldr w10, [sp, #360] +; CHECK-NEXT: mov.b v1[3], w9 +; CHECK-NEXT: ldr w14, [sp, #232] +; CHECK-NEXT: mov.b v2[3], w12 ; CHECK-NEXT: ldr w12, [sp, #96] -; CHECK-NEXT: mov.b v2[2], w9 -; CHECK-NEXT: ldr w9, [sp, #360] -; CHECK-NEXT: mov.b v0[3], w8 -; CHECK-NEXT: ldr w8, [sp, #216] -; CHECK-NEXT: mov.b v1[3], w11 +; CHECK-NEXT: mov.b v0[4], w4 +; CHECK-NEXT: ldr w9, [sp, #368] +; CHECK-NEXT: mov.b v3[3], w11 +; CHECK-NEXT: ldr w15, [sp, #240] +; CHECK-NEXT: mov.b v1[4], w8 +; CHECK-NEXT: ldr w11, [sp, #376] +; CHECK-NEXT: mov.b v2[4], w13 ; CHECK-NEXT: ldr w13, [sp, #104] -; CHECK-NEXT: mov.b v3[3], w3 -; CHECK-NEXT: ldr w11, [sp, #368] -; CHECK-NEXT: mov.b v2[3], w8 +; CHECK-NEXT: mov.b v0[5], w5 +; CHECK-NEXT: ldr w16, [sp, #248] +; CHECK-NEXT: mov.b v3[4], w12 +; CHECK-NEXT: ldr w8, [sp, #384] +; CHECK-NEXT: mov.b v1[5], w10 +; CHECK-NEXT: ldr w12, [sp, #392] +; CHECK-NEXT: mov.b v2[5], w14 ; CHECK-NEXT: ldr w14, [sp, #112] -; CHECK-NEXT: mov.b v0[4], w10 -; CHECK-NEXT: ldr w10, [sp, #224] -; CHECK-NEXT: mov.b v1[4], w12 -; CHECK-NEXT: ldr w8, [sp, #376] -; CHECK-NEXT: mov.b v3[4], w4 +; CHECK-NEXT: mov.b v0[6], w6 +; CHECK-NEXT: ldr w10, [sp, #400] +; CHECK-NEXT: mov.b v3[5], w13 +; CHECK-NEXT: ldr w13, [sp, #408] +; CHECK-NEXT: mov.b v1[6], w9 +; CHECK-NEXT: ldr w9, [sp, #416] +; CHECK-NEXT: mov.b v2[6], w15 ; CHECK-NEXT: ldr w15, [sp, #120] -; CHECK-NEXT: mov.b v2[4], w10 -; CHECK-NEXT: ldr w12, [sp, #384] -; CHECK-NEXT: mov.b v0[5], w9 -; CHECK-NEXT: ldr w9, [sp, #232] -; CHECK-NEXT: mov.b v1[5], w13 +; CHECK-NEXT: mov.b v0[7], w7 +; CHECK-NEXT: mov.b v3[6], w14 +; CHECK-NEXT: ldr w14, [sp, #424] +; CHECK-NEXT: mov.b v1[7], w11 +; CHECK-NEXT: ldr w11, [sp] +; CHECK-NEXT: mov.b v2[7], w16 ; CHECK-NEXT: ldr w16, [sp, #128] -; CHECK-NEXT: mov.b v3[5], w5 -; CHECK-NEXT: ldr w10, [sp, #392] -; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w13, [sp, #400] -; CHECK-NEXT: mov.b v0[6], w11 -; CHECK-NEXT: ldr w11, [sp, #240] -; CHECK-NEXT: mov.b v1[6], w14 -; CHECK-NEXT: ldr w9, [sp, #408] -; CHECK-NEXT: mov.b v3[6], w6 -; CHECK-NEXT: ldr w14, [sp, #416] -; CHECK-NEXT: mov.b v2[6], w11 -; CHECK-NEXT: ldr w11, [sp, #424] -; CHECK-NEXT: mov.b v0[7], w8 -; CHECK-NEXT: ldr w8, [sp, #248] -; CHECK-NEXT: mov.b v1[7], w15 +; CHECK-NEXT: mov.b v3[7], w15 ; CHECK-NEXT: ldr w15, [sp, #432] -; CHECK-NEXT: mov.b v3[7], w7 -; CHECK-NEXT: mov.b v2[7], w8 -; CHECK-NEXT: ldr w8, [sp] -; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #256] -; CHECK-NEXT: mov.b v1[8], w16 +; CHECK-NEXT: mov.b v0[8], w11 +; CHECK-NEXT: ldr w11, [sp, #256] +; CHECK-NEXT: mov.b v1[8], w8 +; CHECK-NEXT: ldr w8, [sp, #8] +; CHECK-NEXT: mov.b v3[8], w16 ; CHECK-NEXT: ldr w16, [sp, #440] -; CHECK-NEXT: mov.b v3[8], w8 -; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v2[8], w12 -; CHECK-NEXT: ldr w12, [sp, #8] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #264] -; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #272] -; CHECK-NEXT: mov.b v3[9], w12 -; CHECK-NEXT: ldr w12, [sp, #144] -; CHECK-NEXT: mov.b v2[9], w10 -; CHECK-NEXT: ldr w10, [sp, #16] -; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #280] -; CHECK-NEXT: mov.b v1[10], w12 -; CHECK-NEXT: ldr w12, [sp, #152] -; CHECK-NEXT: mov.b v3[10], w10 -; CHECK-NEXT: ldr w10, [sp, #160] -; CHECK-NEXT: mov.b v2[10], w8 -; CHECK-NEXT: ldr w8, [sp, #24] -; CHECK-NEXT: mov.b v0[11], w9 -; CHECK-NEXT: ldr w9, [sp, #288] -; CHECK-NEXT: mov.b v1[11], w12 -; CHECK-NEXT: ldr w12, [sp, #296] -; CHECK-NEXT: mov.b v3[11], w8 -; CHECK-NEXT: ldr w8, [sp, #32] -; CHECK-NEXT: mov.b v2[11], w13 -; CHECK-NEXT: mov.b v0[12], w14 -; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #168] -; CHECK-NEXT: mov.b v3[12], w8 -; CHECK-NEXT: ldr w8, [sp, #40] -; CHECK-NEXT: mov.b v2[12], w9 -; CHECK-NEXT: ldr w9, [sp, #304] -; CHECK-NEXT: mov.b v0[13], w11 -; CHECK-NEXT: ldr w11, [sp, #312] -; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #176] +; CHECK-NEXT: mov.b v2[8], w11 +; CHECK-NEXT: ldr w11, [sp, #136] +; CHECK-NEXT: mov.b v0[9], w8 +; CHECK-NEXT: ldr w8, [sp, #264] +; CHECK-NEXT: mov.b v1[9], w12 +; CHECK-NEXT: ldr w12, [sp, #16] +; CHECK-NEXT: mov.b v3[9], w11 +; CHECK-NEXT: ldr w11, [sp, #272] +; CHECK-NEXT: mov.b v2[9], w8 +; CHECK-NEXT: ldr w8, [sp, #144] +; CHECK-NEXT: mov.b v0[10], w12 +; CHECK-NEXT: ldr w12, [sp, #280] +; CHECK-NEXT: mov.b v1[10], w10 +; CHECK-NEXT: ldr w10, [sp, #24] +; CHECK-NEXT: mov.b v3[10], w8 +; CHECK-NEXT: ldr w8, [sp, #288] +; CHECK-NEXT: mov.b v2[10], w11 +; CHECK-NEXT: ldr w11, [sp, #152] +; CHECK-NEXT: mov.b v0[11], w10 +; CHECK-NEXT: ldr w10, [sp, #296] +; CHECK-NEXT: mov.b v1[11], w13 +; CHECK-NEXT: ldr w13, [sp, #32] +; CHECK-NEXT: mov.b v3[11], w11 +; CHECK-NEXT: ldr w11, [sp, #304] +; CHECK-NEXT: mov.b v2[11], w12 +; CHECK-NEXT: ldr w12, [sp, #160] +; CHECK-NEXT: mov.b v0[12], w13 +; CHECK-NEXT: mov.b v1[12], w9 +; CHECK-NEXT: ldr w9, [sp, #40] +; CHECK-NEXT: mov.b v3[12], w12 +; CHECK-NEXT: mov.b v2[12], w8 +; CHECK-NEXT: ldr w8, [sp, #168] +; CHECK-NEXT: mov.b v0[13], w9 +; CHECK-NEXT: ldr w9, [sp, #48] +; CHECK-NEXT: mov.b v1[13], w14 ; CHECK-NEXT: mov.b v3[13], w8 -; CHECK-NEXT: ldr w8, [sp, #48] -; CHECK-NEXT: mov.b v2[13], w12 -; CHECK-NEXT: mov.b v0[14], w15 -; CHECK-NEXT: mov.b v1[14], w10 -; CHECK-NEXT: ldr w10, [sp, #184] +; CHECK-NEXT: ldr w8, [sp, #176] +; CHECK-NEXT: mov.b v2[13], w10 +; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: ldr w9, [sp, #56] +; CHECK-NEXT: mov.b v1[14], w15 ; CHECK-NEXT: mov.b v3[14], w8 -; CHECK-NEXT: ldr w8, [sp, #56] -; CHECK-NEXT: mov.b v2[14], w9 -; CHECK-NEXT: mov.b v0[15], w16 -; CHECK-NEXT: mov.b v1[15], w10 +; CHECK-NEXT: ldr w8, [sp, #184] +; CHECK-NEXT: mov.b v2[14], w11 +; CHECK-NEXT: mov.b v0[15], w9 +; CHECK-NEXT: ldr w9, [sp, #312] +; CHECK-NEXT: mov.b v1[15], w16 ; CHECK-NEXT: mov.b v3[15], w8 -; CHECK-NEXT: mov.b v2[15], w11 -; CHECK-NEXT: shl.16b v4, v0, #7 -; CHECK-NEXT: shl.16b v1, v1, #7 +; CHECK-NEXT: mov.b v2[15], w9 +; CHECK-NEXT: shl.16b v0, v0, #7 +; CHECK-NEXT: shl.16b v4, v1, #7 ; CHECK-NEXT: shl.16b v3, v3, #7 ; CHECK-NEXT: shl.16b v2, v2, #7 -; CHECK-NEXT: cmlt.16b v0, v3, #0 -; CHECK-NEXT: cmlt.16b v1, v1, #0 +; CHECK-NEXT: cmlt.16b v0, v0, #0 +; CHECK-NEXT: cmlt.16b v1, v3, #0 ; CHECK-NEXT: cmlt.16b v2, v2, #0 ; CHECK-NEXT: cmlt.16b v3, v4, #0 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/arm64-tls-dynamics.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-tls-dynamics.ll +++ llvm/test/CodeGen/AArch64/arm64-tls-dynamics.ll @@ -90,9 +90,9 @@ ; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_ ; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_ ; CHECK-NEXT: blr [[CALLEE]] +; CHECK-NEXT: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 ; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var -; CHECK-DAG: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0 -; CHECK-DAG: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var +; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var ; CHECK: ldr w0, [x[[TPIDR]], x[[TPOFF]]] ; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var Index: llvm/test/CodeGen/AArch64/arm64-vabs.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -1749,28 +1749,28 @@ ; CHECK-LABEL: uabd_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov.d x8, v0[1] -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: mov.d x10, v1[1] +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: mov.d x9, v1[1] ; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: asr x12, x9, #63 +; CHECK-NEXT: asr x12, x10, #63 ; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: subs x9, x9, x11 -; CHECK-NEXT: sbc x11, x12, x13 -; CHECK-NEXT: asr x12, x8, #63 -; CHECK-NEXT: asr x13, x10, #63 -; CHECK-NEXT: subs x8, x8, x10 -; CHECK-NEXT: sbc x10, x12, x13 -; CHECK-NEXT: asr x12, x11, #63 -; CHECK-NEXT: asr x13, x10, #63 -; CHECK-NEXT: eor x9, x9, x12 +; CHECK-NEXT: subs x10, x10, x11 +; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: asr x14, x9, #63 +; CHECK-NEXT: sbc x12, x12, x13 +; CHECK-NEXT: subs x8, x8, x9 +; CHECK-NEXT: sbc x11, x11, x14 +; CHECK-NEXT: asr x9, x12, #63 +; CHECK-NEXT: asr x13, x11, #63 +; CHECK-NEXT: eor x10, x10, x9 +; CHECK-NEXT: eor x12, x12, x9 ; CHECK-NEXT: eor x8, x8, x13 -; CHECK-NEXT: eor x10, x10, x13 +; CHECK-NEXT: eor x11, x11, x13 ; CHECK-NEXT: subs x2, x8, x13 -; CHECK-NEXT: sbc x3, x10, x13 -; CHECK-NEXT: subs x8, x9, x12 -; CHECK-NEXT: eor x9, x11, x12 -; CHECK-NEXT: sbc x1, x9, x12 +; CHECK-NEXT: sbc x3, x11, x13 +; CHECK-NEXT: subs x8, x10, x9 ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: sbc x1, x12, x9 ; CHECK-NEXT: mov.d v0[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/arm64-vhadd.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-vhadd.ll +++ llvm/test/CodeGen/AArch64/arm64-vhadd.ll @@ -1078,9 +1078,9 @@ define <16 x i8> @andmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) nounwind { ; CHECK-LABEL: andmaskv16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.16b v3, #7 ; CHECK-NEXT: uzp1.16b v0, v0, v1 -; CHECK-NEXT: and.16b v0, v0, v3 +; CHECK-NEXT: movi.16b v1, #7 +; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: uhadd.16b v0, v0, v2 ; CHECK-NEXT: ret %zextsrc1 = and <16 x i16> %src1, @@ -1094,13 +1094,13 @@ define <16 x i8> @andmask2v16i8(<16 x i16> %src1, <16 x i16> %src2) nounwind { ; CHECK-LABEL: andmask2v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.16b v4, #3 -; CHECK-NEXT: movi.16b v5, #7 ; CHECK-NEXT: uzp1.16b v2, v2, v3 +; CHECK-NEXT: movi.16b v3, #3 ; CHECK-NEXT: uzp1.16b v0, v0, v1 -; CHECK-NEXT: and.16b v1, v2, v4 -; CHECK-NEXT: and.16b v0, v0, v5 -; CHECK-NEXT: uhadd.16b v0, v0, v1 +; CHECK-NEXT: movi.16b v1, #7 +; CHECK-NEXT: and.16b v2, v2, v3 +; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: uhadd.16b v0, v0, v2 ; CHECK-NEXT: ret %zextsrc1 = and <16 x i16> %src1, %zextsrc2 = and <16 x i16> %src2, Index: llvm/test/CodeGen/AArch64/arm64-vmul.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -458,10 +458,10 @@ ; CHECK-LABEL: smlsl8h_chain_with_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: smull.8h v0, v0, v2 -; CHECK-NEXT: mvn.8b v2, v2 -; CHECK-NEXT: movi.16b v3, #1 -; CHECK-NEXT: smlal.8h v0, v1, v2 -; CHECK-NEXT: sub.8h v0, v3, v0 +; CHECK-NEXT: mvn.8b v3, v2 +; CHECK-NEXT: movi.16b v2, #1 +; CHECK-NEXT: smlal.8h v0, v1, v3 +; CHECK-NEXT: sub.8h v0, v2, v0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret %xor = xor <8 x i8> %v3, @@ -739,10 +739,10 @@ ; CHECK-LABEL: umlsl8h_chain_with_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: umull.8h v0, v0, v2 -; CHECK-NEXT: mvn.8b v2, v2 -; CHECK-NEXT: movi.16b v3, #1 -; CHECK-NEXT: umlal.8h v0, v1, v2 -; CHECK-NEXT: sub.8h v0, v3, v0 +; CHECK-NEXT: mvn.8b v3, v2 +; CHECK-NEXT: movi.16b v2, #1 +; CHECK-NEXT: umlal.8h v0, v1, v3 +; CHECK-NEXT: sub.8h v0, v2, v0 ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret %xor = xor <8 x i8> %v3, @@ -1140,13 +1140,13 @@ define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK-LABEL: mul_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: fmov x10, d1 +; CHECK-NEXT: fmov x11, d0 ; CHECK-NEXT: mov.d x8, v1[1] -; CHECK-NEXT: mov.d x11, v0[1] -; CHECK-NEXT: mul x9, x10, x9 -; CHECK-NEXT: mul x8, x11, x8 -; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: mov.d x9, v0[1] +; CHECK-NEXT: mul x10, x11, x10 +; CHECK-NEXT: mul x8, x9, x8 +; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: mov.d v0[1], x8 ; CHECK-NEXT: ret %tmp1 = mul <2 x i64> %A, %B Index: llvm/test/CodeGen/AArch64/arm64-xaluo.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-xaluo.ll +++ llvm/test/CodeGen/AArch64/arm64-xaluo.ll @@ -18,16 +18,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds w8, w0, w1 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo1.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds w8, w0, w1 ; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str w8, [x2] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) @@ -50,16 +50,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds w8, w0, #4 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo2.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds w8, w0, #4 ; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str w8, [x1] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 4) @@ -82,16 +82,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs w8, w0, #4 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo3.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs w8, w0, #4 ; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str w8, [x1] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 -4) @@ -116,8 +116,8 @@ ; FAST-NEXT: mov w8, #16777215 ; FAST-NEXT: adds w8, w0, w8 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo4.i32: @@ -125,8 +125,8 @@ ; GISEL-NEXT: mov w8, #16777215 ; GISEL-NEXT: adds w8, w0, w8 ; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str w8, [x1] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 16777215) @@ -181,16 +181,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, x1 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo1.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds x8, x0, x1 ; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str x8, [x2] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) @@ -212,16 +212,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, #4 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo2.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds x8, x0, #4 ; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 4) @@ -243,16 +243,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs x8, x0, #4 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo3.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs x8, x0, #4 ; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -4) @@ -274,16 +274,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds w8, w0, w1 ; FAST-NEXT: cset w9, hs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: uaddo.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds w8, w0, w1 ; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str w8, [x2] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) @@ -305,16 +305,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, x1 ; FAST-NEXT: cset w9, hs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: uaddo.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds x8, x0, x1 ; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str x8, [x2] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) @@ -336,16 +336,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs w8, w0, w1 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: ssubo1.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs w8, w0, w1 ; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str w8, [x2] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) @@ -367,16 +367,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds w8, w0, #4 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: ssubo2.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds w8, w0, #4 ; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str w8, [x1] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 -4) @@ -398,16 +398,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs x8, x0, x1 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: ssubo.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs x8, x0, x1 ; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str x8, [x2] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) @@ -429,16 +429,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs w8, w0, w1 ; FAST-NEXT: cset w9, lo -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: usubo.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs w8, w0, w1 ; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str w8, [x2] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) @@ -460,16 +460,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs x8, x0, x1 ; FAST-NEXT: cset w9, lo -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: usubo.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: subs x8, x0, x1 ; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str x8, [x2] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) @@ -484,16 +484,16 @@ ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: smull x8, w0, w1 ; SDAG-NEXT: cmp x8, w8, sxtw -; SDAG-NEXT: cset w0, ne ; SDAG-NEXT: str w8, [x2] +; SDAG-NEXT: cset w0, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: smulo.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: smull x8, w0, w1 ; FAST-NEXT: cmp x8, w8, sxtw -; FAST-NEXT: cset w9, ne ; FAST-NEXT: str w8, [x2] +; FAST-NEXT: cset w9, ne ; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; @@ -502,9 +502,9 @@ ; GISEL-NEXT: smull x8, w0, w1 ; GISEL-NEXT: mul w9, w0, w1 ; GISEL-NEXT: asr x8, x8, #32 +; GISEL-NEXT: str w9, [x2] ; GISEL-NEXT: cmp w8, w9, asr #31 ; GISEL-NEXT: cset w0, ne -; GISEL-NEXT: str w9, [x2] ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) @@ -519,28 +519,28 @@ ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: mul x8, x0, x1 ; SDAG-NEXT: smulh x9, x0, x1 +; SDAG-NEXT: str x8, [x2] ; SDAG-NEXT: cmp x9, x8, asr #63 ; SDAG-NEXT: cset w0, ne -; SDAG-NEXT: str x8, [x2] ; SDAG-NEXT: ret ; ; FAST-LABEL: smulo.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: mul x8, x0, x1 ; FAST-NEXT: smulh x9, x0, x1 +; FAST-NEXT: str x8, [x2] ; FAST-NEXT: cmp x9, x8, asr #63 ; FAST-NEXT: cset w9, ne -; FAST-NEXT: str x8, [x2] ; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: smulo.i64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: mul x8, x0, x1 -; GISEL-NEXT: smulh x9, x0, x1 -; GISEL-NEXT: cmp x9, x8, asr #63 +; GISEL-NEXT: smulh x8, x0, x1 +; GISEL-NEXT: mul x9, x0, x1 +; GISEL-NEXT: cmp x8, x9, asr #63 +; GISEL-NEXT: str x9, [x2] ; GISEL-NEXT: cset w0, ne -; GISEL-NEXT: str x8, [x2] ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) @@ -562,16 +562,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, x0 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: smulo2.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds x8, x0, x0 ; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 2) @@ -586,17 +586,17 @@ ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: umull x8, w0, w1 ; SDAG-NEXT: tst x8, #0xffffffff00000000 -; SDAG-NEXT: cset w0, ne ; SDAG-NEXT: str w8, [x2] +; SDAG-NEXT: cset w0, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: umulo.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umull x8, w0, w1 ; FAST-NEXT: tst x8, #0xffffffff00000000 +; FAST-NEXT: str w8, [x2] ; FAST-NEXT: cset w9, ne ; FAST-NEXT: and w0, w9, #0x1 -; FAST-NEXT: str w8, [x2] ; FAST-NEXT: ret ; ; GISEL-LABEL: umulo.i32: @@ -604,10 +604,9 @@ ; GISEL-NEXT: umull x8, w0, w1 ; GISEL-NEXT: mul w9, w0, w1 ; GISEL-NEXT: lsr x8, x8, #32 -; GISEL-NEXT: cmp w8, #0 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: mov w0, w8 ; GISEL-NEXT: str w9, [x2] +; GISEL-NEXT: cmp w8, #0 +; GISEL-NEXT: cset w0, ne ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) @@ -623,8 +622,7 @@ ; SDAG-NEXT: umulh x8, x0, x1 ; SDAG-NEXT: mul x9, x0, x1 ; SDAG-NEXT: cmp xzr, x8 -; SDAG-NEXT: cset w8, ne -; SDAG-NEXT: mov w0, w8 +; SDAG-NEXT: cset w0, ne ; SDAG-NEXT: str x9, [x2] ; SDAG-NEXT: ret ; @@ -634,9 +632,8 @@ ; FAST-NEXT: mul x9, x0, x1 ; FAST-NEXT: cmp xzr, x8 ; FAST-NEXT: cset w8, ne -; FAST-NEXT: and w8, w8, #0x1 -; FAST-NEXT: mov w0, w8 ; FAST-NEXT: str x9, [x2] +; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: umulo.i64: @@ -644,8 +641,7 @@ ; GISEL-NEXT: umulh x8, x0, x1 ; GISEL-NEXT: mul x9, x0, x1 ; GISEL-NEXT: cmp x8, #0 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: mov w0, w8 +; GISEL-NEXT: cset w0, ne ; GISEL-NEXT: str x9, [x2] ; GISEL-NEXT: ret entry: @@ -668,16 +664,16 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, x0 ; FAST-NEXT: cset w9, hs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: umulo2.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: adds x8, x0, x0 ; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: str x8, [x1] +; GISEL-NEXT: ubfx w0, w9, #0, #1 ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2) @@ -1185,13 +1181,29 @@ } define i64 @smulo.select.i64(i64 %v1, i64 %v2) { -; CHECK-LABEL: smulo.select.i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mul x8, x0, x1 -; CHECK-NEXT: smulh x9, x0, x1 -; CHECK-NEXT: cmp x9, x8, asr #63 -; CHECK-NEXT: csel x0, x0, x1, ne -; CHECK-NEXT: ret +; SDAG-LABEL: smulo.select.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mul x8, x0, x1 +; SDAG-NEXT: smulh x9, x0, x1 +; SDAG-NEXT: cmp x9, x8, asr #63 +; SDAG-NEXT: csel x0, x0, x1, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo.select.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: mul x8, x0, x1 +; FAST-NEXT: smulh x9, x0, x1 +; FAST-NEXT: cmp x9, x8, asr #63 +; FAST-NEXT: csel x0, x0, x1, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo.select.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: smulh x8, x0, x1 +; GISEL-NEXT: mul x9, x0, x1 +; GISEL-NEXT: cmp x8, x9, asr #63 +; GISEL-NEXT: csel x0, x0, x1, ne +; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -1218,9 +1230,9 @@ ; ; GISEL-LABEL: smulo.not.i64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: mul x8, x0, x1 -; GISEL-NEXT: smulh x9, x0, x1 -; GISEL-NEXT: cmp x9, x8, asr #63 +; GISEL-NEXT: smulh x8, x0, x1 +; GISEL-NEXT: mul x9, x0, x1 +; GISEL-NEXT: cmp x8, x9, asr #63 ; GISEL-NEXT: cset w8, ne ; GISEL-NEXT: eor w0, w8, #0x1 ; GISEL-NEXT: ret @@ -1996,28 +2008,28 @@ define i32 @umulo.selectboth.i32(i32 %a, i32 %b) { ; SDAG-LABEL: umulo.selectboth.i32: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: umull x9, w0, w1 -; SDAG-NEXT: mov w8, #10 -; SDAG-NEXT: tst x9, #0xffffffff00000000 -; SDAG-NEXT: csel w0, w9, w8, ne +; SDAG-NEXT: umull x8, w0, w1 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: tst x8, #0xffffffff00000000 +; SDAG-NEXT: csel w0, w8, w9, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: umulo.selectboth.i32: ; FAST: // %bb.0: // %entry -; FAST-NEXT: umull x9, w0, w1 -; FAST-NEXT: mov w8, #10 -; FAST-NEXT: tst x9, #0xffffffff00000000 -; FAST-NEXT: csel w0, w9, w8, ne +; FAST-NEXT: umull x8, w0, w1 +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: tst x8, #0xffffffff00000000 +; FAST-NEXT: csel w0, w8, w9, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: umulo.selectboth.i32: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: umull x9, w0, w1 -; GISEL-NEXT: mov w8, #10 -; GISEL-NEXT: mul w10, w0, w1 -; GISEL-NEXT: lsr x9, x9, #32 -; GISEL-NEXT: cmp w9, #0 -; GISEL-NEXT: csel w0, w10, w8, ne +; GISEL-NEXT: umull x8, w0, w1 +; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mul w9, w0, w1 +; GISEL-NEXT: lsr x8, x8, #32 +; GISEL-NEXT: cmp w8, #0 +; GISEL-NEXT: csel w0, w9, w10, ne ; GISEL-NEXT: ret entry: %m = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b) @@ -2030,28 +2042,28 @@ define i32 @smulo.selectboth.i32(i32 %a, i32 %b) { ; SDAG-LABEL: smulo.selectboth.i32: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: smull x9, w0, w1 -; SDAG-NEXT: mov w8, #10 -; SDAG-NEXT: cmp x9, w9, sxtw -; SDAG-NEXT: csel w0, w9, w8, ne +; SDAG-NEXT: smull x8, w0, w1 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: cmp x8, w8, sxtw +; SDAG-NEXT: csel w0, w8, w9, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: smulo.selectboth.i32: ; FAST: // %bb.0: // %entry -; FAST-NEXT: smull x9, w0, w1 -; FAST-NEXT: mov w8, #10 -; FAST-NEXT: cmp x9, w9, sxtw -; FAST-NEXT: csel w0, w9, w8, ne +; FAST-NEXT: smull x8, w0, w1 +; FAST-NEXT: mov w9, #10 +; FAST-NEXT: cmp x8, w8, sxtw +; FAST-NEXT: csel w0, w8, w9, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: smulo.selectboth.i32: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: smull x9, w0, w1 -; GISEL-NEXT: mov w8, #10 -; GISEL-NEXT: mul w10, w0, w1 -; GISEL-NEXT: asr x9, x9, #32 -; GISEL-NEXT: cmp w9, w10, asr #31 -; GISEL-NEXT: csel w0, w10, w8, ne +; GISEL-NEXT: smull x8, w0, w1 +; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mul w9, w0, w1 +; GISEL-NEXT: asr x8, x8, #32 +; GISEL-NEXT: cmp w8, w9, asr #31 +; GISEL-NEXT: csel w0, w9, w10, ne ; GISEL-NEXT: ret entry: %m = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %a, i32 %b) @@ -2064,29 +2076,29 @@ define i64 @umulo.selectboth.i64(i64 %a, i64 %b) { ; SDAG-LABEL: umulo.selectboth.i64: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: umulh x9, x0, x1 +; SDAG-NEXT: umulh x8, x0, x1 +; SDAG-NEXT: mul x9, x0, x1 +; SDAG-NEXT: cmp xzr, x8 ; SDAG-NEXT: mov w8, #10 -; SDAG-NEXT: mul x10, x0, x1 -; SDAG-NEXT: cmp xzr, x9 -; SDAG-NEXT: csel x0, x10, x8, ne +; SDAG-NEXT: csel x0, x9, x8, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: umulo.selectboth.i64: ; FAST: // %bb.0: // %entry -; FAST-NEXT: umulh x9, x0, x1 +; FAST-NEXT: umulh x8, x0, x1 +; FAST-NEXT: mul x9, x0, x1 +; FAST-NEXT: cmp xzr, x8 ; FAST-NEXT: mov x8, #10 -; FAST-NEXT: mul x10, x0, x1 -; FAST-NEXT: cmp xzr, x9 -; FAST-NEXT: csel x0, x10, x8, ne +; FAST-NEXT: csel x0, x9, x8, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: umulo.selectboth.i64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: umulh x9, x0, x1 -; GISEL-NEXT: mov w8, #10 -; GISEL-NEXT: mul x10, x0, x1 -; GISEL-NEXT: cmp x9, #0 -; GISEL-NEXT: csel x0, x10, x8, ne +; GISEL-NEXT: umulh x8, x0, x1 +; GISEL-NEXT: mov w10, #10 +; GISEL-NEXT: mul x9, x0, x1 +; GISEL-NEXT: cmp x8, #0 +; GISEL-NEXT: csel x0, x9, x10, ne ; GISEL-NEXT: ret entry: %m = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 %b) @@ -2099,29 +2111,29 @@ define i64 @smulo.selectboth.i64(i64 %a, i64 %b) { ; SDAG-LABEL: smulo.selectboth.i64: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: mul x9, x0, x1 -; SDAG-NEXT: mov w8, #10 -; SDAG-NEXT: smulh x10, x0, x1 -; SDAG-NEXT: cmp x10, x9, asr #63 -; SDAG-NEXT: csel x0, x9, x8, ne +; SDAG-NEXT: mul x8, x0, x1 +; SDAG-NEXT: smulh x9, x0, x1 +; SDAG-NEXT: cmp x9, x8, asr #63 +; SDAG-NEXT: mov w9, #10 +; SDAG-NEXT: csel x0, x8, x9, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: smulo.selectboth.i64: ; FAST: // %bb.0: // %entry -; FAST-NEXT: mul x9, x0, x1 -; FAST-NEXT: mov x8, #10 -; FAST-NEXT: smulh x10, x0, x1 -; FAST-NEXT: cmp x10, x9, asr #63 -; FAST-NEXT: csel x0, x9, x8, ne +; FAST-NEXT: mul x8, x0, x1 +; FAST-NEXT: smulh x9, x0, x1 +; FAST-NEXT: cmp x9, x8, asr #63 +; FAST-NEXT: mov x9, #10 +; FAST-NEXT: csel x0, x8, x9, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: smulo.selectboth.i64: ; GISEL: // %bb.0: // %entry +; GISEL-NEXT: smulh x8, x0, x1 +; GISEL-NEXT: mov w10, #10 ; GISEL-NEXT: mul x9, x0, x1 -; GISEL-NEXT: mov w8, #10 -; GISEL-NEXT: smulh x10, x0, x1 -; GISEL-NEXT: cmp x10, x9, asr #63 -; GISEL-NEXT: csel x0, x9, x8, ne +; GISEL-NEXT: cmp x8, x9, asr #63 +; GISEL-NEXT: csel x0, x9, x10, ne ; GISEL-NEXT: ret entry: %m = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %a, i64 %b) @@ -2433,11 +2445,11 @@ ; ; FAST-LABEL: smulo.br.i32: ; FAST: // %bb.0: // %entry -; FAST-NEXT: smull x9, w0, w1 -; FAST-NEXT: mov w8, #1 -; FAST-NEXT: cmp x9, w9, sxtw -; FAST-NEXT: cset w9, ne -; FAST-NEXT: bic w8, w8, w9 +; FAST-NEXT: smull x8, w0, w1 +; FAST-NEXT: mov w9, #1 +; FAST-NEXT: cmp x8, w8, sxtw +; FAST-NEXT: cset w8, ne +; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: ret ; @@ -2474,20 +2486,20 @@ ; ; FAST-LABEL: smulo.br.i64: ; FAST: // %bb.0: // %entry -; FAST-NEXT: mul x9, x0, x1 -; FAST-NEXT: mov w8, #1 -; FAST-NEXT: smulh x10, x0, x1 -; FAST-NEXT: cmp x10, x9, asr #63 -; FAST-NEXT: cset w9, ne -; FAST-NEXT: bic w8, w8, w9 +; FAST-NEXT: mul x8, x0, x1 +; FAST-NEXT: smulh x9, x0, x1 +; FAST-NEXT: cmp x9, x8, asr #63 +; FAST-NEXT: mov w9, #1 +; FAST-NEXT: cset w8, ne +; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: smulo.br.i64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: mul x8, x0, x1 -; GISEL-NEXT: smulh x9, x0, x1 -; GISEL-NEXT: cmp x9, x8, asr #63 +; GISEL-NEXT: smulh x8, x0, x1 +; GISEL-NEXT: mul x9, x0, x1 +; GISEL-NEXT: cmp x8, x9, asr #63 ; GISEL-NEXT: cset w8, ne ; GISEL-NEXT: eor w0, w8, #0x1 ; GISEL-NEXT: ret @@ -2550,11 +2562,11 @@ ; ; FAST-LABEL: umulo.br.i32: ; FAST: // %bb.0: // %entry -; FAST-NEXT: umull x9, w0, w1 -; FAST-NEXT: mov w8, #1 -; FAST-NEXT: tst x9, #0xffffffff00000000 -; FAST-NEXT: cset w9, ne -; FAST-NEXT: bic w8, w8, w9 +; FAST-NEXT: umull x8, w0, w1 +; FAST-NEXT: mov w9, #1 +; FAST-NEXT: tst x8, #0xffffffff00000000 +; FAST-NEXT: cset w8, ne +; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: ret ; @@ -2589,11 +2601,11 @@ ; ; FAST-LABEL: umulo.br.i64: ; FAST: // %bb.0: // %entry -; FAST-NEXT: umulh x9, x0, x1 -; FAST-NEXT: mov w8, #1 -; FAST-NEXT: cmp xzr, x9 -; FAST-NEXT: cset w9, ne -; FAST-NEXT: bic w8, w8, w9 +; FAST-NEXT: umulh x8, x0, x1 +; FAST-NEXT: mov w9, #1 +; FAST-NEXT: cmp xzr, x8 +; FAST-NEXT: cset w8, ne +; FAST-NEXT: bic w8, w9, w8 ; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: ret ; Index: llvm/test/CodeGen/AArch64/arm64_32-addrs.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64_32-addrs.ll +++ llvm/test/CodeGen/AArch64/arm64_32-addrs.ll @@ -42,10 +42,10 @@ define i8 @test_valid_wrap_optimizable2(i8* %base, i32 %offset) { ; CHECK-LABEL: test_valid_wrap_optimizable2: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #-100 ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: ldrb w0, [x9, x8] +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w9, #-100 +; CHECK-NEXT: ldrb w0, [x8, x9] ; CHECK-NEXT: ret %newaddr = getelementptr inbounds i8, i8* inttoptr(i32 -100 to i8*), i32 %offset Index: llvm/test/CodeGen/AArch64/arm64_32.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64_32.ll +++ llvm/test/CodeGen/AArch64/arm64_32.ll @@ -734,9 +734,8 @@ define void @test_memset(i64 %in, i8 %value) { ; CHECK-LABEL: test_memset: -; CHECK-DAG: and x8, x0, #0xffffffff ; CHECK-DAG: lsr x2, x0, #32 -; CHECK-DAG: mov x0, x8 +; CHECK-DAG: and x0, x0, #0xffffffff ; CHECK: b _memset %ptr.i32 = trunc i64 %in to i32 Index: llvm/test/CodeGen/AArch64/atomic-ops-lse.ll =================================================================== --- llvm/test/CodeGen/AArch64/atomic-ops-lse.ll +++ llvm/test/CodeGen/AArch64/atomic-ops-lse.ll @@ -1542,9 +1542,9 @@ %old = atomicrmw sub i8* @var8, i8 -1 seq_cst ; CHECK-NOT: dmb -; CHECK: mov w[[IMM:[0-9]+]], #1 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 +; CHECK: mov w[[IMM:[0-9]+]], #1 ; CHECK: ldaddalb w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] ; CHECK-NOT: dmb @@ -1565,9 +1565,9 @@ %old = atomicrmw sub i16* @var16, i16 -1 seq_cst ; CHECK-NOT: dmb -; CHECK: mov w[[IMM:[0-9]+]], #1 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 +; CHECK: mov w[[IMM:[0-9]+]], #1 ; CHECK: ldaddalh w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] ; CHECK-NOT: dmb @@ -1588,9 +1588,9 @@ %old = atomicrmw sub i32* @var32, i32 -1 seq_cst ; CHECK-NOT: dmb -; CHECK: mov w[[IMM:[0-9]+]], #1 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 +; CHECK: mov w[[IMM:[0-9]+]], #1 ; CHECK: ldaddal w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] ; CHECK-NOT: dmb @@ -1611,9 +1611,9 @@ %old = atomicrmw sub i64* @var64, i64 -1 seq_cst ; CHECK-NOT: dmb -; CHECK: mov w[[IMM:[0-9]+]], #1 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 +; CHECK: mov w[[IMM:[0-9]+]], #1 ; CHECK: ldaddal x[[IMM]], x[[NEW:[0-9]+]], [x[[ADDR]]] ; CHECK-NOT: dmb @@ -1809,9 +1809,9 @@ ; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i8* @var8, i8 -2 seq_cst ; CHECK-NOT: dmb -; CHECK: mov w[[CONST:[0-9]+]], #1 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 +; CHECK: mov w[[CONST:[0-9]+]], #1 ; CHECK: ldclralb w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] ; CHECK-NOT: dmb ret i8 %old @@ -1830,9 +1830,9 @@ ; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i16* @var16, i16 -2 seq_cst ; CHECK-NOT: dmb -; CHECK: mov w[[CONST:[0-9]+]], #1 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 +; CHECK: mov w[[CONST:[0-9]+]], #1 ; CHECK: ldclralh w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] ; CHECK-NOT: dmb ret i16 %old @@ -1851,9 +1851,9 @@ ; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i32* @var32, i32 -2 seq_cst ; CHECK-NOT: dmb -; CHECK: mov w[[CONST:[0-9]+]], #1 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 +; CHECK: mov w[[CONST:[0-9]+]], #1 ; CHECK: ldclral w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] ; CHECK-NOT: dmb ret i32 %old @@ -1872,9 +1872,9 @@ ; OUTLINE-ATOMICS-NEXT: ret %old = atomicrmw and i64* @var64, i64 -2 seq_cst ; CHECK-NOT: dmb -; CHECK: mov w[[CONST:[0-9]+]], #1 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 +; CHECK: mov w[[CONST:[0-9]+]], #1 ; CHECK: ldclral x[[CONST]], x[[NEW:[0-9]+]], [x[[ADDR]]] ; CHECK-NOT: dmb ret i64 %old Index: llvm/test/CodeGen/AArch64/bfis-in-loop.ll =================================================================== --- llvm/test/CodeGen/AArch64/bfis-in-loop.ll +++ llvm/test/CodeGen/AArch64/bfis-in-loop.ll @@ -13,26 +13,26 @@ define i64 @bfis_in_loop_zero() { ; CHECK-LABEL: bfis_in_loop_zero: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, :got:global +; CHECK-NEXT: adrp x8, :got:global ; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ldr x9, [x9, :got_lo12:global] -; CHECK-NEXT: ldr x9, [x9] +; CHECK-NEXT: ldr x8, [x8, :got_lo12:global] +; CHECK-NEXT: mov w9, wzr +; CHECK-NEXT: ldr x8, [x8] ; CHECK-NEXT: .LBB0_1: // %midblock ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrh w10, [x9, #72] +; CHECK-NEXT: ldrh w10, [x8, #72] +; CHECK-NEXT: and x11, x0, #0xffffffff00000000 +; CHECK-NEXT: ldr x13, [x8, #8] +; CHECK-NEXT: ubfx x12, x10, #8, #24 ; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: ubfx x11, x10, #8, #24 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: csel w8, w8, w11, eq -; CHECK-NEXT: ldr x11, [x9, #8] -; CHECK-NEXT: and x9, x10, #0xff -; CHECK-NEXT: and x10, x0, #0xffffffff00000000 -; CHECK-NEXT: bfi x9, x8, #8, #32 -; CHECK-NEXT: bfi x10, x12, #16, #1 -; CHECK-NEXT: orr x0, x10, x9 -; CHECK-NEXT: ldr x9, [x11, #16] -; CHECK-NEXT: cbnz x11, .LBB0_1 +; CHECK-NEXT: and x10, x10, #0xff +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: csel w9, w9, w12, eq +; CHECK-NEXT: bfi x11, x8, #16, #1 +; CHECK-NEXT: bfi x10, x9, #8, #32 +; CHECK-NEXT: ldr x8, [x13, #16] +; CHECK-NEXT: orr x0, x11, x10 +; CHECK-NEXT: cbnz x13, .LBB0_1 ; CHECK-NEXT: // %bb.2: // %exit ; CHECK-NEXT: ret entry: @@ -84,24 +84,24 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x9, :got:global ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: // implicit-def: $x0 ; CHECK-NEXT: ldr x9, [x9, :got_lo12:global] +; CHECK-NEXT: // implicit-def: $x0 ; CHECK-NEXT: ldr x9, [x9] ; CHECK-NEXT: .LBB1_1: // %midblock ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrh w10, [x9, #72] +; CHECK-NEXT: and x11, x0, #0xffffffff00000000 +; CHECK-NEXT: ldr x13, [x9, #8] +; CHECK-NEXT: ubfx x12, x10, #8, #24 ; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: ubfx x11, x10, #8, #24 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: csel w8, w8, w11, eq -; CHECK-NEXT: ldr x11, [x9, #8] -; CHECK-NEXT: and x9, x10, #0xff -; CHECK-NEXT: and x10, x0, #0xffffffff00000000 -; CHECK-NEXT: bfi x9, x8, #8, #32 -; CHECK-NEXT: bfi x10, x12, #16, #1 -; CHECK-NEXT: orr x0, x10, x9 -; CHECK-NEXT: ldr x9, [x11, #16] -; CHECK-NEXT: cbnz x11, .LBB1_1 +; CHECK-NEXT: and x10, x10, #0xff +; CHECK-NEXT: cset w9, ne +; CHECK-NEXT: csel w8, w8, w12, eq +; CHECK-NEXT: bfi x11, x9, #16, #1 +; CHECK-NEXT: bfi x10, x8, #8, #32 +; CHECK-NEXT: ldr x9, [x13, #16] +; CHECK-NEXT: orr x0, x11, x10 +; CHECK-NEXT: cbnz x13, .LBB1_1 ; CHECK-NEXT: // %bb.2: // %exit ; CHECK-NEXT: ret entry: Index: llvm/test/CodeGen/AArch64/bitfield-insert.ll =================================================================== --- llvm/test/CodeGen/AArch64/bitfield-insert.ll +++ llvm/test/CodeGen/AArch64/bitfield-insert.ll @@ -166,11 +166,11 @@ ; CHECK-LABEL: test_32bit_badmask: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: mov w10, #135 +; CHECK-NEXT: mov w10, #632 ; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: mov w11, #632 -; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w9, w11, w9, lsl #3 +; CHECK-NEXT: mov w11, #135 +; CHECK-NEXT: and w8, w8, w11 +; CHECK-NEXT: and w9, w10, w9, lsl #3 ; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret @@ -191,13 +191,13 @@ define void @test_64bit_badmask(i64 *%existing, i64 *%new) { ; CHECK-LABEL: test_64bit_badmask: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w8, #135 -; CHECK-NEXT: ldr x10, [x1] +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov w10, #135 +; CHECK-NEXT: ldr x9, [x1] ; CHECK-NEXT: mov w11, #664 -; CHECK-NEXT: and x8, x9, x8 -; CHECK-NEXT: lsl w10, w10, #3 -; CHECK-NEXT: and x9, x10, x11 +; CHECK-NEXT: and x8, x8, x10 +; CHECK-NEXT: lsl w9, w9, #3 +; CHECK-NEXT: and x9, x9, x11 ; CHECK-NEXT: orr x8, x8, x9 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: ret @@ -545,8 +545,8 @@ define i32 @test9(i64 %b, i32 %e) { ; CHECK-LABEL: test9: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w1, #23 ; CHECK-NEXT: lsr x0, x0, #12 +; CHECK-NEXT: lsr w8, w1, #23 ; CHECK-NEXT: bfi w0, w8, #23, #9 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/branch-relax-bcc.ll =================================================================== --- llvm/test/CodeGen/AArch64/branch-relax-bcc.ll +++ llvm/test/CodeGen/AArch64/branch-relax-bcc.ll @@ -4,8 +4,8 @@ define i32 @invert_bcc(float %x, float %y) #0 { ; CHECK-LABEL: invert_bcc: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: mov w8, #42 ; CHECK-NEXT: b.pl LBB0_3 ; CHECK-NEXT: b LBB0_2 Index: llvm/test/CodeGen/AArch64/build-one-lane.ll =================================================================== --- llvm/test/CodeGen/AArch64/build-one-lane.ll +++ llvm/test/CodeGen/AArch64/build-one-lane.ll @@ -320,11 +320,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: and x9, x0, #0x1f -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: mov w10, #30 ; CHECK-NEXT: stp q0, q1, [sp] +; CHECK-NEXT: mov w10, #30 ; CHECK-NEXT: strb w10, [x8, x9] ; CHECK-NEXT: ldp q0, q1, [sp], #32 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/build-vector-extract.ll =================================================================== --- llvm/test/CodeGen/AArch64/build-vector-extract.ll +++ llvm/test/CodeGen/AArch64/build-vector-extract.ll @@ -42,10 +42,9 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 %z = zext i32 %e to i64 @@ -68,10 +67,9 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract2_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[2] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 2 %z = zext i32 %e to i64 @@ -94,10 +92,9 @@ define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract3_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 3 %z = zext i32 %e to i64 @@ -146,10 +143,9 @@ define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 %z = zext i32 %e to i64 @@ -171,10 +167,9 @@ define <2 x i64> @extract2_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract2_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[2] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 2 %z = zext i32 %e to i64 @@ -197,10 +192,9 @@ define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract3_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 3 %z = zext i32 %e to i64 @@ -223,10 +217,9 @@ define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract0_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 0 %z = zext i16 %e to i64 @@ -249,10 +242,9 @@ define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract1_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 1 %z = zext i16 %e to i64 @@ -275,10 +267,9 @@ define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract2_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 2 %z = zext i16 %e to i64 @@ -301,10 +292,9 @@ define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract3_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 3 %z = zext i16 %e to i64 @@ -327,10 +317,9 @@ define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract0_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 0 %z = zext i16 %e to i64 @@ -353,10 +342,9 @@ define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract1_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 1 %z = zext i16 %e to i64 @@ -379,10 +367,9 @@ define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract2_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 2 %z = zext i16 %e to i64 @@ -405,10 +392,9 @@ define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract3_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 3 %z = zext i16 %e to i64 @@ -433,10 +419,9 @@ define <2 x i64> @extract0_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract0_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 0 %z = zext i8 %e to i64 @@ -459,10 +444,9 @@ define <2 x i64> @extract1_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract1_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 1 %z = zext i8 %e to i64 @@ -485,10 +469,9 @@ define <2 x i64> @extract2_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract2_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[2] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 2 %z = zext i8 %e to i64 @@ -511,10 +494,9 @@ define <2 x i64> @extract3_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract3_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[3] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 3 %z = zext i8 %e to i64 @@ -537,10 +519,9 @@ define <2 x i64> @extract0_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract0_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 0 %z = zext i8 %e to i64 @@ -563,10 +544,9 @@ define <2 x i64> @extract1_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract1_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 1 %z = zext i8 %e to i64 @@ -589,10 +569,9 @@ define <2 x i64> @extract2_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract2_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[2] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 2 %z = zext i8 %e to i64 @@ -615,10 +594,9 @@ define <2 x i64> @extract3_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract3_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[3] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 3 %z = zext i8 %e to i64 Index: llvm/test/CodeGen/AArch64/cgp-usubo.ll =================================================================== --- llvm/test/CodeGen/AArch64/cgp-usubo.ll +++ llvm/test/CodeGen/AArch64/cgp-usubo.ll @@ -41,8 +41,8 @@ ; CHECK-NEXT: cmp w8, #42 ; CHECK-NEXT: sub w9, w9, w0 ; CHECK-NEXT: cset w8, hi -; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: strb w9, [x1] +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %s = sub i8 42, %x %ov = icmp ugt i8 %x, 42 @@ -60,8 +60,8 @@ ; CHECK-NEXT: cmp w8, #43 ; CHECK-NEXT: sub w9, w9, w0 ; CHECK-NEXT: cset w8, hi -; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: strh w9, [x1] +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %s = sub i16 43, %x %ov = icmp ult i16 43, %x @@ -77,9 +77,9 @@ ; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: sub w9, w0, #44 ; CHECK-NEXT: cmp w8, #44 +; CHECK-NEXT: strh w9, [x1] ; CHECK-NEXT: cset w8, lo ; CHECK-NEXT: mov w0, w8 -; CHECK-NEXT: strh w9, [x1] ; CHECK-NEXT: ret %s = add i16 %x, -44 %ov = icmp ult i16 %x, 44 @@ -93,9 +93,9 @@ ; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: sub w9, w0, #45 ; CHECK-NEXT: cmp w8, #45 +; CHECK-NEXT: strb w9, [x1] ; CHECK-NEXT: cset w8, lo ; CHECK-NEXT: mov w0, w8 -; CHECK-NEXT: strb w9, [x1] ; CHECK-NEXT: ret %ov = icmp ugt i8 45, %x %s = add i8 %x, -45 @@ -111,8 +111,8 @@ ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: sub w9, w0, #1 ; CHECK-NEXT: cset w8, eq -; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: str w9, [x1] +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %s = add i32 %x, -1 %ov = icmp eq i32 %x, 0 Index: llvm/test/CodeGen/AArch64/cmp-select-sign.ll =================================================================== --- llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -198,8 +198,8 @@ ; CHECK-LABEL: not_sign_4xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s ; CHECK-NEXT: and v1.16b, v0.16b, v2.16b ; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b @@ -214,8 +214,8 @@ ; CHECK-LABEL: not_sign_4xi32_2: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %a, @@ -244,19 +244,19 @@ ; CHECK-LABEL: sign_4xi65: ; CHECK: // %bb.0: ; CHECK-NEXT: sbfx x8, x1, #0, #1 -; CHECK-NEXT: sbfx x10, x5, #0, #1 -; CHECK-NEXT: orr x9, x8, #0x1 +; CHECK-NEXT: sbfx x9, x5, #0, #1 +; CHECK-NEXT: sbfx x10, x3, #0, #1 ; CHECK-NEXT: lsr x1, x8, #63 +; CHECK-NEXT: orr x8, x8, #0x1 +; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: sbfx x8, x7, #0, #1 -; CHECK-NEXT: orr x4, x10, #0x1 -; CHECK-NEXT: lsr x5, x10, #63 -; CHECK-NEXT: orr x6, x8, #0x1 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: sbfx x9, x3, #0, #1 -; CHECK-NEXT: orr x2, x9, #0x1 -; CHECK-NEXT: lsr x3, x9, #63 +; CHECK-NEXT: lsr x3, x10, #63 +; CHECK-NEXT: lsr x5, x9, #63 ; CHECK-NEXT: lsr x7, x8, #63 +; CHECK-NEXT: orr x2, x10, #0x1 ; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: orr x4, x9, #0x1 +; CHECK-NEXT: orr x6, x8, #0x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret %c = icmp sgt <4 x i65> %a, Index: llvm/test/CodeGen/AArch64/combine-mul.ll =================================================================== --- llvm/test/CodeGen/AArch64/combine-mul.ll +++ llvm/test/CodeGen/AArch64/combine-mul.ll @@ -28,8 +28,8 @@ define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) { ; CHECK-LABEL: PR48683_vec_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #2 ; CHECK-NEXT: mul v0.4s, v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #2 ; CHECK-NEXT: cmtst v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/consthoist-gep.ll =================================================================== --- llvm/test/CodeGen/AArch64/consthoist-gep.ll +++ llvm/test/CodeGen/AArch64/consthoist-gep.ll @@ -34,43 +34,43 @@ ; CHECK-NEXT: tbnz w9, #0, .LBB0_2 ; CHECK-NEXT: // %bb.1: // %bb3 ; CHECK-NEXT: mov w9, #44032 +; CHECK-NEXT: mov w11, #172 ; CHECK-NEXT: movk w9, #12296, lsl #16 -; CHECK-NEXT: orr w11, w9, #0x4 ; CHECK-NEXT: ldr w10, [x9] ; CHECK-NEXT: stur w10, [x8, #158] -; CHECK-NEXT: ldr w10, [x11] -; CHECK-NEXT: orr w11, w9, #0x8 +; CHECK-NEXT: orr w10, w9, #0x4 +; CHECK-NEXT: ldr w10, [x10] ; CHECK-NEXT: and w10, w10, #0xffff ; CHECK-NEXT: stur w10, [x8, #162] -; CHECK-NEXT: ldr w10, [x11] -; CHECK-NEXT: orr w11, w9, #0xc +; CHECK-NEXT: orr w10, w9, #0x8 +; CHECK-NEXT: ldr w10, [x10] ; CHECK-NEXT: and w10, w10, #0x1f1f1f1f ; CHECK-NEXT: stur w10, [x8, #166] -; CHECK-NEXT: ldr w10, [x11] -; CHECK-NEXT: mov w11, #172 -; CHECK-NEXT: orr w11, w9, w11 +; CHECK-NEXT: orr w10, w9, #0xc +; CHECK-NEXT: ldr w10, [x10] ; CHECK-NEXT: and w10, w10, #0x1f1f1f1f ; CHECK-NEXT: stur w10, [x8, #170] -; CHECK-NEXT: mov w10, #176 -; CHECK-NEXT: ldr w8, [x11] -; CHECK-NEXT: adrp x11, global+528 -; CHECK-NEXT: add x11, x11, :lo12:global+528 -; CHECK-NEXT: orr w10, w9, w10 +; CHECK-NEXT: orr w8, w9, w11 +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: adrp x10, global+528 +; CHECK-NEXT: add x10, x10, :lo12:global+528 +; CHECK-NEXT: mov w11, #176 ; CHECK-NEXT: and w8, w8, #0xffffff -; CHECK-NEXT: str w8, [x11] -; CHECK-NEXT: ldr w8, [x10] -; CHECK-NEXT: mov w10, #180 -; CHECK-NEXT: orr w10, w9, w10 +; CHECK-NEXT: str w8, [x10] +; CHECK-NEXT: orr w8, w9, w11 +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: mov w11, #180 ; CHECK-NEXT: and w8, w8, #0xffffff -; CHECK-NEXT: str w8, [x11, #4] -; CHECK-NEXT: ldr w8, [x10] -; CHECK-NEXT: mov w10, #184 -; CHECK-NEXT: orr w9, w9, w10 +; CHECK-NEXT: str w8, [x10, #4] +; CHECK-NEXT: orr w8, w9, w11 +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: mov w11, #184 ; CHECK-NEXT: and w8, w8, #0xffffff -; CHECK-NEXT: str w8, [x11, #8] -; CHECK-NEXT: ldr w8, [x9] +; CHECK-NEXT: str w8, [x10, #8] +; CHECK-NEXT: orr w8, w9, w11 +; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: and w8, w8, #0xffffff -; CHECK-NEXT: str w8, [x11, #12] +; CHECK-NEXT: str w8, [x10, #12] ; CHECK-NEXT: .LBB0_2: // %bb19 ; CHECK-NEXT: ret bb: Index: llvm/test/CodeGen/AArch64/copyprop.ll =================================================================== --- llvm/test/CodeGen/AArch64/copyprop.ll +++ llvm/test/CodeGen/AArch64/copyprop.ll @@ -7,19 +7,19 @@ ; CHECK-NEXT: cmp w0, #10 ; CHECK-NEXT: b.ne .LBB0_2 ; CHECK-NEXT: // %bb.1: // %bb.0 -; CHECK-NEXT: mov w9, #15 +; CHECK-NEXT: mov w8, #15 +; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: str w9, [x2] -; CHECK-NEXT: mov w9, #12 ; CHECK-NEXT: str w8, [x1] -; CHECK-NEXT: str w9, [x4] +; CHECK-NEXT: mov w8, #12 +; CHECK-NEXT: str w8, [x4] ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: // %bb.1 ; CHECK-NEXT: mov w9, #25 ; CHECK-NEXT: str w9, [x3] -; CHECK-NEXT: mov w9, #12 ; CHECK-NEXT: str wzr, [x1] -; CHECK-NEXT: str w9, [x4] +; CHECK-NEXT: mov w8, #12 +; CHECK-NEXT: str w8, [x4] ; CHECK-NEXT: ret %1 = icmp eq i32 %v, 10 br i1 %1, label %bb.0, label %bb.1 Index: llvm/test/CodeGen/AArch64/ctpop-nonean.ll =================================================================== --- llvm/test/CodeGen/AArch64/ctpop-nonean.ll +++ llvm/test/CodeGen/AArch64/ctpop-nonean.ll @@ -6,30 +6,30 @@ define i128 @ctpop_i128(i128 %i) { ; CHECK-LABEL: ctpop_i128: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr x8, x1, #1 -; CHECK-NEXT: lsr x9, x0, #1 +; CHECK-NEXT: lsr x8, x0, #1 +; CHECK-NEXT: lsr x9, x1, #1 ; CHECK-NEXT: and x8, x8, #0x5555555555555555 ; CHECK-NEXT: and x9, x9, #0x5555555555555555 -; CHECK-NEXT: sub x8, x1, x8 -; CHECK-NEXT: sub x9, x0, x9 -; CHECK-NEXT: and x10, x8, #0x3333333333333333 -; CHECK-NEXT: lsr x8, x8, #2 +; CHECK-NEXT: sub x8, x0, x8 +; CHECK-NEXT: sub x9, x1, x9 +; CHECK-NEXT: lsr x10, x8, #2 ; CHECK-NEXT: and x11, x9, #0x3333333333333333 ; CHECK-NEXT: lsr x9, x9, #2 ; CHECK-NEXT: and x8, x8, #0x3333333333333333 -; CHECK-NEXT: and x9, x9, #0x3333333333333333 -; CHECK-NEXT: add x8, x10, x8 -; CHECK-NEXT: add x9, x11, x9 -; CHECK-NEXT: mov x10, #72340172838076673 +; CHECK-NEXT: and x10, x10, #0x3333333333333333 ; CHECK-NEXT: mov x1, xzr +; CHECK-NEXT: add x8, x8, x10 +; CHECK-NEXT: and x9, x9, #0x3333333333333333 ; CHECK-NEXT: add x8, x8, x8, lsr #4 +; CHECK-NEXT: add x9, x11, x9 ; CHECK-NEXT: add x9, x9, x9, lsr #4 +; CHECK-NEXT: mov x10, #72340172838076673 ; CHECK-NEXT: and x8, x8, #0xf0f0f0f0f0f0f0f -; CHECK-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f ; CHECK-NEXT: mul x8, x8, x10 +; CHECK-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f ; CHECK-NEXT: mul x9, x9, x10 -; CHECK-NEXT: lsr x9, x9, #56 -; CHECK-NEXT: add x0, x9, x8, lsr #56 +; CHECK-NEXT: lsr x8, x8, #56 +; CHECK-NEXT: add x0, x8, x9, lsr #56 ; CHECK-NEXT: ret %c = call i128 @llvm.ctpop.i128(i128 %i) ret i128 %c Index: llvm/test/CodeGen/AArch64/dag-numsignbits.ll =================================================================== --- llvm/test/CodeGen/AArch64/dag-numsignbits.ll +++ llvm/test/CodeGen/AArch64/dag-numsignbits.ll @@ -6,13 +6,13 @@ define void @signbits_vXi1(<4 x i16> %a1) { ; CHECK-LABEL: signbits_vXi1: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: movi v2.4h, #1 ; CHECK-NEXT: dup v0.4h, v0.h[0] +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: mov w1, wzr ; CHECK-NEXT: mov w2, wzr -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-NEXT: cmgt v0.4h, v2.4h, v0.4h ; CHECK-NEXT: umov w0, v0.h[0] Index: llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll =================================================================== --- llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll +++ llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll @@ -82,12 +82,13 @@ ; ALL-NEXT: smov w9, v1.b[0] ; ALL-NEXT: smov w16, v0.b[7] ; ALL-NEXT: smov w17, v0.b[8] +; ALL-NEXT: smov w18, v0.b[9] ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: smov w10, v1.b[2] ; ALL-NEXT: sdiv w10, w11, w10 ; ALL-NEXT: smov w11, v1.b[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: smov w9, v1.b[9] +; ALL-NEXT: smov w9, v1.b[10] ; ALL-NEXT: mov v2.b[1], w8 ; ALL-NEXT: sdiv w11, w12, w11 ; ALL-NEXT: smov w12, v1.b[4] @@ -109,10 +110,9 @@ ; ALL-NEXT: smov w16, v1.b[8] ; ALL-NEXT: mov v2.b[6], w14 ; ALL-NEXT: sdiv w16, w17, w16 -; ALL-NEXT: smov w17, v0.b[9] +; ALL-NEXT: smov w17, v1.b[9] ; ALL-NEXT: mov v2.b[7], w15 -; ALL-NEXT: sdiv w8, w17, w9 -; ALL-NEXT: smov w9, v1.b[10] +; ALL-NEXT: sdiv w8, w18, w17 ; ALL-NEXT: mov v2.b[8], w16 ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: smov w10, v1.b[11] @@ -153,6 +153,7 @@ ; ALL-NEXT: smov w11, v0.h[2] ; ALL-NEXT: smov w12, v0.h[3] ; ALL-NEXT: smov w13, v0.h[4] +; ALL-NEXT: smov w14, v0.h[5] ; ALL-NEXT: sdiv w8, w9, w8 ; ALL-NEXT: smov w9, v1.h[0] ; ALL-NEXT: sdiv w9, w10, w9 @@ -160,18 +161,17 @@ ; ALL-NEXT: sdiv w10, w11, w10 ; ALL-NEXT: smov w11, v1.h[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: smov w9, v1.h[5] +; ALL-NEXT: smov w9, v1.h[6] ; ALL-NEXT: mov v2.h[1], w8 ; ALL-NEXT: sdiv w11, w12, w11 ; ALL-NEXT: smov w12, v1.h[4] ; ALL-NEXT: mov v2.h[2], w10 ; ALL-NEXT: smov w10, v0.h[6] ; ALL-NEXT: sdiv w12, w13, w12 -; ALL-NEXT: smov w13, v0.h[5] +; ALL-NEXT: smov w13, v1.h[5] ; ALL-NEXT: mov v2.h[3], w11 ; ALL-NEXT: smov w11, v0.h[7] -; ALL-NEXT: sdiv w8, w13, w9 -; ALL-NEXT: smov w9, v1.h[6] +; ALL-NEXT: sdiv w8, w14, w13 ; ALL-NEXT: mov v2.h[4], w12 ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: smov w10, v1.h[7] @@ -226,8 +226,8 @@ ; ALL-NEXT: mov x10, v1.d[1] ; ALL-NEXT: mov x11, v0.d[1] ; ALL-NEXT: sdiv x9, x9, x8 -; ALL-NEXT: mul x8, x9, x8 ; ALL-NEXT: sdiv x11, x11, x10 +; ALL-NEXT: mul x8, x9, x8 ; ALL-NEXT: fmov d2, x9 ; ALL-NEXT: fmov d1, x8 ; ALL-NEXT: mul x10, x11, x10 Index: llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll =================================================================== --- llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll +++ llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll @@ -82,12 +82,13 @@ ; ALL-NEXT: umov w9, v1.b[0] ; ALL-NEXT: umov w16, v0.b[7] ; ALL-NEXT: umov w17, v0.b[8] +; ALL-NEXT: umov w18, v0.b[9] ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: umov w10, v1.b[2] ; ALL-NEXT: udiv w10, w11, w10 ; ALL-NEXT: umov w11, v1.b[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: umov w9, v1.b[9] +; ALL-NEXT: umov w9, v1.b[10] ; ALL-NEXT: mov v2.b[1], w8 ; ALL-NEXT: udiv w11, w12, w11 ; ALL-NEXT: umov w12, v1.b[4] @@ -109,10 +110,9 @@ ; ALL-NEXT: umov w16, v1.b[8] ; ALL-NEXT: mov v2.b[6], w14 ; ALL-NEXT: udiv w16, w17, w16 -; ALL-NEXT: umov w17, v0.b[9] +; ALL-NEXT: umov w17, v1.b[9] ; ALL-NEXT: mov v2.b[7], w15 -; ALL-NEXT: udiv w8, w17, w9 -; ALL-NEXT: umov w9, v1.b[10] +; ALL-NEXT: udiv w8, w18, w17 ; ALL-NEXT: mov v2.b[8], w16 ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: umov w10, v1.b[11] @@ -153,6 +153,7 @@ ; ALL-NEXT: umov w11, v0.h[2] ; ALL-NEXT: umov w12, v0.h[3] ; ALL-NEXT: umov w13, v0.h[4] +; ALL-NEXT: umov w14, v0.h[5] ; ALL-NEXT: udiv w8, w9, w8 ; ALL-NEXT: umov w9, v1.h[0] ; ALL-NEXT: udiv w9, w10, w9 @@ -160,18 +161,17 @@ ; ALL-NEXT: udiv w10, w11, w10 ; ALL-NEXT: umov w11, v1.h[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: umov w9, v1.h[5] +; ALL-NEXT: umov w9, v1.h[6] ; ALL-NEXT: mov v2.h[1], w8 ; ALL-NEXT: udiv w11, w12, w11 ; ALL-NEXT: umov w12, v1.h[4] ; ALL-NEXT: mov v2.h[2], w10 ; ALL-NEXT: umov w10, v0.h[6] ; ALL-NEXT: udiv w12, w13, w12 -; ALL-NEXT: umov w13, v0.h[5] +; ALL-NEXT: umov w13, v1.h[5] ; ALL-NEXT: mov v2.h[3], w11 ; ALL-NEXT: umov w11, v0.h[7] -; ALL-NEXT: udiv w8, w13, w9 -; ALL-NEXT: umov w9, v1.h[6] +; ALL-NEXT: udiv w8, w14, w13 ; ALL-NEXT: mov v2.h[4], w12 ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: umov w10, v1.h[7] @@ -226,8 +226,8 @@ ; ALL-NEXT: mov x10, v1.d[1] ; ALL-NEXT: mov x11, v0.d[1] ; ALL-NEXT: udiv x9, x9, x8 -; ALL-NEXT: mul x8, x9, x8 ; ALL-NEXT: udiv x11, x11, x10 +; ALL-NEXT: mul x8, x9, x8 ; ALL-NEXT: fmov d2, x9 ; ALL-NEXT: fmov d1, x8 ; ALL-NEXT: mul x10, x11, x10 Index: llvm/test/CodeGen/AArch64/expand-select.ll =================================================================== --- llvm/test/CodeGen/AArch64/expand-select.ll +++ llvm/test/CodeGen/AArch64/expand-select.ll @@ -7,17 +7,17 @@ ; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: fmov s0, wzr ; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: ldp x9, x8, [sp] +; CHECK-NEXT: ldr x10, [sp] +; CHECK-NEXT: ldp x8, x9, [sp, #8] ; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s -; CHECK-NEXT: fmov w10, s0 -; CHECK-NEXT: tst w10, #0x1 -; CHECK-NEXT: ldr x10, [sp, #16] +; CHECK-NEXT: fmov w11, s0 +; CHECK-NEXT: tst w11, #0x1 ; CHECK-NEXT: csel x8, x5, x8, ne -; CHECK-NEXT: csel x9, x4, x9, ne -; CHECK-NEXT: csel x11, x3, x7, ne -; CHECK-NEXT: csel x12, x2, x6, ne -; CHECK-NEXT: stp x9, x8, [x10, #16] -; CHECK-NEXT: stp x12, x11, [x10] +; CHECK-NEXT: csel x10, x4, x10, ne +; CHECK-NEXT: stp x10, x8, [x9, #16] +; CHECK-NEXT: csel x8, x3, x7, ne +; CHECK-NEXT: csel x10, x2, x6, ne +; CHECK-NEXT: stp x10, x8, [x9] ; CHECK-NEXT: ret %cond = and i32 %In1, 1 %cbool = icmp eq i32 %cond, 0 @@ -33,9 +33,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: fmov s0, wzr -; CHECK-NEXT: ldp x10, x9, [sp] ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: ldr x11, [sp, #16] +; CHECK-NEXT: ldp x10, x9, [sp] ; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s ; CHECK-NEXT: dup v1.4s, v0.s[0] ; CHECK-NEXT: mov x8, v1.d[1] @@ -44,13 +44,13 @@ ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: csel x9, x5, x9, ne ; CHECK-NEXT: csel x10, x4, x10, ne -; CHECK-NEXT: tst w8, #0x1 -; CHECK-NEXT: csel x8, x2, x6, ne -; CHECK-NEXT: csel x12, x3, x7, ne ; CHECK-NEXT: stur x10, [x11, #12] +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: str w9, [x11, #20] +; CHECK-NEXT: csel x8, x2, x6, ne +; CHECK-NEXT: csel x10, x3, x7, ne ; CHECK-NEXT: str x8, [x11] -; CHECK-NEXT: str w12, [x11, #8] +; CHECK-NEXT: str w10, [x11, #8] ; CHECK-NEXT: ret %cond = and i32 %In1, 1 %cbool = icmp eq i32 %cond, 0 Index: llvm/test/CodeGen/AArch64/extract-bits.ll =================================================================== --- llvm/test/CodeGen/AArch64/extract-bits.ll +++ llvm/test/CodeGen/AArch64/extract-bits.ll @@ -71,12 +71,12 @@ define i32 @bextr32_a2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_a2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsl w8, w8, w2 -; CHECK-NEXT: sub w8, w8, #1 -; CHECK-NEXT: lsr w9, w9, w1 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: lsl w9, w9, w2 +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: sub w9, w9, #1 +; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %val = load i32, i32* %w %shifted = lshr i32 %val, %numskipbits @@ -89,12 +89,12 @@ define i32 @bextr32_a3_load_indexzext(i32* %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; CHECK-LABEL: bextr32_a3_load_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsl w8, w8, w2 -; CHECK-NEXT: sub w8, w8, #1 -; CHECK-NEXT: lsr w9, w9, w1 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: lsl w9, w9, w2 +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: sub w9, w9, #1 +; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %val = load i32, i32* %w %skip = zext i8 %numskipbits to i32 @@ -179,12 +179,12 @@ define i64 @bextr64_a2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_a2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsl x8, x8, x2 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: lsr x9, x9, x1 -; CHECK-NEXT: and x0, x8, x9 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: lsl x9, x9, x2 +; CHECK-NEXT: lsr x8, x8, x1 +; CHECK-NEXT: sub x9, x9, #1 +; CHECK-NEXT: and x0, x9, x8 ; CHECK-NEXT: ret %val = load i64, i64* %w %shifted = lshr i64 %val, %numskipbits @@ -197,14 +197,14 @@ define i64 @bextr64_a3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; CHECK-LABEL: bextr64_a3_load_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov w9, #1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: lsl x8, x8, x2 +; CHECK-NEXT: lsl x9, x9, x2 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: lsr x9, x9, x1 -; CHECK-NEXT: and x0, x8, x9 +; CHECK-NEXT: lsr x8, x8, x1 +; CHECK-NEXT: sub x9, x9, #1 +; CHECK-NEXT: and x0, x9, x8 ; CHECK-NEXT: ret %val = load i64, i64* %w %skip = zext i8 %numskipbits to i64 @@ -570,12 +570,12 @@ define i32 @bextr32_c2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_c2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: ldr w9, [x0] +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w2 ; CHECK-NEXT: mov w10, #-1 -; CHECK-NEXT: lsr w9, w9, w1 -; CHECK-NEXT: lsr w8, w10, w8 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: lsr w9, w10, w9 +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %val = load i32, i32* %w %shifted = lshr i32 %val, %numskipbits @@ -588,14 +588,14 @@ define i32 @bextr32_c3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_c3_load_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: sub w8, w8, w2 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #32 +; CHECK-NEXT: sub w9, w9, w2 ; CHECK-NEXT: mov w10, #-1 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w9, w9, w1 -; CHECK-NEXT: lsr w8, w10, w8 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: lsr w9, w10, w9 +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %val = load i32, i32* %w %skip = zext i8 %numskipbits to i32 @@ -664,12 +664,12 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_c2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x2 -; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x2 ; CHECK-NEXT: mov x10, #-1 -; CHECK-NEXT: lsr x9, x9, x1 -; CHECK-NEXT: lsr x8, x10, x8 -; CHECK-NEXT: and x0, x8, x9 +; CHECK-NEXT: lsr x9, x10, x9 +; CHECK-NEXT: lsr x8, x8, x1 +; CHECK-NEXT: and x0, x9, x8 ; CHECK-NEXT: ret %val = load i64, i64* %w %shifted = lshr i64 %val, %numskipbits @@ -682,14 +682,14 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_c3_load_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: sub w8, w8, w2 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov w9, #64 +; CHECK-NEXT: sub w9, w9, w2 ; CHECK-NEXT: mov x10, #-1 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr x9, x9, x1 -; CHECK-NEXT: lsr x8, x10, x8 -; CHECK-NEXT: and x0, x8, x9 +; CHECK-NEXT: lsr x9, x10, x9 +; CHECK-NEXT: lsr x8, x8, x1 +; CHECK-NEXT: and x0, x9, x8 ; CHECK-NEXT: ret %val = load i64, i64* %w %skip = zext i8 %numskipbits to i64 @@ -782,10 +782,10 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_d0: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: lsr w9, w0, w1 -; CHECK-NEXT: lsl w9, w9, w8 -; CHECK-NEXT: lsr w0, w9, w8 +; CHECK-NEXT: lsr w8, w0, w1 +; CHECK-NEXT: neg w9, w2 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %shifted = lshr i32 %val, %numskipbits %numhighbits = sub i32 32, %numlowbits @@ -797,8 +797,8 @@ define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_d1_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: mov w8, #32 ; CHECK-NEXT: lsr w9, w0, w1 ; CHECK-NEXT: sub w8, w8, w2 ; CHECK-NEXT: lsl w9, w9, w8 @@ -833,10 +833,10 @@ define i32 @bextr32_d3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_d3_load_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32 ; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: sub w8, w8, w2 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: mov w8, #32 +; CHECK-NEXT: sub w8, w8, w2 ; CHECK-NEXT: lsr w9, w9, w1 ; CHECK-NEXT: lsl w9, w9, w8 ; CHECK-NEXT: lsr w0, w9, w8 @@ -856,10 +856,10 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_d0: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x2 -; CHECK-NEXT: lsr x9, x0, x1 -; CHECK-NEXT: lsl x9, x9, x8 -; CHECK-NEXT: lsr x0, x9, x8 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: neg x9, x2 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %shifted = lshr i64 %val, %numskipbits %numhighbits = sub i64 64, %numlowbits @@ -871,8 +871,8 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_d1_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: mov w8, #64 ; CHECK-NEXT: lsr x9, x0, x1 ; CHECK-NEXT: sub w8, w8, w2 ; CHECK-NEXT: lsl x9, x9, x8 @@ -907,10 +907,10 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_d3_load_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64 ; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: sub w8, w8, w2 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: mov w8, #64 +; CHECK-NEXT: sub w8, w8, w2 ; CHECK-NEXT: lsr x9, x9, x1 ; CHECK-NEXT: lsl x9, x9, x8 ; CHECK-NEXT: lsr x0, x9, x8 @@ -931,10 +931,10 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_32_d0: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x2 -; CHECK-NEXT: lsr x9, x0, x1 -; CHECK-NEXT: lsl x9, x9, x8 -; CHECK-NEXT: lsr x0, x9, x8 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: neg x9, x2 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret %shifted = lshr i64 %val, %numskipbits @@ -949,10 +949,10 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_32_d1: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: lsr x9, x0, x1 -; CHECK-NEXT: lsl w9, w9, w8 -; CHECK-NEXT: lsr w0, w9, w8 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: neg w9, w2 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %shifted = lshr i64 %val, %numskipbits %truncshifted = trunc i64 %shifted to i32 Index: llvm/test/CodeGen/AArch64/extract-lowbits.ll =================================================================== --- llvm/test/CodeGen/AArch64/extract-lowbits.ll +++ llvm/test/CodeGen/AArch64/extract-lowbits.ll @@ -146,8 +146,8 @@ ; CHECK-LABEL: bzhi64_a3_load_indexzext: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: ldr x9, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldr x9, [x0] ; CHECK-NEXT: lsl x8, x8, x1 ; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: and x0, x8, x9 @@ -331,9 +331,9 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_c0: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: mov w9, #-1 -; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: lsr w8, w8, w9 ; CHECK-NEXT: and w0, w8, w0 ; CHECK-NEXT: ret %numhighbits = sub i32 32, %numlowbits @@ -361,11 +361,11 @@ define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_c2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: ldr w9, [x0] +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 ; CHECK-NEXT: mov w10, #-1 -; CHECK-NEXT: lsr w8, w10, w8 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: lsr w9, w10, w9 +; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %val = load i32, i32* %w %numhighbits = sub i32 32, %numlowbits @@ -395,9 +395,9 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_c4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: mov w9, #-1 -; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: lsr w8, w8, w9 ; CHECK-NEXT: and w0, w0, w8 ; CHECK-NEXT: ret %numhighbits = sub i32 32, %numlowbits @@ -411,9 +411,9 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_c0: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x1 -; CHECK-NEXT: mov x9, #-1 -; CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: lsr x8, x8, x9 ; CHECK-NEXT: and x0, x8, x0 ; CHECK-NEXT: ret %numhighbits = sub i64 64, %numlowbits @@ -441,11 +441,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_c2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x1 -; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 ; CHECK-NEXT: mov x10, #-1 -; CHECK-NEXT: lsr x8, x10, x8 -; CHECK-NEXT: and x0, x8, x9 +; CHECK-NEXT: lsr x9, x10, x9 +; CHECK-NEXT: and x0, x9, x8 ; CHECK-NEXT: ret %val = load i64, i64* %w %numhighbits = sub i64 64, %numlowbits @@ -475,9 +475,9 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_c4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x1 -; CHECK-NEXT: mov x9, #-1 -; CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: lsr x8, x8, x9 ; CHECK-NEXT: and x0, x0, x8 ; CHECK-NEXT: ret %numhighbits = sub i64 64, %numlowbits @@ -521,10 +521,10 @@ define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_d2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: lsl w9, w9, w8 -; CHECK-NEXT: lsr w0, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, i32* %w %numhighbits = sub i32 32, %numlowbits @@ -536,11 +536,11 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { ; CHECK-LABEL: bzhi32_d3_load_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: sub w8, w8, w1 -; CHECK-NEXT: lsl w9, w9, w8 -; CHECK-NEXT: lsr w0, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #32 +; CHECK-NEXT: sub w9, w9, w1 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, i32* %w %numhighbits = sub i8 32, %numlowbits @@ -583,10 +583,10 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_d2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: lsl x9, x9, x8 -; CHECK-NEXT: lsr x0, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, i64* %w %numhighbits = sub i64 64, %numlowbits @@ -598,11 +598,11 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind { ; CHECK-LABEL: bzhi64_d3_load_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: sub w8, w8, w1 -; CHECK-NEXT: lsl x9, x9, x8 -; CHECK-NEXT: lsr x0, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov w9, #64 +; CHECK-NEXT: sub w9, w9, w1 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, i64* %w %numhighbits = sub i8 64, %numlowbits Index: llvm/test/CodeGen/AArch64/faddp.ll =================================================================== --- llvm/test/CodeGen/AArch64/faddp.ll +++ llvm/test/CodeGen/AArch64/faddp.ll @@ -243,11 +243,11 @@ define <16 x float> @addp_v16f32(<16 x float> %a) { ; CHECK-LABEL: addp_v16f32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: faddp v3.4s, v2.4s, v3.4s ; CHECK-NEXT: faddp v1.4s, v0.4s, v1.4s -; CHECK-NEXT: zip1 v2.4s, v3.4s, v3.4s +; CHECK-NEXT: faddp v3.4s, v2.4s, v3.4s ; CHECK-NEXT: zip1 v0.4s, v1.4s, v1.4s ; CHECK-NEXT: zip2 v1.4s, v1.4s, v1.4s +; CHECK-NEXT: zip1 v2.4s, v3.4s, v3.4s ; CHECK-NEXT: zip2 v3.4s, v3.4s, v3.4s ; CHECK-NEXT: ret entry: Index: llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll =================================================================== --- llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll +++ llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll @@ -410,9 +410,9 @@ define i64 @load_breg_offreg_immoff_2(i64 %a, i64 %b) { ; SDAG-LABEL: load_breg_offreg_immoff_2: ; SDAG: ; %bb.0: -; SDAG-NEXT: mov w8, #61440 -; SDAG-NEXT: add x9, x0, x1 -; SDAG-NEXT: ldr x0, [x9, x8] +; SDAG-NEXT: add x8, x0, x1 +; SDAG-NEXT: mov w9, #61440 +; SDAG-NEXT: ldr x0, [x8, x9] ; SDAG-NEXT: ret ; ; FAST-LABEL: load_breg_offreg_immoff_2: @@ -786,25 +786,15 @@ } define void @store_fi(i64 %i) { -; SDAG-LABEL: store_fi: -; SDAG: ; %bb.0: -; SDAG-NEXT: sub sp, sp, #32 -; SDAG-NEXT: .cfi_def_cfa_offset 32 -; SDAG-NEXT: mov x8, sp -; SDAG-NEXT: mov w9, #47 -; SDAG-NEXT: str w9, [x8, x0, lsl #2] -; SDAG-NEXT: add sp, sp, #32 -; SDAG-NEXT: ret -; -; FAST-LABEL: store_fi: -; FAST: ; %bb.0: -; FAST-NEXT: sub sp, sp, #32 -; FAST-NEXT: .cfi_def_cfa_offset 32 -; FAST-NEXT: mov w8, #47 -; FAST-NEXT: mov x9, sp -; FAST-NEXT: str w8, [x9, x0, lsl #2] -; FAST-NEXT: add sp, sp, #32 -; FAST-NEXT: ret +; CHECK-LABEL: store_fi: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: mov w9, #47 +; CHECK-NEXT: str w9, [x8, x0, lsl #2] +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret %1 = alloca [8 x i32] %2 = ptrtoint [8 x i32]* %1 to i64 %3 = mul i64 %i, 4 Index: llvm/test/CodeGen/AArch64/fast-isel-gep.ll =================================================================== --- llvm/test/CodeGen/AArch64/fast-isel-gep.ll +++ llvm/test/CodeGen/AArch64/fast-isel-gep.ll @@ -54,9 +54,9 @@ ; CHECK-LABEL: test_array5: ; CHECK: ; %bb.0: ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: mov x8, #4 -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: madd x0, x9, x8, x0 +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov x9, #4 +; CHECK-NEXT: madd x0, x8, x9, x0 ; CHECK-NEXT: ret %1 = getelementptr inbounds i32, i32* %a, i32 %i ret i32* %1 Index: llvm/test/CodeGen/AArch64/fast-isel-memcpy.ll =================================================================== --- llvm/test/CodeGen/AArch64/fast-isel-memcpy.ll +++ llvm/test/CodeGen/AArch64/fast-isel-memcpy.ll @@ -5,9 +5,9 @@ define void @test(i64 %a, i8* %b) { ; CHECK-LABEL: test: ; CHECK: ; %bb.0: -; CHECK-NEXT: and x8, x0, #0x7fffffffffffffff -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: str x9, [x8] +; CHECK-NEXT: ldr x8, [x1] +; CHECK-NEXT: and x9, x0, #0x7fffffffffffffff +; CHECK-NEXT: str x8, [x9] ; CHECK-NEXT: ret %1 = and i64 %a, 9223372036854775807 %2 = inttoptr i64 %1 to i8* Index: llvm/test/CodeGen/AArch64/fast-isel-shift.ll =================================================================== --- llvm/test/CodeGen/AArch64/fast-isel-shift.ll +++ llvm/test/CodeGen/AArch64/fast-isel-shift.ll @@ -391,9 +391,9 @@ define zeroext i8 @lsrv_i8(i8 %a, i8 %b) { ; CHECK-LABEL: lsrv_i8: ; CHECK: ; %bb.0: -; CHECK-NEXT: and w8, w1, #0xff -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: and w9, w1, #0xff +; CHECK-NEXT: lsr w8, w8, w9 ; CHECK-NEXT: and w8, w8, #0xff ; CHECK-NEXT: uxtb w0, w8 ; CHECK-NEXT: ret @@ -458,9 +458,9 @@ define zeroext i16 @lsrv_i16(i16 %a, i16 %b) { ; CHECK-LABEL: lsrv_i16: ; CHECK: ; %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: and w9, w1, #0xffff +; CHECK-NEXT: lsr w8, w8, w9 ; CHECK-NEXT: and w8, w8, #0xffff ; CHECK-NEXT: uxth w0, w8 ; CHECK-NEXT: ret @@ -517,9 +517,9 @@ define zeroext i8 @asrv_i8(i8 %a, i8 %b) { ; CHECK-LABEL: asrv_i8: ; CHECK: ; %bb.0: -; CHECK-NEXT: and w8, w1, #0xff -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: asr w8, w9, w8 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: and w9, w1, #0xff +; CHECK-NEXT: asr w8, w8, w9 ; CHECK-NEXT: and w8, w8, #0xff ; CHECK-NEXT: uxtb w0, w8 ; CHECK-NEXT: ret @@ -582,9 +582,9 @@ define zeroext i16 @asrv_i16(i16 %a, i16 %b) { ; CHECK-LABEL: asrv_i16: ; CHECK: ; %bb.0: -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: asr w8, w9, w8 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: and w9, w1, #0xffff +; CHECK-NEXT: asr w8, w8, w9 ; CHECK-NEXT: and w8, w8, #0xffff ; CHECK-NEXT: uxth w0, w8 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/fcvt_combine.ll =================================================================== --- llvm/test/CodeGen/AArch64/fcvt_combine.ll +++ llvm/test/CodeGen/AArch64/fcvt_combine.ll @@ -237,8 +237,8 @@ define <4 x i16> @test_v4f16(<4 x half> %in) { ; CHECK-NO16-LABEL: test_v4f16: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h @@ -258,8 +258,8 @@ define <4 x i32> @test_v4f16_i32(<4 x half> %in) { ; CHECK-NO16-LABEL: test_v4f16_i32: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h @@ -332,8 +332,8 @@ ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: fcvtzs w9, d1 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %mul.i = fmul <2 x double> %d, @@ -377,8 +377,8 @@ ; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzs x8, s0 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %mul.i = fmul <2 x float> %f, %vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %mul.i) @@ -564,35 +564,35 @@ ; CHECK-NO16-NEXT: csel w13, w13, w9, lt ; CHECK-NO16-NEXT: mov s0, v1.s[2] ; CHECK-NO16-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: fmov s2, w11 ; CHECK-NO16-NEXT: csel w13, w13, w10, gt ; CHECK-NO16-NEXT: cmp w14, w9 ; CHECK-NO16-NEXT: csel w14, w14, w9, lt ; CHECK-NO16-NEXT: cmn w14, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: fcvtzs w16, s0 ; CHECK-NO16-NEXT: csel w14, w14, w10, gt ; CHECK-NO16-NEXT: cmp w15, w9 ; CHECK-NO16-NEXT: csel w15, w15, w9, lt -; CHECK-NO16-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: csel w11, w15, w10, gt -; CHECK-NO16-NEXT: fcvtzs w15, s0 ; CHECK-NO16-NEXT: mov s0, v1.s[3] -; CHECK-NO16-NEXT: mov v2.s[1], w8 +; CHECK-NO16-NEXT: cmn w15, #8, lsl #12 // =32768 ; CHECK-NO16-NEXT: fmov s1, w11 -; CHECK-NO16-NEXT: cmp w15, w9 -; CHECK-NO16-NEXT: csel w8, w15, w9, lt +; CHECK-NO16-NEXT: csel w11, w15, w10, gt +; CHECK-NO16-NEXT: cmp w16, w9 +; CHECK-NO16-NEXT: fmov s2, w11 +; CHECK-NO16-NEXT: mov v1.s[1], w8 +; CHECK-NO16-NEXT: csel w8, w16, w9, lt ; CHECK-NO16-NEXT: fcvtzs w11, s0 ; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: mov v1.s[1], w14 +; CHECK-NO16-NEXT: mov v2.s[1], w14 ; CHECK-NO16-NEXT: csel w8, w8, w10, gt -; CHECK-NO16-NEXT: mov v2.s[2], w12 +; CHECK-NO16-NEXT: mov v1.s[2], w12 ; CHECK-NO16-NEXT: cmp w11, w9 -; CHECK-NO16-NEXT: csel w9, w11, w9, lt -; CHECK-NO16-NEXT: mov v1.s[2], w8 -; CHECK-NO16-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: csel w8, w9, w10, gt -; CHECK-NO16-NEXT: mov v2.s[3], w13 -; CHECK-NO16-NEXT: mov v1.s[3], w8 -; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-NO16-NEXT: mov v2.s[2], w8 +; CHECK-NO16-NEXT: csel w8, w11, w9, lt +; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: csel w8, w8, w10, gt +; CHECK-NO16-NEXT: mov v1.s[3], w13 +; CHECK-NO16-NEXT: mov v2.s[3], w8 +; CHECK-NO16-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-NO16-NEXT: ret ; ; CHECK-FP16-LABEL: test_v8f16_sat: @@ -607,8 +607,8 @@ define <4 x i16> @test_v4f16_sat(<4 x half> %in) { ; CHECK-NO16-LABEL: test_v4f16_sat: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h @@ -628,8 +628,8 @@ define <4 x i32> @test_v4f16_i32_sat(<4 x half> %in) { ; CHECK-NO16-LABEL: test_v4f16_i32_sat: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h @@ -651,8 +651,8 @@ define <4 x i32> @test_extrasat(<4 x float> %f) { ; CHECK-LABEL: test_extrasat: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff ; CHECK-NEXT: fcvtzu v0.4s, v0.4s, #3 +; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %mul.i = fmul <4 x float> %f, Index: llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll =================================================================== --- llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll +++ llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll @@ -113,8 +113,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: rbit w9, w0 ; CHECK-NEXT: mov w8, #10 -; CHECK-NEXT: clz w9, w9 ; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: clz w9, w9 ; CHECK-NEXT: csel w0, w8, w9, eq ; CHECK-NEXT: ret entry: Index: llvm/test/CodeGen/AArch64/fold-global-offsets.ll =================================================================== --- llvm/test/CodeGen/AArch64/fold-global-offsets.ll +++ llvm/test/CodeGen/AArch64/fold-global-offsets.ll @@ -100,17 +100,17 @@ define i64 @f6() { ; CHECK-LABEL: f6: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1048576 -; CHECK-NEXT: adrp x9, x2 -; CHECK-NEXT: add x9, x9, :lo12:x2 -; CHECK-NEXT: ldr x0, [x9, x8] +; CHECK-NEXT: adrp x8, x2 +; CHECK-NEXT: add x8, x8, :lo12:x2 +; CHECK-NEXT: mov w9, #1048576 +; CHECK-NEXT: ldr x0, [x8, x9] ; CHECK-NEXT: ret ; ; GISEL-LABEL: f6: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #1048576 ; GISEL-NEXT: adrp x9, x2 ; GISEL-NEXT: add x9, x9, :lo12:x2 +; GISEL-NEXT: mov w8, #1048576 ; GISEL-NEXT: ldr x0, [x9, x8] ; GISEL-NEXT: ret %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 131072) Index: llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll +++ llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll @@ -235,22 +235,22 @@ ; CHECK-NEXT: mov s2, v1.s[1] ; CHECK-NEXT: mov s3, v0.s[1] ; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: mov s4, v1.s[2] -; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: mov s1, v1.s[3] -; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: csetm w8, eq ; CHECK-NEXT: fcmp s3, s2 -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: mov s2, v1.s[2] +; CHECK-NEXT: mov s3, v0.s[2] +; CHECK-NEXT: fmov s4, w8 +; CHECK-NEXT: mov s1, v1.s[3] ; CHECK-NEXT: csetm w8, eq -; CHECK-NEXT: fcmp s5, s4 -; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: mov s0, v0.s[3] +; CHECK-NEXT: mov v4.s[1], w8 +; CHECK-NEXT: fcmp s3, s2 ; CHECK-NEXT: csetm w8, eq ; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: mov v2.s[2], w8 +; CHECK-NEXT: mov v4.s[2], w8 ; CHECK-NEXT: csetm w8, eq -; CHECK-NEXT: mov v2.s[3], w8 -; CHECK-NEXT: xtn v0.4h, v2.4s +; CHECK-NEXT: mov v4.s[3], w8 +; CHECK-NEXT: xtn v0.4h, v4.4s ; CHECK-NEXT: ret entry: %val = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict") @@ -263,22 +263,22 @@ ; CHECK-NEXT: mov s2, v1.s[1] ; CHECK-NEXT: mov s3, v0.s[1] ; CHECK-NEXT: fcmpe s0, s1 -; CHECK-NEXT: mov s4, v1.s[2] -; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: mov s1, v1.s[3] -; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: csetm w8, eq ; CHECK-NEXT: fcmpe s3, s2 -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: mov s2, v1.s[2] +; CHECK-NEXT: mov s3, v0.s[2] +; CHECK-NEXT: fmov s4, w8 +; CHECK-NEXT: mov s1, v1.s[3] ; CHECK-NEXT: csetm w8, eq -; CHECK-NEXT: fcmpe s5, s4 -; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: mov s0, v0.s[3] +; CHECK-NEXT: mov v4.s[1], w8 +; CHECK-NEXT: fcmpe s3, s2 ; CHECK-NEXT: csetm w8, eq ; CHECK-NEXT: fcmpe s0, s1 -; CHECK-NEXT: mov v2.s[2], w8 +; CHECK-NEXT: mov v4.s[2], w8 ; CHECK-NEXT: csetm w8, eq -; CHECK-NEXT: mov v2.s[3], w8 -; CHECK-NEXT: xtn v0.4h, v2.4s +; CHECK-NEXT: mov v4.s[3], w8 +; CHECK-NEXT: xtn v0.4h, v4.4s ; CHECK-NEXT: ret entry: %val = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict") Index: llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll =================================================================== --- llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -741,57 +741,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: csetm w8, ne ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, ne +; CHECK-CVT-NEXT: fmov s6, w9 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w8, ne +; CHECK-CVT-NEXT: mov v6.h[2], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[5] ; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v6.h[4], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: csetm w8, ne ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_une: @@ -809,67 +809,67 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h6, v0.h[2] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s6, h6 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc -; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcmp s7, s5 +; CHECK-CVT-NEXT: mov h5, v1.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h7, v0.h[5] ; CHECK-CVT-NEXT: csetm w9, eq ; CHECK-CVT-NEXT: csinv w9, w9, wzr, vc -; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s6, s4 +; CHECK-CVT-NEXT: fmov s16, w9 +; CHECK-CVT-NEXT: mov h4, v0.h[4] +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: mov v16.h[1], w8 +; CHECK-CVT-NEXT: csetm w8, eq +; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fmov s3, w9 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: mov v3.h[1], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s2, s4 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[5] -; CHECK-CVT-NEXT: mov h6, v0.h[5] -; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: mov v3.h[2], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 +; CHECK-CVT-NEXT: fcvt s3, h4 ; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: fcvt s5, h7 +; CHECK-CVT-NEXT: mov v16.h[2], w8 +; CHECK-CVT-NEXT: csetm w8, eq +; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v3.h[3], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov v16.h[3], w8 ; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w8, eq +; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v16.h[4], w8 ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: mov v3.h[4], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s4, s2 +; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v3.h[5], w8 +; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v16.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v3.h[6], w8 +; CHECK-CVT-NEXT: mov v16.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc -; CHECK-CVT-NEXT: mov v3.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v3.8h +; CHECK-CVT-NEXT: mov v16.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v16.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ueq: @@ -891,57 +891,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: csetm w8, hi ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, hi +; CHECK-CVT-NEXT: fmov s6, w9 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w8, hi +; CHECK-CVT-NEXT: mov v6.h[2], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[5] ; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v6.h[4], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: csetm w8, hi ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ugt: @@ -961,57 +961,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: csetm w8, pl ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, pl +; CHECK-CVT-NEXT: fmov s6, w9 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w8, pl +; CHECK-CVT-NEXT: mov v6.h[2], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[5] ; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v6.h[4], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: csetm w8, pl ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_uge: @@ -1031,57 +1031,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: csetm w8, lt ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, lt +; CHECK-CVT-NEXT: fmov s6, w9 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w8, lt +; CHECK-CVT-NEXT: mov v6.h[2], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[5] ; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v6.h[4], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: csetm w8, lt ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ult: @@ -1101,57 +1101,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: csetm w8, le ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, le +; CHECK-CVT-NEXT: fmov s6, w9 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w8, le +; CHECK-CVT-NEXT: mov v6.h[2], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[5] ; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v6.h[4], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: csetm w8, le ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ule: @@ -1171,57 +1171,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: csetm w8, vs ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, vs +; CHECK-CVT-NEXT: fmov s6, w9 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w8, vs +; CHECK-CVT-NEXT: mov v6.h[2], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[5] ; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v6.h[4], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: csetm w8, vs ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_uno: @@ -1241,67 +1241,67 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h6, v0.h[2] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s6, h6 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: csinv w8, w8, wzr, le -; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcmp s7, s5 +; CHECK-CVT-NEXT: mov h5, v1.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h7, v0.h[5] ; CHECK-CVT-NEXT: csetm w9, mi ; CHECK-CVT-NEXT: csinv w9, w9, wzr, le -; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s6, s4 +; CHECK-CVT-NEXT: fmov s16, w9 +; CHECK-CVT-NEXT: mov h4, v0.h[4] +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: mov v16.h[1], w8 +; CHECK-CVT-NEXT: csetm w8, mi +; CHECK-CVT-NEXT: csinv w8, w8, wzr, le ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fmov s3, w9 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: mov v3.h[1], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s2, s4 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[5] -; CHECK-CVT-NEXT: mov h6, v0.h[5] -; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: mov v3.h[2], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 +; CHECK-CVT-NEXT: fcvt s3, h4 ; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: fcvt s5, h7 +; CHECK-CVT-NEXT: mov v16.h[2], w8 +; CHECK-CVT-NEXT: csetm w8, mi +; CHECK-CVT-NEXT: csinv w8, w8, wzr, le +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v3.h[3], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov v16.h[3], w8 ; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w8, mi +; CHECK-CVT-NEXT: csinv w8, w8, wzr, le +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v16.h[4], w8 ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: mov v3.h[4], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s4, s2 +; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v3.h[5], w8 +; CHECK-CVT-NEXT: csinv w8, w8, wzr, le +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v16.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: csinv w8, w8, wzr, le ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v3.h[6], w8 +; CHECK-CVT-NEXT: mov v16.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: csinv w8, w8, wzr, le -; CHECK-CVT-NEXT: mov v3.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v3.8h +; CHECK-CVT-NEXT: mov v16.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v16.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_one: @@ -1322,57 +1322,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: fmov s6, w9 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w8, eq +; CHECK-CVT-NEXT: mov v6.h[2], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[5] ; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v6.h[4], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_oeq: @@ -1391,57 +1391,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: csetm w8, gt ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, gt +; CHECK-CVT-NEXT: fmov s6, w9 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w8, gt +; CHECK-CVT-NEXT: mov v6.h[2], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[5] ; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v6.h[4], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: csetm w8, gt ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ogt: @@ -1460,57 +1460,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: csetm w8, ge ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, ge +; CHECK-CVT-NEXT: fmov s6, w9 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w8, ge +; CHECK-CVT-NEXT: mov v6.h[2], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[5] ; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v6.h[4], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: csetm w8, ge ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_oge: @@ -1529,57 +1529,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: fmov s6, w9 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w8, mi +; CHECK-CVT-NEXT: mov v6.h[2], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[5] ; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v6.h[4], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_olt: @@ -1598,57 +1598,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: csetm w8, ls ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, ls +; CHECK-CVT-NEXT: fmov s6, w9 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w8, ls +; CHECK-CVT-NEXT: mov v6.h[2], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[5] ; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v6.h[4], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: csetm w8, ls ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ole: @@ -1667,57 +1667,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: csetm w8, vc ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, vc +; CHECK-CVT-NEXT: fmov s6, w9 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w8, vc +; CHECK-CVT-NEXT: mov v6.h[2], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[5] ; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v6.h[4], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: csetm w8, vc ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[6], w8 ; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ord: Index: llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll =================================================================== --- llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: fcvtzs w9, d1 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: @@ -30,8 +30,8 @@ ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: @@ -48,8 +48,8 @@ ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: @@ -283,14 +283,14 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-CVT-LABEL: utesth_f16i16: ; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-CVT-NEXT: movi v2.2d, #0x00ffff0000ffff +; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s -; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: utesth_f16i16: @@ -308,14 +308,14 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-CVT-LABEL: ustest_f16i16: ; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-CVT-NEXT: movi v2.2d, #0x00ffff0000ffff +; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s -; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: ustest_f16i16: @@ -372,8 +372,8 @@ ; CHECK-NEXT: csel x8, x0, xzr, eq ; CHECK-NEXT: cmp x20, #0 ; CHECK-NEXT: csel x9, x19, xzr, eq -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 @@ -425,8 +425,8 @@ ; CHECK-NEXT: csel x8, x8, xzr, ne ; CHECK-NEXT: cmp w11, #0 ; CHECK-NEXT: csel x9, x10, xzr, ne -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 @@ -448,8 +448,8 @@ ; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzs x8, s0 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret entry: %conv = fptosi <2 x float> %x to <2 x i128> @@ -485,8 +485,8 @@ ; CHECK-NEXT: csel x8, x0, xzr, eq ; CHECK-NEXT: cmp x20, #0 ; CHECK-NEXT: csel x9, x19, xzr, eq -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 @@ -539,8 +539,8 @@ ; CHECK-NEXT: csel x8, x9, xzr, ne ; CHECK-NEXT: cmp w11, #0 ; CHECK-NEXT: csel x9, x10, xzr, ne -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 @@ -563,9 +563,9 @@ ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvtzs x8, s0 +; CHECK-CVT-NEXT: fcvtzs x9, s1 ; CHECK-CVT-NEXT: fmov d0, x8 -; CHECK-CVT-NEXT: fcvtzs x8, s1 -; CHECK-CVT-NEXT: mov v0.d[1], x8 +; CHECK-CVT-NEXT: mov v0.d[1], x9 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: stest_f16i64: @@ -574,8 +574,8 @@ ; CHECK-FP16-NEXT: mov h1, v0.h[1] ; CHECK-FP16-NEXT: fcvtzs x8, h0 ; CHECK-FP16-NEXT: fmov d0, x8 -; CHECK-FP16-NEXT: fcvtzs x8, h1 -; CHECK-FP16-NEXT: mov v0.d[1], x8 +; CHECK-FP16-NEXT: fcvtzs x9, h1 +; CHECK-FP16-NEXT: mov v0.d[1], x9 ; CHECK-FP16-NEXT: ret entry: %conv = fptosi <2 x half> %x to <2 x i128> @@ -611,8 +611,8 @@ ; CHECK-NEXT: csel x8, x0, xzr, eq ; CHECK-NEXT: cmp x20, #0 ; CHECK-NEXT: csel x9, x19, xzr, eq -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 @@ -665,8 +665,8 @@ ; CHECK-NEXT: csel x8, x9, xzr, ne ; CHECK-NEXT: cmp w11, #0 ; CHECK-NEXT: csel x9, x10, xzr, ne -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 @@ -691,8 +691,8 @@ ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: fcvtzs w9, d1 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: @@ -709,8 +709,8 @@ ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: @@ -726,8 +726,8 @@ ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: @@ -937,14 +937,14 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-CVT-LABEL: utesth_f16i16_mm: ; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-CVT-NEXT: movi v2.2d, #0x00ffff0000ffff +; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s -; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: utesth_f16i16_mm: @@ -961,14 +961,14 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-CVT-LABEL: ustest_f16i16_mm: ; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-CVT-NEXT: movi v2.2d, #0x00ffff0000ffff +; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s -; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h +; CHECK-CVT-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: ustest_f16i16_mm: @@ -1025,8 +1025,8 @@ ; CHECK-NEXT: csel x9, x19, xzr, eq ; CHECK-NEXT: cmp x20, #1 ; CHECK-NEXT: csel x9, xzr, x9, eq -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 @@ -1070,9 +1070,9 @@ ; CHECK-NEXT: csel x10, x10, x11, eq ; CHECK-NEXT: cmp x9, #0 ; CHECK-NEXT: csel x9, x8, xzr, gt +; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: csel x8, x8, x9, eq ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 @@ -1092,8 +1092,8 @@ ; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzs x8, s0 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret entry: %conv = fptosi <2 x float> %x to <2 x i128> @@ -1131,8 +1131,8 @@ ; CHECK-NEXT: csel x9, x19, xzr, eq ; CHECK-NEXT: cmp x20, #1 ; CHECK-NEXT: csel x9, xzr, x9, eq -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 @@ -1177,9 +1177,9 @@ ; CHECK-NEXT: csel x10, x10, x11, eq ; CHECK-NEXT: cmp x9, #0 ; CHECK-NEXT: csel x9, x8, xzr, gt +; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: csel x8, x8, x9, eq ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 @@ -1200,9 +1200,9 @@ ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvtzs x8, s0 +; CHECK-CVT-NEXT: fcvtzs x9, s1 ; CHECK-CVT-NEXT: fmov d0, x8 -; CHECK-CVT-NEXT: fcvtzs x8, s1 -; CHECK-CVT-NEXT: mov v0.d[1], x8 +; CHECK-CVT-NEXT: mov v0.d[1], x9 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: stest_f16i64_mm: @@ -1211,8 +1211,8 @@ ; CHECK-FP16-NEXT: mov h1, v0.h[1] ; CHECK-FP16-NEXT: fcvtzs x8, h0 ; CHECK-FP16-NEXT: fmov d0, x8 -; CHECK-FP16-NEXT: fcvtzs x8, h1 -; CHECK-FP16-NEXT: mov v0.d[1], x8 +; CHECK-FP16-NEXT: fcvtzs x9, h1 +; CHECK-FP16-NEXT: mov v0.d[1], x9 ; CHECK-FP16-NEXT: ret entry: %conv = fptosi <2 x half> %x to <2 x i128> @@ -1250,8 +1250,8 @@ ; CHECK-NEXT: csel x9, x19, xzr, eq ; CHECK-NEXT: cmp x20, #1 ; CHECK-NEXT: csel x9, xzr, x9, eq -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d1, x9 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 @@ -1296,9 +1296,9 @@ ; CHECK-NEXT: csel x10, x10, x11, eq ; CHECK-NEXT: cmp x9, #0 ; CHECK-NEXT: csel x9, x8, xzr, gt +; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: csel x8, x8, x9, eq ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 Index: llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll +++ llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll @@ -311,9 +311,9 @@ ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: mov x8, #-4170333254945079296 ; CHECK-NEXT: mov x10, #34359738367 -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov x8, #5053038781909696511 +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: fcmp d8, d0 ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov x8, #-34359738368 @@ -340,9 +340,9 @@ ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: mov x8, #-4044232465378705408 ; CHECK-NEXT: mov x10, #9223372036854775807 -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov x8, #5179139571476070399 +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: fcmp d8, d0 ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov x8, #-9223372036854775808 Index: llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -166,8 +166,8 @@ ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: fcvtzs w9, d1 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptosi.sat.v2f64.v2i32(<2 x double> %f) @@ -178,10 +178,10 @@ ; CHECK-LABEL: test_signed_v3f64_v3i32: ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: fcvtzs w9, d1 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: fcvtzs w8, d2 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: fcvtzs w8, d0 ; CHECK-NEXT: mov v0.s[3], w8 @@ -196,10 +196,10 @@ ; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzs w8, d2 -; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: fcvtzs w9, d2 ; CHECK-NEXT: mov d1, v1.d[1] +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: fcvtzs w8, d1 ; CHECK-NEXT: mov v0.s[3], w8 @@ -263,8 +263,8 @@ ; CHECK-NEXT: mov w8, #-2147483648 ; CHECK-NEXT: csel w19, w8, w0, lt ; CHECK-NEXT: adrp x8, .LCPI14_1 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #2147483647 @@ -298,8 +298,8 @@ ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -307,10 +307,10 @@ ; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: mov w20, #-2147483648 ; CHECK-NEXT: csel w19, w20, w0, lt -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -340,8 +340,8 @@ ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: csel w8, wzr, w19, ne -; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.s[1], w22 ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -368,8 +368,8 @@ ; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -377,10 +377,10 @@ ; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: adrp x8, .LCPI16_1 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: mov w20, #-2147483648 ; CHECK-NEXT: csel w19, w20, w0, lt -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -433,8 +433,8 @@ ; CHECK-NEXT: csel w8, wzr, w19, ne ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.s[2], w8 +; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptosi.sat.v3f128.v3i32(<3 x fp128> %f) @@ -458,8 +458,8 @@ ; CHECK-NEXT: stp q2, q3, [sp, #64] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 @@ -468,10 +468,10 @@ ; CHECK-NEXT: bl __fixtfsi ; CHECK-NEXT: adrp x8, .LCPI17_1 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: mov w20, #-2147483648 ; CHECK-NEXT: csel w19, w20, w0, lt -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -543,8 +543,8 @@ ; CHECK-NEXT: csel w8, wzr, w19, ne ; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload ; CHECK-NEXT: ldp x20, x19, [sp, #128] // 16-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #144 ; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptosi.sat.v4f128.v4i32(<4 x fp128> %f) @@ -640,8 +640,8 @@ ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] ; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: fmov w0, s1 ; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: fmov w0, s1 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptosi.sat.v6f16.v6i32(<6 x half> %f) ret <6 x i32> %x @@ -654,10 +654,10 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: mov w5, v0.s[1] ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: mov w5, v0.s[1] ; CHECK-NEXT: mov w6, v0.s[2] ; CHECK-NEXT: fmov w0, s1 ; CHECK-NEXT: fmov w4, s0 @@ -797,8 +797,8 @@ ; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzs x8, s0 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %x = call <2 x i64> @llvm.fptosi.sat.v2f32.v2i64(<2 x float> %f) ret <2 x i64> %x @@ -829,10 +829,10 @@ ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: movi v9.2s, #241, lsl #24 ; CHECK-NEXT: mov w8, #1895825407 +; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: mov x21, #-34359738368 ; CHECK-NEXT: mov x22, #34359738367 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -895,10 +895,10 @@ ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: movi v9.2s, #255, lsl #24 ; CHECK-NEXT: mov w8, #2130706431 +; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: mov x21, #-9223372036854775808 ; CHECK-NEXT: mov x22, #9223372036854775807 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -967,8 +967,8 @@ define <4 x i8> @test_signed_v4f32_v4i8(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #127 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #127 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mvni v1.4s, #127 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -981,8 +981,8 @@ define <4 x i13> @test_signed_v4f32_v4i13(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #15, msl #8 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #15, msl #8 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mvni v1.4s, #15, msl #8 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -1005,8 +1005,8 @@ define <4 x i19> @test_signed_v4f32_v4i19(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #3, msl #16 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #3, msl #16 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mvni v1.4s, #3, msl #16 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -1062,15 +1062,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: mov s3, v0.s[1] -; CHECK-NEXT: fcvtzs x9, s0 +; CHECK-NEXT: fcvtzs x8, s0 ; CHECK-NEXT: mov s2, v1.s[1] -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fcvtzs x9, s3 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fcvtzs x8, s2 -; CHECK-NEXT: mov v0.d[1], x9 -; CHECK-NEXT: mov v1.d[1], x8 +; CHECK-NEXT: fcvtzs x9, s1 +; CHECK-NEXT: fcvtzs x11, s3 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fcvtzs x10, s2 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v1.d[1], x10 ; CHECK-NEXT: ret %x = call <4 x i64> @llvm.fptosi.sat.v4f32.v4i64(<4 x float> %f) ret <4 x i64> %x @@ -1106,10 +1106,10 @@ ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: movi v9.2s, #241, lsl #24 ; CHECK-NEXT: mov w8, #1895825407 +; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: mov x25, #-34359738368 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: mov x26, #34359738367 -; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -1169,10 +1169,10 @@ ; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: csel x9, xzr, x9, vs ; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: ldp d9, d8, [sp, #40] // 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp d9, d8, [sp, #40] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret @@ -1210,10 +1210,10 @@ ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: movi v9.2s, #255, lsl #24 ; CHECK-NEXT: mov w8, #2130706431 +; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: mov x25, #-9223372036854775808 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: mov x26, #9223372036854775807 -; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -1273,10 +1273,10 @@ ; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: csel x9, xzr, x9, vs ; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: ldp d9, d8, [sp, #40] // 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp d9, d8, [sp, #40] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret @@ -1422,8 +1422,8 @@ ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: fcvtzs w9, d1 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptosi.sat.v2f64.v2i32(<2 x double> %f) @@ -1486,13 +1486,13 @@ ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: mov x8, #-4170333254945079296 ; CHECK-NEXT: mov x21, #-34359738368 -; CHECK-NEXT: mov x22, #34359738367 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: fmov d9, x8 ; CHECK-NEXT: mov x8, #5053038781909696511 -; CHECK-NEXT: fcmp d8, d9 ; CHECK-NEXT: fmov d10, x8 +; CHECK-NEXT: mov x22, #34359738367 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fcmp d8, d9 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp d8, d10 @@ -1552,13 +1552,13 @@ ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: mov x8, #-4044232465378705408 ; CHECK-NEXT: mov x21, #-9223372036854775808 -; CHECK-NEXT: mov x22, #9223372036854775807 -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: fmov d9, x8 ; CHECK-NEXT: mov x8, #5179139571476070399 -; CHECK-NEXT: fcmp d8, d9 ; CHECK-NEXT: fmov d10, x8 +; CHECK-NEXT: mov x22, #9223372036854775807 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fcmp d8, d9 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp d8, d10 @@ -1732,21 +1732,21 @@ ; CHECK-CVT-NEXT: mov x11, #-562949953421312 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvtzs x9, s2 +; CHECK-CVT-NEXT: fcvt s2, h3 ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: cmp x9, x8 ; CHECK-CVT-NEXT: fcvtzs x10, s1 -; CHECK-CVT-NEXT: fcvt s1, h3 ; CHECK-CVT-NEXT: csel x9, x9, x8, lt ; CHECK-CVT-NEXT: cmp x9, x11 +; CHECK-CVT-NEXT: fcvtzs x12, s2 ; CHECK-CVT-NEXT: csel x0, x9, x11, gt ; CHECK-CVT-NEXT: cmp x10, x8 -; CHECK-CVT-NEXT: fcvtzs x9, s1 -; CHECK-CVT-NEXT: csel x10, x10, x8, lt -; CHECK-CVT-NEXT: cmp x10, x11 -; CHECK-CVT-NEXT: csel x1, x10, x11, gt +; CHECK-CVT-NEXT: csel x9, x10, x8, lt ; CHECK-CVT-NEXT: fcvtzs x10, s0 -; CHECK-CVT-NEXT: cmp x9, x8 -; CHECK-CVT-NEXT: csel x9, x9, x8, lt +; CHECK-CVT-NEXT: cmp x9, x11 +; CHECK-CVT-NEXT: csel x1, x9, x11, gt +; CHECK-CVT-NEXT: cmp x12, x8 +; CHECK-CVT-NEXT: csel x9, x12, x8, lt ; CHECK-CVT-NEXT: cmp x9, x11 ; CHECK-CVT-NEXT: csel x2, x9, x11, gt ; CHECK-CVT-NEXT: cmp x10, x8 @@ -1761,22 +1761,22 @@ ; CHECK-FP16-NEXT: mov h1, v0.h[1] ; CHECK-FP16-NEXT: fcvtzs x9, h0 ; CHECK-FP16-NEXT: mov x8, #562949953421311 +; CHECK-FP16-NEXT: mov h2, v0.h[2] ; CHECK-FP16-NEXT: mov x11, #-562949953421312 +; CHECK-FP16-NEXT: mov h0, v0.h[3] ; CHECK-FP16-NEXT: cmp x9, x8 ; CHECK-FP16-NEXT: fcvtzs x10, h1 -; CHECK-FP16-NEXT: mov h1, v0.h[2] ; CHECK-FP16-NEXT: csel x9, x9, x8, lt -; CHECK-FP16-NEXT: mov h0, v0.h[3] ; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: fcvtzs x12, h2 ; CHECK-FP16-NEXT: csel x0, x9, x11, gt ; CHECK-FP16-NEXT: cmp x10, x8 -; CHECK-FP16-NEXT: fcvtzs x9, h1 -; CHECK-FP16-NEXT: csel x10, x10, x8, lt -; CHECK-FP16-NEXT: cmp x10, x11 -; CHECK-FP16-NEXT: csel x1, x10, x11, gt +; CHECK-FP16-NEXT: csel x9, x10, x8, lt ; CHECK-FP16-NEXT: fcvtzs x10, h0 -; CHECK-FP16-NEXT: cmp x9, x8 -; CHECK-FP16-NEXT: csel x9, x9, x8, lt +; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: csel x1, x9, x11, gt +; CHECK-FP16-NEXT: cmp x12, x8 +; CHECK-FP16-NEXT: csel x9, x12, x8, lt ; CHECK-FP16-NEXT: cmp x9, x11 ; CHECK-FP16-NEXT: csel x2, x9, x11, gt ; CHECK-FP16-NEXT: cmp x10, x8 @@ -1792,37 +1792,37 @@ ; CHECK-CVT-LABEL: test_signed_v4f16_v4i64: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-NEXT: mov h1, v0.h[2] -; CHECK-CVT-NEXT: mov h2, v0.h[1] -; CHECK-CVT-NEXT: fcvt s3, h0 -; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: mov h3, v0.h[3] +; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvtzs x8, s3 -; CHECK-CVT-NEXT: fcvt s3, h0 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvtzs x8, s0 +; CHECK-CVT-NEXT: fcvtzs x10, s2 ; CHECK-CVT-NEXT: fcvtzs x9, s1 +; CHECK-CVT-NEXT: fcvtzs x11, s3 ; CHECK-CVT-NEXT: fmov d0, x8 -; CHECK-CVT-NEXT: fcvtzs x8, s2 -; CHECK-CVT-NEXT: fmov d1, x9 -; CHECK-CVT-NEXT: fcvtzs x9, s3 -; CHECK-CVT-NEXT: mov v0.d[1], x8 -; CHECK-CVT-NEXT: mov v1.d[1], x9 +; CHECK-CVT-NEXT: fmov d1, x10 +; CHECK-CVT-NEXT: mov v0.d[1], x9 +; CHECK-CVT-NEXT: mov v1.d[1], x11 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v4f16_v4i64: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h1, v0.h[2] -; CHECK-FP16-NEXT: mov h2, v0.h[1] +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h1, v0.h[1] ; CHECK-FP16-NEXT: mov h3, v0.h[3] ; CHECK-FP16-NEXT: fcvtzs x8, h0 +; CHECK-FP16-NEXT: fcvtzs x10, h2 ; CHECK-FP16-NEXT: fcvtzs x9, h1 +; CHECK-FP16-NEXT: fcvtzs x11, h3 ; CHECK-FP16-NEXT: fmov d0, x8 -; CHECK-FP16-NEXT: fcvtzs x8, h2 -; CHECK-FP16-NEXT: fmov d1, x9 -; CHECK-FP16-NEXT: fcvtzs x9, h3 -; CHECK-FP16-NEXT: mov v0.d[1], x8 -; CHECK-FP16-NEXT: mov v1.d[1], x9 +; CHECK-FP16-NEXT: fmov d1, x10 +; CHECK-FP16-NEXT: mov v0.d[1], x9 +; CHECK-FP16-NEXT: mov v1.d[1], x11 ; CHECK-FP16-NEXT: ret %x = call <4 x i64> @llvm.fptosi.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x @@ -1861,9 +1861,9 @@ ; CHECK-NEXT: movi v9.2s, #241, lsl #24 ; CHECK-NEXT: mov w8, #1895825407 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: mov x25, #-34359738368 ; CHECK-NEXT: mov x26, #34359738367 -; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -1922,11 +1922,11 @@ ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: csel x9, xzr, x9, vs ; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 @@ -1968,9 +1968,9 @@ ; CHECK-NEXT: movi v9.2s, #255, lsl #24 ; CHECK-NEXT: mov w8, #2130706431 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: mov x25, #-9223372036854775808 ; CHECK-NEXT: mov x26, #9223372036854775807 -; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -2029,11 +2029,11 @@ ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: csel x9, xzr, x9, vs ; CHECK-NEXT: csel x1, xzr, x8, vs -; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 @@ -2087,8 +2087,9 @@ ; CHECK-CVT-NEXT: csinv w10, w10, wzr, ge ; CHECK-CVT-NEXT: cmp w11, #0 ; CHECK-CVT-NEXT: csel w11, w11, wzr, lt -; CHECK-CVT-NEXT: fmov s2, w9 +; CHECK-CVT-NEXT: fcvtzs w14, s1 ; CHECK-CVT-NEXT: cmp w11, #0 +; CHECK-CVT-NEXT: fmov s1, w9 ; CHECK-CVT-NEXT: csinv w11, w11, wzr, ge ; CHECK-CVT-NEXT: cmp w12, #0 ; CHECK-CVT-NEXT: csel w12, w12, wzr, lt @@ -2098,31 +2099,30 @@ ; CHECK-CVT-NEXT: csel w13, w13, wzr, lt ; CHECK-CVT-NEXT: cmp w13, #0 ; CHECK-CVT-NEXT: csinv w9, w13, wzr, ge -; CHECK-CVT-NEXT: fcvtzs w13, s1 -; CHECK-CVT-NEXT: mov v2.s[1], w8 -; CHECK-CVT-NEXT: fmov s1, w9 -; CHECK-CVT-NEXT: cmp w13, #0 -; CHECK-CVT-NEXT: csel w8, w13, wzr, lt +; CHECK-CVT-NEXT: cmp w14, #0 +; CHECK-CVT-NEXT: fmov s2, w9 ; CHECK-CVT-NEXT: fcvtzs w9, s0 +; CHECK-CVT-NEXT: mov v1.s[1], w8 +; CHECK-CVT-NEXT: csel w8, w14, wzr, lt ; CHECK-CVT-NEXT: cmp w8, #0 -; CHECK-CVT-NEXT: mov v1.s[1], w12 +; CHECK-CVT-NEXT: mov v2.s[1], w12 ; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge ; CHECK-CVT-NEXT: cmp w9, #0 -; CHECK-CVT-NEXT: csel w9, w9, wzr, lt -; CHECK-CVT-NEXT: mov v2.s[2], w10 -; CHECK-CVT-NEXT: cmp w9, #0 -; CHECK-CVT-NEXT: mov v1.s[2], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge -; CHECK-CVT-NEXT: mov v2.s[3], w11 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-CVT-NEXT: mov v1.s[2], w10 +; CHECK-CVT-NEXT: mov v2.s[2], w8 +; CHECK-CVT-NEXT: csel w8, w9, wzr, lt +; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge +; CHECK-CVT-NEXT: mov v1.s[3], w11 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: movi v2.2d, #0xffffffffffffffff ; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v2.8h @@ -2165,8 +2165,9 @@ ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, #127 ; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: fcvtzs w16, s1 ; CHECK-CVT-NEXT: cmn w13, #128 +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, #127 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt @@ -2176,24 +2177,23 @@ ; CHECK-CVT-NEXT: csel w15, w15, w8, lt ; CHECK-CVT-NEXT: cmn w15, #128 ; CHECK-CVT-NEXT: csel w11, w15, w10, gt -; CHECK-CVT-NEXT: fcvtzs w15, s1 -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: fmov s1, w11 -; CHECK-CVT-NEXT: cmp w15, #127 -; CHECK-CVT-NEXT: csel w9, w15, w8, lt +; CHECK-CVT-NEXT: cmp w16, #127 +; CHECK-CVT-NEXT: fmov s2, w11 ; CHECK-CVT-NEXT: fcvtzs w11, s0 +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: csel w9, w16, w8, lt ; CHECK-CVT-NEXT: cmn w9, #128 -; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: mov v2.s[1], w14 ; CHECK-CVT-NEXT: csel w9, w9, w10, gt ; CHECK-CVT-NEXT: cmp w11, #127 +; CHECK-CVT-NEXT: mov v1.s[2], w12 ; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w12 ; CHECK-CVT-NEXT: cmn w8, #128 -; CHECK-CVT-NEXT: mov v1.s[2], w9 +; CHECK-CVT-NEXT: mov v2.s[2], w9 ; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; @@ -2239,8 +2239,9 @@ ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, #4095 ; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: fcvtzs w16, s1 ; CHECK-CVT-NEXT: cmn w13, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, #4095 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt @@ -2250,24 +2251,23 @@ ; CHECK-CVT-NEXT: csel w15, w15, w8, lt ; CHECK-CVT-NEXT: cmn w15, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: csel w11, w15, w10, gt -; CHECK-CVT-NEXT: fcvtzs w15, s1 -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: fmov s1, w11 -; CHECK-CVT-NEXT: cmp w15, #4095 -; CHECK-CVT-NEXT: csel w9, w15, w8, lt +; CHECK-CVT-NEXT: cmp w16, #4095 +; CHECK-CVT-NEXT: fmov s2, w11 ; CHECK-CVT-NEXT: fcvtzs w11, s0 +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: csel w9, w16, w8, lt ; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: mov v2.s[1], w14 ; CHECK-CVT-NEXT: csel w9, w9, w10, gt ; CHECK-CVT-NEXT: cmp w11, #4095 +; CHECK-CVT-NEXT: mov v1.s[2], w12 ; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w12 ; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: mov v1.s[2], w9 +; CHECK-CVT-NEXT: mov v2.s[2], w9 ; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i13: @@ -2315,8 +2315,9 @@ ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, w8 ; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: fcvtzs w16, s1 ; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, w8 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt @@ -2326,24 +2327,23 @@ ; CHECK-CVT-NEXT: csel w15, w15, w8, lt ; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: csel w11, w15, w10, gt -; CHECK-CVT-NEXT: fcvtzs w15, s1 -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: fmov s1, w11 -; CHECK-CVT-NEXT: cmp w15, w8 -; CHECK-CVT-NEXT: csel w9, w15, w8, lt +; CHECK-CVT-NEXT: cmp w16, w8 +; CHECK-CVT-NEXT: fmov s2, w11 ; CHECK-CVT-NEXT: fcvtzs w11, s0 +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: csel w9, w16, w8, lt ; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: mov v2.s[1], w14 ; CHECK-CVT-NEXT: csel w9, w9, w10, gt ; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: mov v1.s[2], w12 ; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w12 ; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov v1.s[2], w9 +; CHECK-CVT-NEXT: mov v2.s[2], w9 ; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i16: @@ -2357,24 +2357,24 @@ define <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i19: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v2.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-NEXT: movi v1.4s, #3, msl #16 -; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: movi v2.4s, #3, msl #16 +; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mvni v1.4s, #3, msl #16 -; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mov w1, v2.s[1] -; CHECK-NEXT: mov w2, v2.s[2] -; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w3, v2.s[3] -; CHECK-NEXT: mov w6, v0.s[2] -; CHECK-NEXT: mov w7, v0.s[3] -; CHECK-NEXT: fmov w4, s0 -; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: mvni v2.4s, #3, msl #16 +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: mov w6, v1.s[2] +; CHECK-NEXT: mov w1, v0.s[1] +; CHECK-NEXT: mov w2, v0.s[2] +; CHECK-NEXT: mov w7, v1.s[3] +; CHECK-NEXT: mov w3, v0.s[3] +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <8 x i19> @llvm.fptosi.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> %x @@ -2514,63 +2514,63 @@ ; CHECK-CVT-LABEL: test_signed_v8f16_v8i64: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-CVT-NEXT: mov h2, v0.h[1] +; CHECK-CVT-NEXT: fcvt s3, h0 ; CHECK-CVT-NEXT: mov h4, v0.h[2] -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: fcvt s2, h1 -; CHECK-CVT-NEXT: mov h3, v1.h[1] -; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: fcvt s5, h1 +; CHECK-CVT-NEXT: mov h7, v1.h[2] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: mov h6, v1.h[1] ; CHECK-CVT-NEXT: mov h1, v1.h[3] +; CHECK-CVT-NEXT: fcvtzs x8, s3 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h0 ; CHECK-CVT-NEXT: fcvtzs x9, s5 -; CHECK-CVT-NEXT: fcvtzs x8, s2 -; CHECK-CVT-NEXT: fcvt s2, h3 -; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s5, h7 +; CHECK-CVT-NEXT: fcvtzs x10, s2 +; CHECK-CVT-NEXT: fcvt s2, h6 ; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fmov d0, x8 +; CHECK-CVT-NEXT: fcvtzs x8, s3 +; CHECK-CVT-NEXT: fcvtzs x11, s5 +; CHECK-CVT-NEXT: mov v0.d[1], x10 ; CHECK-CVT-NEXT: fcvtzs x10, s2 -; CHECK-CVT-NEXT: fmov d2, x8 -; CHECK-CVT-NEXT: fcvtzs x8, s4 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: fmov d0, x9 -; CHECK-CVT-NEXT: mov v2.d[1], x10 -; CHECK-CVT-NEXT: fcvtzs x10, s5 +; CHECK-CVT-NEXT: fmov d2, x9 +; CHECK-CVT-NEXT: fcvtzs x9, s4 ; CHECK-CVT-NEXT: fmov d1, x8 -; CHECK-CVT-NEXT: fcvtzs x9, s3 -; CHECK-CVT-NEXT: fcvtzs x8, s4 -; CHECK-CVT-NEXT: fmov d3, x10 -; CHECK-CVT-NEXT: fcvtzs x10, s6 -; CHECK-CVT-NEXT: mov v0.d[1], x9 -; CHECK-CVT-NEXT: mov v1.d[1], x8 -; CHECK-CVT-NEXT: mov v3.d[1], x10 +; CHECK-CVT-NEXT: fcvtzs x8, s6 +; CHECK-CVT-NEXT: fmov d3, x11 +; CHECK-CVT-NEXT: mov v2.d[1], x10 +; CHECK-CVT-NEXT: mov v1.d[1], x9 +; CHECK-CVT-NEXT: mov v3.d[1], x8 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i64: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-FP16-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzs x8, h0 ; CHECK-FP16-NEXT: mov h3, v0.h[2] -; CHECK-FP16-NEXT: mov h5, v0.h[3] -; CHECK-FP16-NEXT: fcvtzs x9, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] +; CHECK-FP16-NEXT: mov h4, v0.h[3] +; CHECK-FP16-NEXT: mov h5, v2.h[2] +; CHECK-FP16-NEXT: fmov d0, x8 ; CHECK-FP16-NEXT: fcvtzs x8, h1 -; CHECK-FP16-NEXT: mov h4, v1.h[2] -; CHECK-FP16-NEXT: mov h6, v1.h[3] -; CHECK-FP16-NEXT: fcvtzs x10, h2 -; CHECK-FP16-NEXT: fmov d2, x8 +; CHECK-FP16-NEXT: mov h1, v2.h[1] +; CHECK-FP16-NEXT: mov h6, v2.h[3] +; CHECK-FP16-NEXT: fcvtzs x9, h2 +; CHECK-FP16-NEXT: mov v0.d[1], x8 ; CHECK-FP16-NEXT: fcvtzs x8, h3 -; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fmov d0, x9 -; CHECK-FP16-NEXT: mov v2.d[1], x10 -; CHECK-FP16-NEXT: fcvtzs x10, h4 +; CHECK-FP16-NEXT: fcvtzs x11, h5 +; CHECK-FP16-NEXT: fcvtzs x10, h1 +; CHECK-FP16-NEXT: fmov d2, x9 +; CHECK-FP16-NEXT: fcvtzs x9, h4 ; CHECK-FP16-NEXT: fmov d1, x8 -; CHECK-FP16-NEXT: fcvtzs x9, h3 -; CHECK-FP16-NEXT: fcvtzs x8, h5 -; CHECK-FP16-NEXT: fmov d3, x10 -; CHECK-FP16-NEXT: fcvtzs x10, h6 -; CHECK-FP16-NEXT: mov v0.d[1], x9 -; CHECK-FP16-NEXT: mov v1.d[1], x8 -; CHECK-FP16-NEXT: mov v3.d[1], x10 +; CHECK-FP16-NEXT: fcvtzs x8, h6 +; CHECK-FP16-NEXT: fmov d3, x11 +; CHECK-FP16-NEXT: mov v2.d[1], x10 +; CHECK-FP16-NEXT: mov v1.d[1], x9 +; CHECK-FP16-NEXT: mov v3.d[1], x8 ; CHECK-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x @@ -2612,18 +2612,18 @@ ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: movi v10.2s, #241, lsl #24 +; CHECK-NEXT: movi v9.2s, #241, lsl #24 ; CHECK-NEXT: mov w8, #1895825407 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov x25, #-34359738368 -; CHECK-NEXT: mov x23, #34359738367 -; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fmov s10, w8 +; CHECK-NEXT: mov x26, #-34359738368 +; CHECK-NEXT: mov x25, #34359738367 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x25, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x23, x9, gt +; CHECK-NEXT: csel x9, x26, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x25, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2633,12 +2633,12 @@ ; CHECK-NEXT: csel x8, xzr, x9, vs ; CHECK-NEXT: str x8, [sp, #72] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x25, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x23, x9, gt +; CHECK-NEXT: csel x9, x26, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x25, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2648,13 +2648,13 @@ ; CHECK-NEXT: str x8, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: csel x8, x25, x1, lt +; CHECK-NEXT: csel x8, x26, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x23, x8, gt +; CHECK-NEXT: csel x8, x25, x8, gt ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x24, xzr, x8, vs @@ -2663,13 +2663,13 @@ ; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: csel x8, x25, x1, lt +; CHECK-NEXT: csel x8, x26, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x23, x8, gt +; CHECK-NEXT: csel x8, x25, x8, gt ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x27, xzr, x8, vs @@ -2678,12 +2678,12 @@ ; CHECK-NEXT: str x8, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x25, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x23, x9, gt +; CHECK-NEXT: csel x9, x26, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x25, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2692,84 +2692,82 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x25, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x23, x9, gt +; CHECK-NEXT: csel x9, x26, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x25, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x20, xzr, x8, vs -; CHECK-NEXT: csel x28, xzr, x9, vs +; CHECK-NEXT: csel x23, xzr, x8, vs +; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: csel x8, x25, x1, lt +; CHECK-NEXT: csel x8, x26, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x23, x8, gt +; CHECK-NEXT: csel x8, x25, x8, gt ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x21, xzr, x8, vs -; CHECK-NEXT: csel x26, xzr, x9, vs +; CHECK-NEXT: csel x28, xzr, x8, vs +; CHECK-NEXT: csel x21, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fmov d0, x20 -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr x11, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: lsr x10, x28, #28 -; CHECK-NEXT: ldr d1, [sp] // 8-byte Folded Reload -; CHECK-NEXT: lsr x12, x29, #28 -; CHECK-NEXT: mov v0.d[1], x28 -; CHECK-NEXT: csel x8, x25, x1, lt +; CHECK-NEXT: fmov d0, x23 +; CHECK-NEXT: lsr x10, x20, #28 +; CHECK-NEXT: csel x8, x26, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: stur x11, [x19, #75] -; CHECK-NEXT: ldr x13, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x20 +; CHECK-NEXT: ldr d1, [sp] // 8-byte Folded Reload +; CHECK-NEXT: lsr x11, x29, #28 +; CHECK-NEXT: strb w10, [x19, #49] ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x23, x8, gt +; CHECK-NEXT: csel x8, x25, x8, gt ; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: fmov x11, d0 -; CHECK-NEXT: stur x13, [x19, #50] +; CHECK-NEXT: strb w11, [x19, #24] ; CHECK-NEXT: mov v1.d[1], x29 -; CHECK-NEXT: ldr d0, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: fmov x10, d0 ; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: strb w10, [x19, #49] -; CHECK-NEXT: extr x10, x28, x11, #28 ; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: bfi x8, x11, #36, #28 -; CHECK-NEXT: strb w12, [x19, #24] ; CHECK-NEXT: stur x9, [x19, #25] -; CHECK-NEXT: fmov x12, d1 -; CHECK-NEXT: stur x10, [x19, #41] -; CHECK-NEXT: lsr x9, x22, #28 -; CHECK-NEXT: ldr d1, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: bfi x8, x10, #36, #28 +; CHECK-NEXT: ldr x9, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: stur x8, [x19, #33] -; CHECK-NEXT: ldr x11, [sp, #72] // 8-byte Folded Reload -; CHECK-NEXT: extr x18, x29, x12, #28 +; CHECK-NEXT: lsr x8, x22, #28 +; CHECK-NEXT: stur x9, [x19, #50] +; CHECK-NEXT: extr x9, x20, x10, #28 +; CHECK-NEXT: fmov x10, d1 +; CHECK-NEXT: stur x9, [x19, #41] +; CHECK-NEXT: extr x9, x29, x10, #28 +; CHECK-NEXT: bfi x28, x10, #36, #28 +; CHECK-NEXT: str x9, [x19, #16] +; CHECK-NEXT: ldp d0, d1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x10, [sp, #72] // 8-byte Folded Reload +; CHECK-NEXT: strb w8, [x19, #99] +; CHECK-NEXT: stp x21, x28, [x19] ; CHECK-NEXT: mov v0.d[1], x22 -; CHECK-NEXT: bfi x21, x12, #36, #28 -; CHECK-NEXT: str x26, [x19] -; CHECK-NEXT: mov v1.d[1], x11 -; CHECK-NEXT: lsr x10, x11, #28 -; CHECK-NEXT: mov x13, x11 -; CHECK-NEXT: stp x21, x18, [x19, #8] +; CHECK-NEXT: mov v1.d[1], x10 +; CHECK-NEXT: lsr x9, x10, #28 +; CHECK-NEXT: strb w9, [x19, #74] ; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: strb w9, [x19, #99] -; CHECK-NEXT: strb w10, [x19, #74] -; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: extr x12, x22, x8, #28 +; CHECK-NEXT: extr x9, x22, x8, #28 ; CHECK-NEXT: bfi x27, x8, #36, #28 -; CHECK-NEXT: extr x8, x13, x11, #28 -; CHECK-NEXT: bfi x24, x11, #36, #28 -; CHECK-NEXT: stur x12, [x19, #91] +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: stur x9, [x19, #91] ; CHECK-NEXT: stur x27, [x19, #83] -; CHECK-NEXT: stur x8, [x19, #66] +; CHECK-NEXT: extr x9, x10, x8, #28 +; CHECK-NEXT: bfi x24, x8, #36, #28 +; CHECK-NEXT: stur x9, [x19, #66] ; CHECK-NEXT: stur x24, [x19, #58] ; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload @@ -2820,18 +2818,18 @@ ; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: movi v10.2s, #255, lsl #24 +; CHECK-NEXT: movi v9.2s, #255, lsl #24 ; CHECK-NEXT: mov w8, #2130706431 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov x21, #-9223372036854775808 -; CHECK-NEXT: mov x22, #9223372036854775807 -; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fmov s10, w8 +; CHECK-NEXT: mov x20, #-9223372036854775808 +; CHECK-NEXT: mov x23, #9223372036854775807 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x20, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2842,12 +2840,12 @@ ; CHECK-NEXT: str x8, [sp, #24] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x20, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2856,13 +2854,13 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: stp x8, x10, [sp, #8] // 16-byte Folded Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, x20, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2872,12 +2870,12 @@ ; CHECK-NEXT: csel x8, xzr, x9, vs ; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x20, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2886,26 +2884,26 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x20, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x20, xzr, x8, vs -; CHECK-NEXT: csel x23, xzr, x9, vs +; CHECK-NEXT: csel x21, xzr, x8, vs +; CHECK-NEXT: csel x22, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x20, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2914,12 +2912,12 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x20, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2927,24 +2925,24 @@ ; CHECK-NEXT: csel x27, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: stp x26, x27, [x19, #32] ; CHECK-NEXT: stp x24, x25, [x19, #16] -; CHECK-NEXT: stp x20, x23, [x19] +; CHECK-NEXT: stp x21, x22, [x19] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: csel x9, x20, x1, lt +; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: stp x28, x29, [x19, #112] -; CHECK-NEXT: ldr x10, [sp] // 8-byte Folded Reload -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: str x10, [x19, #104] -; CHECK-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: csel x9, xzr, x9, vs ; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: str x10, [x19, #96] ; CHECK-NEXT: stp x8, x9, [x19, #48] +; CHECK-NEXT: ldr x8, [sp] // 8-byte Folded Reload +; CHECK-NEXT: str x8, [x19, #104] +; CHECK-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: str x8, [x19, #96] ; CHECK-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: str x8, [x19, #88] ; CHECK-NEXT: ldr x8, [sp, #16] // 8-byte Folded Reload @@ -3003,9 +3001,9 @@ define <16 x i8> @test_signed_v16f32_v16i8(<16 x float> %f) { ; CHECK-LABEL: test_signed_v16f32_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v4.4s, #127 ; CHECK-NEXT: fcvtzs v3.4s, v3.4s ; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: movi v4.4s, #127 ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: mvni v5.4s, #127 @@ -3062,8 +3060,12 @@ ; CHECK-CVT-NEXT: mov w8, #127 ; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h ; CHECK-CVT-NEXT: mov w9, #-128 +; CHECK-CVT-NEXT: fcvtl2 v4.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s3, v2.s[1] ; CHECK-CVT-NEXT: fcvtzs w11, s2 +; CHECK-CVT-NEXT: fcvtzs w14, s1 +; CHECK-CVT-NEXT: fcvtzs w17, s4 ; CHECK-CVT-NEXT: fcvtzs w10, s3 ; CHECK-CVT-NEXT: mov s3, v2.s[2] ; CHECK-CVT-NEXT: mov s2, v2.s[3] @@ -3075,105 +3077,101 @@ ; CHECK-CVT-NEXT: csel w10, w10, w9, gt ; CHECK-CVT-NEXT: cmp w11, #127 ; CHECK-CVT-NEXT: csel w11, w11, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: fcvtzs w13, s2 ; CHECK-CVT-NEXT: cmn w11, #128 -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-NEXT: mov s2, v1.s[2] ; CHECK-CVT-NEXT: csel w11, w11, w9, gt ; CHECK-CVT-NEXT: cmp w12, #127 ; CHECK-CVT-NEXT: csel w12, w12, w8, lt ; CHECK-CVT-NEXT: fcvtzs w15, s3 ; CHECK-CVT-NEXT: cmn w12, #128 -; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: csel w13, w12, w9, gt -; CHECK-CVT-NEXT: cmp w14, #127 -; CHECK-CVT-NEXT: csel w12, w14, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s1 -; CHECK-CVT-NEXT: cmn w12, #128 ; CHECK-CVT-NEXT: mov s1, v1.s[3] ; CHECK-CVT-NEXT: csel w12, w12, w9, gt +; CHECK-CVT-NEXT: cmp w13, #127 +; CHECK-CVT-NEXT: csel w13, w13, w8, lt +; CHECK-CVT-NEXT: fcvtzs w16, s2 +; CHECK-CVT-NEXT: cmn w13, #128 +; CHECK-CVT-NEXT: mov s3, v4.s[1] +; CHECK-CVT-NEXT: csel w13, w13, w9, gt ; CHECK-CVT-NEXT: cmp w15, #127 ; CHECK-CVT-NEXT: csel w15, w15, w8, lt -; CHECK-CVT-NEXT: fcvtzs w16, s3 +; CHECK-CVT-NEXT: fcvtzs w18, s1 ; CHECK-CVT-NEXT: cmn w15, #128 -; CHECK-CVT-NEXT: mov s3, v2.s[1] +; CHECK-CVT-NEXT: mov s1, v4.s[2] ; CHECK-CVT-NEXT: csel w15, w15, w9, gt ; CHECK-CVT-NEXT: cmp w14, #127 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt -; CHECK-CVT-NEXT: fcvtzs w17, s1 +; CHECK-CVT-NEXT: fcvtzs w0, s3 ; CHECK-CVT-NEXT: cmn w14, #128 -; CHECK-CVT-NEXT: mov s1, v2.s[2] +; CHECK-CVT-NEXT: mov s2, v4.s[3] ; CHECK-CVT-NEXT: csel w14, w14, w9, gt ; CHECK-CVT-NEXT: cmp w16, #127 ; CHECK-CVT-NEXT: csel w16, w16, w8, lt -; CHECK-CVT-NEXT: fcvtzs w18, s3 +; CHECK-CVT-NEXT: fcvtzs w1, s1 ; CHECK-CVT-NEXT: cmn w16, #128 -; CHECK-CVT-NEXT: fcvtzs w0, s2 +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: csel w16, w16, w9, gt -; CHECK-CVT-NEXT: cmp w17, #127 -; CHECK-CVT-NEXT: csel w17, w17, w8, lt -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: cmn w17, #128 -; CHECK-CVT-NEXT: mov s2, v2.s[3] -; CHECK-CVT-NEXT: csel w17, w17, w9, gt ; CHECK-CVT-NEXT: cmp w18, #127 ; CHECK-CVT-NEXT: csel w18, w18, w8, lt -; CHECK-CVT-NEXT: fcvtzs w1, s1 +; CHECK-CVT-NEXT: mov s3, v0.s[1] ; CHECK-CVT-NEXT: cmn w18, #128 -; CHECK-CVT-NEXT: mov s1, v0.s[1] ; CHECK-CVT-NEXT: csel w18, w18, w9, gt ; CHECK-CVT-NEXT: cmp w0, #127 ; CHECK-CVT-NEXT: csel w0, w0, w8, lt -; CHECK-CVT-NEXT: fcvtzs w2, s2 ; CHECK-CVT-NEXT: cmn w0, #128 -; CHECK-CVT-NEXT: fcvtzs w4, s0 -; CHECK-CVT-NEXT: csel w0, w0, w9, gt +; CHECK-CVT-NEXT: csel w11, w0, w9, gt +; CHECK-CVT-NEXT: cmp w17, #127 +; CHECK-CVT-NEXT: csel w17, w17, w8, lt +; CHECK-CVT-NEXT: fcvtzs w0, s2 +; CHECK-CVT-NEXT: cmn w17, #128 +; CHECK-CVT-NEXT: fmov s2, w14 +; CHECK-CVT-NEXT: mov v1.s[1], w10 +; CHECK-CVT-NEXT: csel w10, w17, w9, gt ; CHECK-CVT-NEXT: cmp w1, #127 -; CHECK-CVT-NEXT: csel w1, w1, w8, lt -; CHECK-CVT-NEXT: fcvtzs w3, s1 -; CHECK-CVT-NEXT: cmn w1, #128 -; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: csel w1, w1, w9, gt -; CHECK-CVT-NEXT: cmp w2, #127 -; CHECK-CVT-NEXT: csel w2, w2, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 -; CHECK-CVT-NEXT: cmn w2, #128 -; CHECK-CVT-NEXT: fmov s3, w14 -; CHECK-CVT-NEXT: csel w2, w2, w9, gt -; CHECK-CVT-NEXT: cmp w3, #127 -; CHECK-CVT-NEXT: csel w3, w3, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s1 -; CHECK-CVT-NEXT: cmn w3, #128 +; CHECK-CVT-NEXT: csel w17, w1, w8, lt +; CHECK-CVT-NEXT: fcvtzs w1, s3 +; CHECK-CVT-NEXT: cmn w17, #128 +; CHECK-CVT-NEXT: mov s3, v0.s[2] +; CHECK-CVT-NEXT: csel w17, w17, w9, gt +; CHECK-CVT-NEXT: cmp w0, #127 +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: csel w12, w0, w8, lt +; CHECK-CVT-NEXT: cmn w12, #128 +; CHECK-CVT-NEXT: fcvtzs w0, s0 +; CHECK-CVT-NEXT: csel w12, w12, w9, gt +; CHECK-CVT-NEXT: cmp w1, #127 +; CHECK-CVT-NEXT: csel w14, w1, w8, lt ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: csel w3, w3, w9, gt -; CHECK-CVT-NEXT: cmp w4, #127 -; CHECK-CVT-NEXT: csel w11, w4, w8, lt -; CHECK-CVT-NEXT: fmov s4, w0 -; CHECK-CVT-NEXT: cmn w11, #128 -; CHECK-CVT-NEXT: csel w11, w11, w9, gt -; CHECK-CVT-NEXT: cmp w14, #127 -; CHECK-CVT-NEXT: mov v2.s[1], w10 -; CHECK-CVT-NEXT: csel w10, w14, w8, lt -; CHECK-CVT-NEXT: mov v3.s[1], w15 +; CHECK-CVT-NEXT: cmn w14, #128 +; CHECK-CVT-NEXT: csel w14, w14, w9, gt +; CHECK-CVT-NEXT: cmp w0, #127 +; CHECK-CVT-NEXT: mov v2.s[1], w15 +; CHECK-CVT-NEXT: csel w15, w0, w8, lt +; CHECK-CVT-NEXT: cmn w15, #128 +; CHECK-CVT-NEXT: fcvtzs w0, s3 +; CHECK-CVT-NEXT: fmov s3, w10 +; CHECK-CVT-NEXT: csel w10, w15, w9, gt +; CHECK-CVT-NEXT: fmov s4, w10 +; CHECK-CVT-NEXT: cmp w0, #127 +; CHECK-CVT-NEXT: mov v3.s[1], w11 +; CHECK-CVT-NEXT: csel w10, w0, w8, lt +; CHECK-CVT-NEXT: mov v4.s[1], w14 ; CHECK-CVT-NEXT: cmn w10, #128 -; CHECK-CVT-NEXT: fmov s1, w11 -; CHECK-CVT-NEXT: csel w10, w10, w9, gt ; CHECK-CVT-NEXT: fcvtzs w11, s0 -; CHECK-CVT-NEXT: mov v4.s[1], w18 -; CHECK-CVT-NEXT: mov v1.s[1], w3 +; CHECK-CVT-NEXT: csel w10, w10, w9, gt +; CHECK-CVT-NEXT: mov v2.s[2], w16 +; CHECK-CVT-NEXT: mov v3.s[2], w17 +; CHECK-CVT-NEXT: mov v4.s[2], w10 ; CHECK-CVT-NEXT: cmp w11, #127 ; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w13 ; CHECK-CVT-NEXT: cmn w8, #128 -; CHECK-CVT-NEXT: mov v3.s[2], w16 ; CHECK-CVT-NEXT: csel w8, w8, w9, gt -; CHECK-CVT-NEXT: mov v4.s[2], w1 -; CHECK-CVT-NEXT: mov v1.s[2], w10 -; CHECK-CVT-NEXT: mov v2.s[3], w12 -; CHECK-CVT-NEXT: mov v3.s[3], w17 -; CHECK-CVT-NEXT: mov v4.s[3], w2 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h -; CHECK-CVT-NEXT: uzp1 v1.8h, v1.8h, v4.8h +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: mov v2.s[3], w18 +; CHECK-CVT-NEXT: mov v3.s[3], w12 +; CHECK-CVT-NEXT: mov v4.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h +; CHECK-CVT-NEXT: uzp1 v1.8h, v4.8h, v3.8h ; CHECK-CVT-NEXT: uzp1 v0.16b, v1.16b, v0.16b ; CHECK-CVT-NEXT: ret ; @@ -3195,8 +3193,12 @@ ; CHECK-CVT-NEXT: mov w8, #32767 ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov w9, #-32768 +; CHECK-CVT-NEXT: fcvtl2 v4.4s, v1.8h +; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h ; CHECK-CVT-NEXT: mov s3, v2.s[1] ; CHECK-CVT-NEXT: fcvtzs w11, s2 +; CHECK-CVT-NEXT: fcvtzs w14, s0 +; CHECK-CVT-NEXT: fcvtzs w17, s4 ; CHECK-CVT-NEXT: fcvtzs w10, s3 ; CHECK-CVT-NEXT: mov s3, v2.s[2] ; CHECK-CVT-NEXT: mov s2, v2.s[3] @@ -3208,105 +3210,101 @@ ; CHECK-CVT-NEXT: csel w10, w10, w9, gt ; CHECK-CVT-NEXT: cmp w11, w8 ; CHECK-CVT-NEXT: csel w11, w11, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: fcvtzs w13, s2 ; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-CVT-NEXT: mov s2, v0.s[2] ; CHECK-CVT-NEXT: csel w11, w11, w9, gt ; CHECK-CVT-NEXT: cmp w12, w8 ; CHECK-CVT-NEXT: csel w12, w12, w8, lt ; CHECK-CVT-NEXT: fcvtzs w15, s3 ; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s3, v0.s[2] -; CHECK-CVT-NEXT: csel w13, w12, w9, gt -; CHECK-CVT-NEXT: cmp w14, w8 -; CHECK-CVT-NEXT: csel w12, w14, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s0 -; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csel w12, w12, w9, gt +; CHECK-CVT-NEXT: cmp w13, w8 +; CHECK-CVT-NEXT: csel w13, w13, w8, lt +; CHECK-CVT-NEXT: fcvtzs w16, s2 +; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s3, v4.s[1] +; CHECK-CVT-NEXT: csel w13, w13, w9, gt ; CHECK-CVT-NEXT: cmp w15, w8 ; CHECK-CVT-NEXT: csel w15, w15, w8, lt -; CHECK-CVT-NEXT: fcvtzs w16, s3 +; CHECK-CVT-NEXT: fcvtzs w18, s0 ; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s3, v2.s[1] +; CHECK-CVT-NEXT: mov s0, v4.s[2] ; CHECK-CVT-NEXT: csel w15, w15, w9, gt ; CHECK-CVT-NEXT: cmp w14, w8 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt -; CHECK-CVT-NEXT: fcvtzs w17, s0 +; CHECK-CVT-NEXT: fcvtzs w0, s3 ; CHECK-CVT-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fcvtl v0.4s, v1.4h +; CHECK-CVT-NEXT: mov s2, v4.s[3] ; CHECK-CVT-NEXT: csel w14, w14, w9, gt ; CHECK-CVT-NEXT: cmp w16, w8 ; CHECK-CVT-NEXT: csel w16, w16, w8, lt -; CHECK-CVT-NEXT: fcvtzs w18, s3 +; CHECK-CVT-NEXT: fcvtzs w1, s0 ; CHECK-CVT-NEXT: cmn w16, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s1, v2.s[2] +; CHECK-CVT-NEXT: fmov s0, w11 ; CHECK-CVT-NEXT: csel w16, w16, w9, gt -; CHECK-CVT-NEXT: cmp w17, w8 -; CHECK-CVT-NEXT: csel w17, w17, w8, lt -; CHECK-CVT-NEXT: fcvtzs w0, s2 -; CHECK-CVT-NEXT: cmn w17, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s2, v2.s[3] -; CHECK-CVT-NEXT: csel w17, w17, w9, gt ; CHECK-CVT-NEXT: cmp w18, w8 ; CHECK-CVT-NEXT: csel w18, w18, w8, lt -; CHECK-CVT-NEXT: fcvtzs w1, s1 +; CHECK-CVT-NEXT: mov s3, v1.s[1] ; CHECK-CVT-NEXT: cmn w18, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s1, v0.s[1] ; CHECK-CVT-NEXT: csel w18, w18, w9, gt ; CHECK-CVT-NEXT: cmp w0, w8 ; CHECK-CVT-NEXT: csel w0, w0, w8, lt -; CHECK-CVT-NEXT: fcvtzs w2, s2 ; CHECK-CVT-NEXT: cmn w0, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fcvtzs w4, s0 -; CHECK-CVT-NEXT: csel w0, w0, w9, gt +; CHECK-CVT-NEXT: csel w11, w0, w9, gt +; CHECK-CVT-NEXT: cmp w17, w8 +; CHECK-CVT-NEXT: csel w17, w17, w8, lt +; CHECK-CVT-NEXT: fcvtzs w0, s2 +; CHECK-CVT-NEXT: cmn w17, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s2, w14 +; CHECK-CVT-NEXT: mov v0.s[1], w10 +; CHECK-CVT-NEXT: csel w10, w17, w9, gt ; CHECK-CVT-NEXT: cmp w1, w8 -; CHECK-CVT-NEXT: csel w1, w1, w8, lt -; CHECK-CVT-NEXT: fcvtzs w3, s1 -; CHECK-CVT-NEXT: cmn w1, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: csel w1, w1, w9, gt -; CHECK-CVT-NEXT: cmp w2, w8 -; CHECK-CVT-NEXT: csel w2, w2, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 -; CHECK-CVT-NEXT: cmn w2, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fmov s3, w14 -; CHECK-CVT-NEXT: csel w2, w2, w9, gt -; CHECK-CVT-NEXT: cmp w3, w8 -; CHECK-CVT-NEXT: csel w3, w3, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s1 -; CHECK-CVT-NEXT: cmn w3, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: csel w3, w3, w9, gt -; CHECK-CVT-NEXT: cmp w4, w8 -; CHECK-CVT-NEXT: csel w11, w4, w8, lt -; CHECK-CVT-NEXT: fmov s4, w0 -; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: csel w11, w11, w9, gt -; CHECK-CVT-NEXT: cmp w14, w8 -; CHECK-CVT-NEXT: mov v2.s[1], w10 -; CHECK-CVT-NEXT: csel w10, w14, w8, lt -; CHECK-CVT-NEXT: mov v3.s[1], w15 +; CHECK-CVT-NEXT: csel w17, w1, w8, lt +; CHECK-CVT-NEXT: fcvtzs w1, s3 +; CHECK-CVT-NEXT: cmn w17, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s3, v1.s[2] +; CHECK-CVT-NEXT: csel w17, w17, w9, gt +; CHECK-CVT-NEXT: cmp w0, w8 +; CHECK-CVT-NEXT: mov v0.s[2], w12 +; CHECK-CVT-NEXT: csel w12, w0, w8, lt +; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fcvtzs w0, s1 +; CHECK-CVT-NEXT: csel w12, w12, w9, gt +; CHECK-CVT-NEXT: cmp w1, w8 +; CHECK-CVT-NEXT: csel w14, w1, w8, lt +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: cmn w14, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w14, w14, w9, gt +; CHECK-CVT-NEXT: cmp w0, w8 +; CHECK-CVT-NEXT: mov v2.s[1], w15 +; CHECK-CVT-NEXT: csel w15, w0, w8, lt +; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fcvtzs w0, s3 +; CHECK-CVT-NEXT: fmov s3, w10 +; CHECK-CVT-NEXT: csel w10, w15, w9, gt +; CHECK-CVT-NEXT: fmov s4, w10 +; CHECK-CVT-NEXT: cmp w0, w8 +; CHECK-CVT-NEXT: mov v3.s[1], w11 +; CHECK-CVT-NEXT: csel w10, w0, w8, lt +; CHECK-CVT-NEXT: mov v4.s[1], w14 ; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fmov s1, w11 +; CHECK-CVT-NEXT: fcvtzs w11, s1 ; CHECK-CVT-NEXT: csel w10, w10, w9, gt -; CHECK-CVT-NEXT: fcvtzs w11, s0 -; CHECK-CVT-NEXT: mov v4.s[1], w18 -; CHECK-CVT-NEXT: mov v1.s[1], w3 +; CHECK-CVT-NEXT: mov v2.s[2], w16 +; CHECK-CVT-NEXT: mov v3.s[2], w17 +; CHECK-CVT-NEXT: mov v4.s[2], w10 ; CHECK-CVT-NEXT: cmp w11, w8 ; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w13 ; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov v3.s[2], w16 ; CHECK-CVT-NEXT: csel w8, w8, w9, gt -; CHECK-CVT-NEXT: mov v4.s[2], w1 -; CHECK-CVT-NEXT: mov v1.s[2], w10 -; CHECK-CVT-NEXT: mov v2.s[3], w12 -; CHECK-CVT-NEXT: mov v3.s[3], w17 -; CHECK-CVT-NEXT: mov v4.s[3], w2 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h -; CHECK-CVT-NEXT: uzp1 v1.8h, v1.8h, v4.8h +; CHECK-CVT-NEXT: mov v0.s[3], w13 +; CHECK-CVT-NEXT: mov v2.s[3], w18 +; CHECK-CVT-NEXT: mov v3.s[3], w12 +; CHECK-CVT-NEXT: mov v4.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v0.8h +; CHECK-CVT-NEXT: uzp1 v1.8h, v4.8h, v3.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v16f16_v16i16: @@ -3322,62 +3320,62 @@ ; CHECK-LABEL: test_signed_v8f64_v8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov d4, v3.d[1] -; CHECK-NEXT: mov w8, #127 +; CHECK-NEXT: mov w9, #127 ; CHECK-NEXT: fcvtzs w10, d3 ; CHECK-NEXT: mov w11, #-128 ; CHECK-NEXT: mov d3, v1.d[1] ; CHECK-NEXT: fcvtzs w13, d2 ; CHECK-NEXT: fcvtzs w15, d1 ; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzs w9, d4 +; CHECK-NEXT: fcvtzs w8, d4 ; CHECK-NEXT: mov d4, v2.d[1] ; CHECK-NEXT: fcvtzs w14, d3 -; CHECK-NEXT: cmp w9, #127 -; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: cmp w8, #127 +; CHECK-NEXT: fcvtzs w16, d1 +; CHECK-NEXT: csel w8, w8, w9, lt ; CHECK-NEXT: fcvtzs w12, d4 -; CHECK-NEXT: cmn w9, #128 -; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: cmn w8, #128 +; CHECK-NEXT: csel w8, w8, w11, gt ; CHECK-NEXT: cmp w10, #127 -; CHECK-NEXT: csel w10, w10, w8, lt +; CHECK-NEXT: csel w10, w10, w9, lt ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w11, gt ; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: csel w12, w12, w8, lt +; CHECK-NEXT: csel w12, w12, w9, lt +; CHECK-NEXT: fmov s4, w10 ; CHECK-NEXT: cmn w12, #128 ; CHECK-NEXT: csel w12, w12, w11, gt ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: fmov s5, w10 +; CHECK-NEXT: csel w13, w13, w9, lt ; CHECK-NEXT: cmn w13, #128 ; CHECK-NEXT: csel w13, w13, w11, gt ; CHECK-NEXT: cmp w14, #127 -; CHECK-NEXT: csel w14, w14, w8, lt +; CHECK-NEXT: csel w14, w14, w9, lt ; CHECK-NEXT: cmn w14, #128 -; CHECK-NEXT: csel w10, w14, w11, gt +; CHECK-NEXT: csel w14, w14, w11, gt ; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: fcvtzs w14, d1 -; CHECK-NEXT: csel w15, w15, w8, lt +; CHECK-NEXT: csel w15, w15, w9, lt +; CHECK-NEXT: mov v4.s[1], w8 ; CHECK-NEXT: cmn w15, #128 -; CHECK-NEXT: mov v5.s[1], w9 -; CHECK-NEXT: csel w9, w15, w11, gt -; CHECK-NEXT: cmp w14, #127 +; CHECK-NEXT: fmov s3, w13 +; CHECK-NEXT: csel w10, w15, w11, gt +; CHECK-NEXT: cmp w16, #127 ; CHECK-NEXT: fcvtzs w15, d0 -; CHECK-NEXT: fmov s4, w13 -; CHECK-NEXT: csel w13, w14, w8, lt -; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: csel w13, w13, w11, gt -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: mov v4.s[1], w12 -; CHECK-NEXT: csel w8, w15, w8, lt -; CHECK-NEXT: fmov s3, w9 +; CHECK-NEXT: csel w8, w16, w9, lt ; CHECK-NEXT: cmn w8, #128 +; CHECK-NEXT: mov v3.s[1], w12 ; CHECK-NEXT: csel w8, w8, w11, gt -; CHECK-NEXT: mov v3.s[1], w10 -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: cmp w15, #127 +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: csel w9, w15, w9, lt +; CHECK-NEXT: cmn w9, #128 +; CHECK-NEXT: mov v2.s[1], w14 +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: mov v1.s[1], w8 ; CHECK-NEXT: adrp x8, .LCPI82_0 -; CHECK-NEXT: mov v2.s[1], w13 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI82_0] -; CHECK-NEXT: tbl v0.8b, { v2.16b, v3.16b, v4.16b, v5.16b }, v0.8b +; CHECK-NEXT: tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b ; CHECK-NEXT: ret %x = call <8 x i8> @llvm.fptosi.sat.v8f64.v8i8(<8 x double> %f) ret <8 x i8> %x @@ -3387,141 +3385,141 @@ ; CHECK-LABEL: test_signed_v16f64_v16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov d16, v0.d[1] -; CHECK-NEXT: mov w8, #127 +; CHECK-NEXT: mov w9, #127 ; CHECK-NEXT: fcvtzs w11, d0 -; CHECK-NEXT: mov w9, #-128 -; CHECK-NEXT: fcvtzs w13, d1 -; CHECK-NEXT: mov d0, v2.d[1] +; CHECK-NEXT: mov w8, #-128 +; CHECK-NEXT: fcvtzs w12, d1 ; CHECK-NEXT: fcvtzs w14, d2 ; CHECK-NEXT: fcvtzs w10, d16 ; CHECK-NEXT: mov d16, v1.d[1] -; CHECK-NEXT: mov d1, v3.d[1] -; CHECK-NEXT: fcvtzs w15, d0 +; CHECK-NEXT: mov d1, v2.d[1] ; CHECK-NEXT: cmp w10, #127 -; CHECK-NEXT: csel w10, w10, w8, lt -; CHECK-NEXT: fcvtzs w12, d16 +; CHECK-NEXT: csel w10, w10, w9, lt +; CHECK-NEXT: fcvtzs w13, d16 ; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: csel w10, w10, w9, gt +; CHECK-NEXT: csel w10, w10, w8, gt ; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: csel w11, w11, w8, lt +; CHECK-NEXT: csel w11, w11, w9, lt ; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: csel w12, w12, w8, lt -; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: csel w12, w12, w9, gt +; CHECK-NEXT: csel w11, w11, w8, gt ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: csel w13, w13, w8, lt ; CHECK-NEXT: fmov s0, w11 -; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: csel w11, w13, w9, gt -; CHECK-NEXT: cmp w15, #127 +; CHECK-NEXT: csel w11, w13, w9, lt +; CHECK-NEXT: cmn w11, #128 +; CHECK-NEXT: fcvtzs w13, d1 +; CHECK-NEXT: csel w11, w11, w8, gt +; CHECK-NEXT: cmp w12, #127 ; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: csel w10, w15, w8, lt +; CHECK-NEXT: csel w10, w12, w9, lt ; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w13, d3 -; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: csel w10, w10, w9, gt +; CHECK-NEXT: mov d1, v3.d[1] +; CHECK-NEXT: csel w10, w10, w8, gt +; CHECK-NEXT: cmp w13, #127 +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: csel w13, w13, w9, lt +; CHECK-NEXT: mov w10, v0.s[1] +; CHECK-NEXT: cmn w13, #128 +; CHECK-NEXT: fcvtzs w12, d3 +; CHECK-NEXT: mov v2.s[1], w11 +; CHECK-NEXT: csel w11, w13, w8, gt ; CHECK-NEXT: cmp w14, #127 -; CHECK-NEXT: fcvtzs w11, d1 -; CHECK-NEXT: mov w15, v0.s[1] -; CHECK-NEXT: csel w14, w14, w8, lt -; CHECK-NEXT: mov v2.s[1], w12 +; CHECK-NEXT: fcvtzs w13, d1 +; CHECK-NEXT: csel w14, w14, w9, lt ; CHECK-NEXT: cmn w14, #128 -; CHECK-NEXT: csel w12, w14, w9, gt -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: csel w11, w11, w8, lt -; CHECK-NEXT: mov d1, v4.d[1] -; CHECK-NEXT: mov v0.b[1], w15 -; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: fmov w14, s2 -; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: fmov s3, w12 +; CHECK-NEXT: mov v0.b[1], w10 +; CHECK-NEXT: csel w14, w14, w8, gt +; CHECK-NEXT: fmov s1, w14 ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: mov w12, v2.s[1] -; CHECK-NEXT: csel w13, w13, w8, lt +; CHECK-NEXT: fmov w14, s2 +; CHECK-NEXT: mov w10, v2.s[1] +; CHECK-NEXT: mov d2, v4.d[1] +; CHECK-NEXT: csel w13, w13, w9, lt +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: cmn w13, #128 ; CHECK-NEXT: mov v0.b[2], w14 +; CHECK-NEXT: csel w11, w13, w8, gt +; CHECK-NEXT: cmp w12, #127 +; CHECK-NEXT: csel w12, w12, w9, lt +; CHECK-NEXT: fcvtzs w13, d2 +; CHECK-NEXT: cmn w12, #128 +; CHECK-NEXT: fmov w14, s1 +; CHECK-NEXT: mov v0.b[3], w10 +; CHECK-NEXT: csel w12, w12, w8, gt +; CHECK-NEXT: mov w10, v1.s[1] +; CHECK-NEXT: cmp w13, #127 +; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: csel w13, w13, w9, lt ; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: mov v3.s[1], w10 -; CHECK-NEXT: csel w13, w13, w9, gt -; CHECK-NEXT: fcvtzs w15, d1 +; CHECK-NEXT: mov d2, v5.d[1] +; CHECK-NEXT: mov v0.b[4], w14 ; CHECK-NEXT: fcvtzs w14, d4 -; CHECK-NEXT: mov d1, v5.d[1] -; CHECK-NEXT: mov v0.b[3], w12 -; CHECK-NEXT: fmov s4, w13 -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: fmov w13, s3 -; CHECK-NEXT: csel w10, w15, w8, lt -; CHECK-NEXT: mov w12, v3.s[1] -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: csel w10, w10, w9, gt +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: csel w11, w13, w8, gt ; CHECK-NEXT: cmp w14, #127 -; CHECK-NEXT: mov v0.b[4], w13 -; CHECK-NEXT: csel w14, w14, w8, lt -; CHECK-NEXT: mov v4.s[1], w11 -; CHECK-NEXT: cmn w14, #128 -; CHECK-NEXT: csel w14, w14, w9, gt -; CHECK-NEXT: fcvtzs w13, d5 -; CHECK-NEXT: cmp w15, #127 +; CHECK-NEXT: fcvtzs w13, d2 +; CHECK-NEXT: csel w12, w14, w9, lt ; CHECK-NEXT: mov d2, v6.d[1] -; CHECK-NEXT: mov v0.b[5], w12 -; CHECK-NEXT: csel w11, w15, w8, lt -; CHECK-NEXT: fmov w12, s4 -; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: fmov s1, w14 -; CHECK-NEXT: csel w11, w11, w9, gt +; CHECK-NEXT: mov v0.b[5], w10 +; CHECK-NEXT: cmn w12, #128 +; CHECK-NEXT: csel w12, w12, w8, gt +; CHECK-NEXT: fmov w14, s1 +; CHECK-NEXT: mov w10, v1.s[1] +; CHECK-NEXT: fmov s1, w12 ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: mov w14, v4.s[1] -; CHECK-NEXT: mov v0.b[6], w12 -; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: fcvtzs w15, d2 -; CHECK-NEXT: csel w13, w13, w9, gt -; CHECK-NEXT: fcvtzs w10, d6 -; CHECK-NEXT: mov v0.b[7], w14 -; CHECK-NEXT: cmp w15, #127 +; CHECK-NEXT: fcvtzs w12, d5 +; CHECK-NEXT: mov v0.b[6], w14 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: csel w11, w13, w9, lt +; CHECK-NEXT: cmn w11, #128 +; CHECK-NEXT: csel w11, w11, w8, gt +; CHECK-NEXT: cmp w12, #127 +; CHECK-NEXT: mov v0.b[7], w10 +; CHECK-NEXT: fcvtzs w10, d2 +; CHECK-NEXT: csel w12, w12, w9, lt ; CHECK-NEXT: fmov w14, s1 -; CHECK-NEXT: csel w12, w15, w8, lt -; CHECK-NEXT: fmov s2, w13 -; CHECK-NEXT: mov w13, v1.s[1] -; CHECK-NEXT: mov d1, v7.d[1] ; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: fcvtzs w15, d7 -; CHECK-NEXT: csel w12, w12, w9, gt +; CHECK-NEXT: mov w13, v1.s[1] +; CHECK-NEXT: csel w12, w12, w8, gt ; CHECK-NEXT: cmp w10, #127 ; CHECK-NEXT: mov v0.b[8], w14 -; CHECK-NEXT: csel w10, w10, w8, lt -; CHECK-NEXT: mov v2.s[1], w11 +; CHECK-NEXT: fcvtzs w14, d6 +; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: csel w10, w10, w9, lt ; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w11, d1 -; CHECK-NEXT: csel w10, w10, w9, gt +; CHECK-NEXT: mov d2, v7.d[1] +; CHECK-NEXT: csel w10, w10, w8, gt +; CHECK-NEXT: cmp w14, #127 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: csel w11, w14, w9, lt +; CHECK-NEXT: cmn w11, #128 +; CHECK-NEXT: csel w11, w11, w8, gt ; CHECK-NEXT: mov v0.b[9], w13 -; CHECK-NEXT: fmov w14, s2 +; CHECK-NEXT: mov w12, v1.s[1] +; CHECK-NEXT: fmov w13, s1 +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: fcvtzs w11, d2 +; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: fcvtzs w10, d7 ; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: csel w10, w11, w8, lt -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: mov w13, v2.s[1] -; CHECK-NEXT: mov v0.b[10], w14 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: mov v1.s[1], w12 -; CHECK-NEXT: csel w8, w15, w8, lt -; CHECK-NEXT: cmn w8, #128 -; CHECK-NEXT: csel w8, w8, w9, gt -; CHECK-NEXT: mov v0.b[11], w13 +; CHECK-NEXT: mov v0.b[10], w13 +; CHECK-NEXT: csel w11, w11, w9, lt +; CHECK-NEXT: cmn w11, #128 +; CHECK-NEXT: csel w11, w11, w8, gt +; CHECK-NEXT: cmp w10, #127 +; CHECK-NEXT: csel w9, w10, w9, lt +; CHECK-NEXT: fmov w10, s1 +; CHECK-NEXT: mov v0.b[11], w12 +; CHECK-NEXT: cmn w9, #128 +; CHECK-NEXT: csel w8, w9, w8, gt +; CHECK-NEXT: mov w12, v1.s[1] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: mov v0.b[12], w10 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: mov v0.b[13], w12 ; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: mov w8, v1.s[1] -; CHECK-NEXT: mov v0.b[12], w9 -; CHECK-NEXT: mov v2.s[1], w10 -; CHECK-NEXT: mov v0.b[13], w8 -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov w9, v2.s[1] -; CHECK-NEXT: mov v0.b[14], w8 -; CHECK-NEXT: mov v0.b[15], w9 +; CHECK-NEXT: mov v0.b[14], w9 +; CHECK-NEXT: mov v0.b[15], w8 ; CHECK-NEXT: ret %x = call <16 x i8> @llvm.fptosi.sat.v16f64.v16i8(<16 x double> %f) ret <16 x i8> %x @@ -3531,62 +3529,62 @@ ; CHECK-LABEL: test_signed_v8f64_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov d4, v3.d[1] -; CHECK-NEXT: mov w8, #32767 +; CHECK-NEXT: mov w9, #32767 ; CHECK-NEXT: fcvtzs w10, d3 ; CHECK-NEXT: mov w11, #-32768 ; CHECK-NEXT: mov d3, v1.d[1] ; CHECK-NEXT: fcvtzs w13, d2 ; CHECK-NEXT: fcvtzs w15, d1 ; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzs w9, d4 +; CHECK-NEXT: fcvtzs w8, d4 ; CHECK-NEXT: mov d4, v2.d[1] ; CHECK-NEXT: fcvtzs w14, d3 -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w9, w9, w8, lt +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: fcvtzs w16, d1 +; CHECK-NEXT: csel w8, w8, w9, lt ; CHECK-NEXT: fcvtzs w12, d4 -; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w9, w9, w11, gt -; CHECK-NEXT: cmp w10, w8 -; CHECK-NEXT: csel w10, w10, w8, lt +; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w8, w8, w11, gt +; CHECK-NEXT: cmp w10, w9 +; CHECK-NEXT: csel w10, w10, w9, lt ; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w10, w10, w11, gt -; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: csel w12, w12, w8, lt +; CHECK-NEXT: cmp w12, w9 +; CHECK-NEXT: csel w12, w12, w9, lt +; CHECK-NEXT: fmov s4, w10 ; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w12, w12, w11, gt -; CHECK-NEXT: cmp w13, w8 -; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: fmov s5, w10 +; CHECK-NEXT: cmp w13, w9 +; CHECK-NEXT: csel w13, w13, w9, lt ; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w13, w13, w11, gt -; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: csel w14, w14, w8, lt +; CHECK-NEXT: cmp w14, w9 +; CHECK-NEXT: csel w14, w14, w9, lt ; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w10, w14, w11, gt -; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: fcvtzs w14, d1 -; CHECK-NEXT: csel w15, w15, w8, lt +; CHECK-NEXT: csel w14, w14, w11, gt +; CHECK-NEXT: cmp w15, w9 +; CHECK-NEXT: csel w15, w15, w9, lt +; CHECK-NEXT: mov v4.s[1], w8 ; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NEXT: mov v5.s[1], w9 -; CHECK-NEXT: csel w9, w15, w11, gt -; CHECK-NEXT: cmp w14, w8 +; CHECK-NEXT: fmov s3, w13 +; CHECK-NEXT: csel w10, w15, w11, gt +; CHECK-NEXT: cmp w16, w9 ; CHECK-NEXT: fcvtzs w15, d0 -; CHECK-NEXT: fmov s4, w13 -; CHECK-NEXT: csel w13, w14, w8, lt -; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w13, w13, w11, gt -; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: mov v4.s[1], w12 -; CHECK-NEXT: csel w8, w15, w8, lt -; CHECK-NEXT: fmov s3, w9 +; CHECK-NEXT: csel w8, w16, w9, lt ; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v3.s[1], w12 ; CHECK-NEXT: csel w8, w8, w11, gt -; CHECK-NEXT: mov v3.s[1], w10 -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: cmp w15, w9 +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: csel w9, w15, w9, lt +; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v2.s[1], w14 +; CHECK-NEXT: csel w9, w9, w11, gt +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: mov v1.s[1], w8 ; CHECK-NEXT: adrp x8, .LCPI84_0 -; CHECK-NEXT: mov v2.s[1], w13 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI84_0] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b ; CHECK-NEXT: ret %x = call <8 x i16> @llvm.fptosi.sat.v8f64.v8i16(<8 x double> %f) ret <8 x i16> %x @@ -3599,112 +3597,112 @@ ; CHECK-NEXT: mov w9, #32767 ; CHECK-NEXT: fcvtzs w11, d3 ; CHECK-NEXT: mov w8, #-32768 -; CHECK-NEXT: mov d3, v1.d[1] -; CHECK-NEXT: fcvtzs w14, d2 -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: mov d1, v7.d[1] +; CHECK-NEXT: fcvtzs w12, d2 +; CHECK-NEXT: fcvtzs w14, d1 +; CHECK-NEXT: fcvtzs w16, d0 ; CHECK-NEXT: fcvtzs w10, d16 ; CHECK-NEXT: mov d16, v2.d[1] -; CHECK-NEXT: mov d2, v0.d[1] -; CHECK-NEXT: fcvtzs w18, d0 -; CHECK-NEXT: mov d0, v6.d[1] -; CHECK-NEXT: fcvtzs w0, d7 +; CHECK-NEXT: mov d2, v1.d[1] +; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: cmp w10, w9 -; CHECK-NEXT: fcvtzs w2, d6 ; CHECK-NEXT: csel w10, w10, w9, lt -; CHECK-NEXT: fcvtzs w12, d16 +; CHECK-NEXT: fcvtzs w13, d16 ; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 -; CHECK-NEXT: fcvtzs w17, d2 +; CHECK-NEXT: fcvtzs w17, d1 ; CHECK-NEXT: csel w10, w10, w8, gt ; CHECK-NEXT: cmp w11, w9 ; CHECK-NEXT: csel w11, w11, w9, lt -; CHECK-NEXT: fcvtzs w1, d0 +; CHECK-NEXT: mov d16, v7.d[1] ; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-NEXT: mov d0, v4.d[1] -; CHECK-NEXT: csel w13, w11, w8, gt -; CHECK-NEXT: cmp w12, w9 -; CHECK-NEXT: csel w11, w12, w9, lt -; CHECK-NEXT: fcvtzs w12, d3 +; CHECK-NEXT: csel w15, w11, w8, gt +; CHECK-NEXT: cmp w13, w9 +; CHECK-NEXT: csel w11, w13, w9, lt +; CHECK-NEXT: fcvtzs w13, d2 ; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-NEXT: fmov s3, w15 ; CHECK-NEXT: csel w11, w11, w8, gt -; CHECK-NEXT: cmp w14, w9 -; CHECK-NEXT: csel w14, w14, w9, lt -; CHECK-NEXT: fmov s19, w13 -; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w14, w14, w8, gt ; CHECK-NEXT: cmp w12, w9 ; CHECK-NEXT: csel w12, w12, w9, lt +; CHECK-NEXT: fcvtzs w15, d7 ; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-NEXT: mov d7, v6.d[1] ; CHECK-NEXT: csel w12, w12, w8, gt +; CHECK-NEXT: cmp w13, w9 +; CHECK-NEXT: csel w13, w13, w9, lt +; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w13, w13, w8, gt +; CHECK-NEXT: cmp w14, w9 +; CHECK-NEXT: csel w14, w14, w9, lt +; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w14, w14, w8, gt +; CHECK-NEXT: cmp w17, w9 +; CHECK-NEXT: mov v3.s[1], w10 +; CHECK-NEXT: csel w10, w17, w9, lt +; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-NEXT: fcvtzs w17, d16 +; CHECK-NEXT: csel w10, w10, w8, gt +; CHECK-NEXT: cmp w16, w9 +; CHECK-NEXT: csel w16, w16, w9, lt +; CHECK-NEXT: fmov s2, w12 +; CHECK-NEXT: cmn w16, #8, lsl #12 // =32768 +; CHECK-NEXT: mov d16, v5.d[1] +; CHECK-NEXT: csel w12, w16, w8, gt +; CHECK-NEXT: cmp w17, w9 +; CHECK-NEXT: csel w17, w17, w9, lt +; CHECK-NEXT: fcvtzs w16, d7 +; CHECK-NEXT: cmn w17, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v2.s[1], w11 +; CHECK-NEXT: csel w11, w17, w8, gt ; CHECK-NEXT: cmp w15, w9 +; CHECK-NEXT: fcvtzs w17, d6 ; CHECK-NEXT: csel w15, w15, w9, lt +; CHECK-NEXT: fmov s1, w14 ; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w16, w15, w8, gt -; CHECK-NEXT: cmp w17, w9 -; CHECK-NEXT: csel w15, w17, w9, lt -; CHECK-NEXT: fcvtzs w17, d1 -; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NEXT: mov d1, v5.d[1] -; CHECK-NEXT: csel w15, w15, w8, gt -; CHECK-NEXT: cmp w18, w9 -; CHECK-NEXT: csel w18, w18, w9, lt -; CHECK-NEXT: cmn w18, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w18, w18, w8, gt +; CHECK-NEXT: csel w14, w15, w8, gt +; CHECK-NEXT: cmp w16, w9 +; CHECK-NEXT: csel w16, w16, w9, lt +; CHECK-NEXT: fcvtzs w15, d16 +; CHECK-NEXT: cmn w16, #8, lsl #12 // =32768 +; CHECK-NEXT: fmov s19, w14 +; CHECK-NEXT: mov v1.s[1], w13 +; CHECK-NEXT: csel w13, w16, w8, gt ; CHECK-NEXT: cmp w17, w9 +; CHECK-NEXT: fcvtzs w16, d5 ; CHECK-NEXT: csel w17, w17, w9, lt +; CHECK-NEXT: mov d5, v4.d[1] ; CHECK-NEXT: cmn w17, #8, lsl #12 // =32768 +; CHECK-NEXT: fmov s0, w12 ; CHECK-NEXT: csel w17, w17, w8, gt -; CHECK-NEXT: cmp w0, w9 -; CHECK-NEXT: csel w0, w0, w9, lt -; CHECK-NEXT: cmn w0, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w13, w0, w8, gt -; CHECK-NEXT: cmp w1, w9 -; CHECK-NEXT: csel w1, w1, w9, lt -; CHECK-NEXT: fcvtzs w0, d1 -; CHECK-NEXT: cmn w1, #8, lsl #12 // =32768 -; CHECK-NEXT: mov v19.s[1], w10 -; CHECK-NEXT: csel w10, w1, w8, gt -; CHECK-NEXT: cmp w2, w9 -; CHECK-NEXT: fcvtzs w1, d5 -; CHECK-NEXT: csel w2, w2, w9, lt -; CHECK-NEXT: fmov s18, w14 -; CHECK-NEXT: cmn w2, #8, lsl #12 // =32768 -; CHECK-NEXT: fmov s23, w13 -; CHECK-NEXT: csel w2, w2, w8, gt -; CHECK-NEXT: cmp w0, w9 -; CHECK-NEXT: csel w14, w0, w9, lt -; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w13, w14, w8, gt -; CHECK-NEXT: cmp w1, w9 -; CHECK-NEXT: fcvtzs w14, d0 -; CHECK-NEXT: csel w0, w1, w9, lt -; CHECK-NEXT: cmn w0, #8, lsl #12 // =32768 -; CHECK-NEXT: mov v18.s[1], w11 -; CHECK-NEXT: csel w11, w0, w8, gt -; CHECK-NEXT: mov v23.s[1], w17 +; CHECK-NEXT: cmp w15, w9 +; CHECK-NEXT: csel w12, w15, w9, lt +; CHECK-NEXT: fcvtzs w14, d4 +; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: csel w10, w12, w8, gt +; CHECK-NEXT: cmp w16, w9 +; CHECK-NEXT: fcvtzs w12, d5 +; CHECK-NEXT: csel w15, w16, w9, lt +; CHECK-NEXT: mov v19.s[1], w11 +; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 +; CHECK-NEXT: fmov s18, w17 +; CHECK-NEXT: csel w11, w15, w8, gt +; CHECK-NEXT: cmp w12, w9 +; CHECK-NEXT: csel w12, w12, w9, lt +; CHECK-NEXT: mov v18.s[1], w13 +; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w12, w12, w8, gt ; CHECK-NEXT: cmp w14, w9 -; CHECK-NEXT: fcvtzs w17, d4 -; CHECK-NEXT: csel w14, w14, w9, lt -; CHECK-NEXT: fmov s22, w2 -; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w14, w14, w8, gt -; CHECK-NEXT: fmov s17, w16 -; CHECK-NEXT: cmp w17, w9 -; CHECK-NEXT: mov v22.s[1], w10 -; CHECK-NEXT: csel w9, w17, w9, lt -; CHECK-NEXT: fmov s21, w11 +; CHECK-NEXT: fmov s17, w11 +; CHECK-NEXT: csel w9, w14, w9, lt ; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w8, w9, w8, gt -; CHECK-NEXT: adrp x9, .LCPI85_0 -; CHECK-NEXT: mov v17.s[1], w12 -; CHECK-NEXT: mov v21.s[1], w13 -; CHECK-NEXT: fmov s16, w18 -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI85_0] -; CHECK-NEXT: fmov s20, w8 -; CHECK-NEXT: mov v16.s[1], w15 -; CHECK-NEXT: mov v20.s[1], w14 -; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b -; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b +; CHECK-NEXT: mov v17.s[1], w10 +; CHECK-NEXT: adrp x10, .LCPI85_0 +; CHECK-NEXT: fmov s16, w8 +; CHECK-NEXT: ldr q4, [x10, :lo12:.LCPI85_0] +; CHECK-NEXT: mov v16.s[1], w12 +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b +; CHECK-NEXT: tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v4.16b ; CHECK-NEXT: ret %x = call <16 x i16> @llvm.fptosi.sat.v16f64.v16i16(<16 x double> %f) ret <16 x i16> %x Index: llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll +++ llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll @@ -115,9 +115,9 @@ ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #1904214015 ; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: mov x10, #68719476735 -; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s0 @@ -138,8 +138,8 @@ ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #2139095039 ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s0 @@ -264,9 +264,9 @@ ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: mov x8, #5057542381537067007 ; CHECK-NEXT: fcmp d8, #0.0 +; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: mov x10, #68719476735 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp d8, d0 @@ -287,8 +287,8 @@ ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: mov x8, #5183643171103440895 ; CHECK-NEXT: fcmp d8, #0.0 -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp d8, d0 @@ -479,9 +479,9 @@ ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #1904214015 ; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: mov x10, #68719476735 -; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s0 @@ -503,8 +503,8 @@ ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #2139095039 ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s0 Index: llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -166,8 +166,8 @@ ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f) @@ -178,10 +178,10 @@ ; CHECK-LABEL: test_unsigned_v3f64_v3i32: ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: fcvtzu w9, d1 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: fcvtzu w8, d2 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: fcvtzu w8, d0 ; CHECK-NEXT: mov v0.s[3], w8 @@ -196,10 +196,10 @@ ; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, d2 -; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: fcvtzu w8, d1 +; CHECK-NEXT: fcvtzu w9, d2 ; CHECK-NEXT: mov d1, v1.d[1] +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: fcvtzu w8, d1 ; CHECK-NEXT: mov v0.s[3], w8 @@ -261,9 +261,9 @@ ; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: adrp x8, .LCPI14_1 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: csinv w8, w19, wzr, le @@ -288,8 +288,8 @@ ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -297,9 +297,9 @@ ; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload @@ -342,8 +342,8 @@ ; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -351,9 +351,9 @@ ; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: adrp x8, .LCPI16_1 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload @@ -410,9 +410,9 @@ ; CHECK-NEXT: stp q0, q2, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: str q3, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: str q3, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -420,9 +420,9 @@ ; CHECK-NEXT: bl __fixunstfsi ; CHECK-NEXT: adrp x8, .LCPI17_1 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] ; CHECK-NEXT: str q1, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: bl __gttf2 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload @@ -453,8 +453,8 @@ ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: csel w19, wzr, w0, lt ; CHECK-NEXT: bl __gttf2 -; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: csinv w8, w19, wzr, le ; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill @@ -569,8 +569,8 @@ ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] ; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: fmov w0, s1 ; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: fmov w0, s1 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptoui.sat.v6f16.v6i32(<6 x half> %f) ret <6 x i32> %x @@ -583,10 +583,10 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: mov w5, v0.s[1] ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: mov w5, v0.s[1] ; CHECK-NEXT: mov w6, v0.s[2] ; CHECK-NEXT: fmov w0, s1 ; CHECK-NEXT: fmov w4, s0 @@ -711,8 +711,8 @@ ; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzu x8, s0 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcvtzu x8, s1 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: fcvtzu x9, s1 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %x = call <2 x i64> @llvm.fptoui.sat.v2f32.v2i64(<2 x float> %f) ret <2 x i64> %x @@ -739,13 +739,13 @@ ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #1904214015 ; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: mov x21, #68719476735 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: csel x19, x21, x9, gt ; CHECK-NEXT: csinv x20, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti @@ -760,8 +760,8 @@ ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: csel x1, x21, x9, gt -; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #64 @@ -790,9 +790,9 @@ ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #2139095039 ; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 @@ -837,8 +837,8 @@ define <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -849,8 +849,8 @@ define <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -861,8 +861,8 @@ define <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #31, msl #8 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #31, msl #8 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -883,8 +883,8 @@ define <4 x i19> @test_unsigned_v4f32_v4i19(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #7, msl #16 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #7, msl #16 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %x = call <4 x i19> @llvm.fptoui.sat.v4f32.v4i19(<4 x float> %f) @@ -929,15 +929,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: mov s3, v0.s[1] -; CHECK-NEXT: fcvtzu x9, s0 +; CHECK-NEXT: fcvtzu x8, s0 ; CHECK-NEXT: mov s2, v1.s[1] -; CHECK-NEXT: fcvtzu x8, s1 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fcvtzu x9, s3 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fcvtzu x8, s2 -; CHECK-NEXT: mov v0.d[1], x9 -; CHECK-NEXT: mov v1.d[1], x8 +; CHECK-NEXT: fcvtzu x9, s1 +; CHECK-NEXT: fcvtzu x11, s3 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fcvtzu x10, s2 +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: mov v1.d[1], x10 ; CHECK-NEXT: ret %x = call <4 x i64> @llvm.fptoui.sat.v4f32.v4i64(<4 x float> %f) ret <4 x i64> %x @@ -970,8 +970,8 @@ ; CHECK-NEXT: mov w8, #1904214015 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov x25, #68719476735 ; CHECK-NEXT: fmov s9, w8 +; CHECK-NEXT: mov x25, #68719476735 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt @@ -1015,10 +1015,10 @@ ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: csel x1, x25, x9, gt -; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ldp x30, x25, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp x30, x25, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 @@ -1098,10 +1098,10 @@ ; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: csinv x1, x9, xzr, le -; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret @@ -1223,8 +1223,8 @@ ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f) @@ -1278,13 +1278,13 @@ ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: mov x8, #5057542381537067007 ; CHECK-NEXT: fcmp d8, #0.0 +; CHECK-NEXT: fmov d9, x8 ; CHECK-NEXT: mov x21, #68719476735 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: fmov d9, x8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp d8, d9 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: csel x19, x21, x9, gt ; CHECK-NEXT: csinv x20, x8, xzr, le ; CHECK-NEXT: bl __fixunsdfti @@ -1299,8 +1299,8 @@ ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: csel x1, x21, x9, gt -; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #64 @@ -1328,9 +1328,9 @@ ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: mov x8, #5183643171103440895 ; CHECK-NEXT: fcmp d8, #0.0 +; CHECK-NEXT: fmov d9, x8 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: fmov d9, x8 ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp d8, d9 @@ -1524,37 +1524,37 @@ ; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i64: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-NEXT: mov h1, v0.h[2] -; CHECK-CVT-NEXT: mov h2, v0.h[1] -; CHECK-CVT-NEXT: fcvt s3, h0 -; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: mov h3, v0.h[3] +; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvtzu x8, s3 -; CHECK-CVT-NEXT: fcvt s3, h0 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvtzu x8, s0 +; CHECK-CVT-NEXT: fcvtzu x10, s2 ; CHECK-CVT-NEXT: fcvtzu x9, s1 +; CHECK-CVT-NEXT: fcvtzu x11, s3 ; CHECK-CVT-NEXT: fmov d0, x8 -; CHECK-CVT-NEXT: fcvtzu x8, s2 -; CHECK-CVT-NEXT: fmov d1, x9 -; CHECK-CVT-NEXT: fcvtzu x9, s3 -; CHECK-CVT-NEXT: mov v0.d[1], x8 -; CHECK-CVT-NEXT: mov v1.d[1], x9 +; CHECK-CVT-NEXT: fmov d1, x10 +; CHECK-CVT-NEXT: mov v0.d[1], x9 +; CHECK-CVT-NEXT: mov v1.d[1], x11 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i64: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h1, v0.h[2] -; CHECK-FP16-NEXT: mov h2, v0.h[1] +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h1, v0.h[1] ; CHECK-FP16-NEXT: mov h3, v0.h[3] ; CHECK-FP16-NEXT: fcvtzu x8, h0 +; CHECK-FP16-NEXT: fcvtzu x10, h2 ; CHECK-FP16-NEXT: fcvtzu x9, h1 +; CHECK-FP16-NEXT: fcvtzu x11, h3 ; CHECK-FP16-NEXT: fmov d0, x8 -; CHECK-FP16-NEXT: fcvtzu x8, h2 -; CHECK-FP16-NEXT: fmov d1, x9 -; CHECK-FP16-NEXT: fcvtzu x9, h3 -; CHECK-FP16-NEXT: mov v0.d[1], x8 -; CHECK-FP16-NEXT: mov v1.d[1], x9 +; CHECK-FP16-NEXT: fmov d1, x10 +; CHECK-FP16-NEXT: mov v0.d[1], x9 +; CHECK-FP16-NEXT: mov v1.d[1], x11 ; CHECK-FP16-NEXT: ret %x = call <4 x i64> @llvm.fptoui.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x @@ -1589,15 +1589,15 @@ ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #1904214015 ; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: mov x25, #68719476735 ; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x10, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x19, x25, x9, gt -; CHECK-NEXT: csinv x20, x8, xzr, le +; CHECK-NEXT: csel x19, x25, x10, gt +; CHECK-NEXT: csinv x20, x9, xzr, le ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -1634,13 +1634,13 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: csel x1, x25, x9, gt -; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp x30, x25, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %x = call <4 x i100> @llvm.fptoui.sat.v4f16.v4i100(<4 x half> %f) @@ -1675,14 +1675,14 @@ ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #2139095039 ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x10, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x19, x9, xzr, le -; CHECK-NEXT: csinv x20, x8, xzr, le +; CHECK-NEXT: csinv x19, x10, xzr, le +; CHECK-NEXT: csinv x20, x9, xzr, le ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -1719,11 +1719,11 @@ ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: csinv x1, x9, xzr, le -; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #96 @@ -1753,50 +1753,50 @@ ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: mov s4, v1.s[3] -; CHECK-CVT-NEXT: mov s5, v0.s[1] ; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] ; CHECK-CVT-NEXT: fcvtzu w10, s0 -; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: fcvtzu w8, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] ; CHECK-CVT-NEXT: fcvtzu w11, s3 -; CHECK-CVT-NEXT: fcvtzu w12, s4 -; CHECK-CVT-NEXT: fcvtzu w13, s5 +; CHECK-CVT-NEXT: fcvtzu w12, s1 ; CHECK-CVT-NEXT: cmp w8, #1 ; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo ; CHECK-CVT-NEXT: cmp w9, #1 ; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo ; CHECK-CVT-NEXT: cmp w11, #1 +; CHECK-CVT-NEXT: fmov s1, w9 +; CHECK-CVT-NEXT: fcvtzu w9, s2 ; CHECK-CVT-NEXT: csinc w11, w11, wzr, lo ; CHECK-CVT-NEXT: cmp w12, #1 ; CHECK-CVT-NEXT: csinc w12, w12, wzr, lo -; CHECK-CVT-NEXT: cmp w13, #1 -; CHECK-CVT-NEXT: csinc w13, w13, wzr, lo -; CHECK-CVT-NEXT: cmp w10, #1 -; CHECK-CVT-NEXT: csinc w10, w10, wzr, lo -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvtzu w9, s1 -; CHECK-CVT-NEXT: fmov s3, w10 -; CHECK-CVT-NEXT: mov v2.s[1], w8 +; CHECK-CVT-NEXT: mov s2, v0.s[2] ; CHECK-CVT-NEXT: cmp w9, #1 -; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo +; CHECK-CVT-NEXT: cmp w10, #1 +; CHECK-CVT-NEXT: mov v1.s[1], w8 +; CHECK-CVT-NEXT: csinc w8, w10, wzr, lo +; CHECK-CVT-NEXT: fmov s3, w8 +; CHECK-CVT-NEXT: fcvtzu w8, s2 +; CHECK-CVT-NEXT: mov v3.s[1], w9 ; CHECK-CVT-NEXT: fcvtzu w9, s0 -; CHECK-CVT-NEXT: mov v3.s[1], w13 -; CHECK-CVT-NEXT: mov v2.s[2], w11 +; CHECK-CVT-NEXT: cmp w8, #1 +; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo +; CHECK-CVT-NEXT: mov v1.s[2], w11 ; CHECK-CVT-NEXT: cmp w9, #1 ; CHECK-CVT-NEXT: mov v3.s[2], w8 ; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo -; CHECK-CVT-NEXT: mov v2.s[3], w12 +; CHECK-CVT-NEXT: mov v1.s[3], w12 ; CHECK-CVT-NEXT: mov v3.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h +; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v1.8h ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.8h, #1 ; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: movi v1.8h, #1 ; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret @@ -1812,43 +1812,43 @@ ; CHECK-CVT-NEXT: mov w8, #255 ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: mov s4, v1.s[3] -; CHECK-CVT-NEXT: mov s5, v0.s[1] ; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] ; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] ; CHECK-CVT-NEXT: fcvtzu w12, s3 -; CHECK-CVT-NEXT: fcvtzu w13, s4 -; CHECK-CVT-NEXT: fcvtzu w14, s5 +; CHECK-CVT-NEXT: fcvtzu w13, s1 ; CHECK-CVT-NEXT: cmp w9, #255 ; CHECK-CVT-NEXT: csel w9, w9, w8, lo ; CHECK-CVT-NEXT: cmp w10, #255 ; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: cmp w12, #255 +; CHECK-CVT-NEXT: fmov s1, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s2 ; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, #255 ; CHECK-CVT-NEXT: csel w13, w13, w8, lo -; CHECK-CVT-NEXT: cmp w14, #255 -; CHECK-CVT-NEXT: csel w14, w14, w8, lo -; CHECK-CVT-NEXT: cmp w11, #255 -; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w10 -; CHECK-CVT-NEXT: fcvtzu w10, s1 -; CHECK-CVT-NEXT: fmov s3, w11 -; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: mov s2, v0.s[2] ; CHECK-CVT-NEXT: cmp w10, #255 -; CHECK-CVT-NEXT: csel w9, w10, w8, lo +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w11, #255 +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: csel w9, w11, w8, lo +; CHECK-CVT-NEXT: fmov s3, w9 +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: mov v3.s[1], w10 ; CHECK-CVT-NEXT: fcvtzu w10, s0 -; CHECK-CVT-NEXT: mov v3.s[1], w14 -; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: cmp w9, #255 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: mov v1.s[2], w12 ; CHECK-CVT-NEXT: cmp w10, #255 -; CHECK-CVT-NEXT: csel w8, w10, w8, lo ; CHECK-CVT-NEXT: mov v3.s[2], w9 -; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: csel w8, w10, w8, lo +; CHECK-CVT-NEXT: mov v1.s[3], w13 ; CHECK-CVT-NEXT: mov v3.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h +; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v1.8h ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; @@ -1869,43 +1869,43 @@ ; CHECK-CVT-NEXT: mov w8, #8191 ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: mov s4, v1.s[3] -; CHECK-CVT-NEXT: mov s5, v0.s[1] ; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] ; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] ; CHECK-CVT-NEXT: fcvtzu w12, s3 -; CHECK-CVT-NEXT: fcvtzu w13, s4 -; CHECK-CVT-NEXT: fcvtzu w14, s5 +; CHECK-CVT-NEXT: fcvtzu w13, s1 ; CHECK-CVT-NEXT: cmp w9, w8 ; CHECK-CVT-NEXT: csel w9, w9, w8, lo ; CHECK-CVT-NEXT: cmp w10, w8 ; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: fmov s1, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s2 ; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, w8 ; CHECK-CVT-NEXT: csel w13, w13, w8, lo -; CHECK-CVT-NEXT: cmp w14, w8 -; CHECK-CVT-NEXT: csel w14, w14, w8, lo -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w10 -; CHECK-CVT-NEXT: fcvtzu w10, s1 -; CHECK-CVT-NEXT: fmov s3, w11 -; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: mov s2, v0.s[2] ; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: csel w9, w10, w8, lo +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: csel w9, w11, w8, lo +; CHECK-CVT-NEXT: fmov s3, w9 +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: mov v3.s[1], w10 ; CHECK-CVT-NEXT: fcvtzu w10, s0 -; CHECK-CVT-NEXT: mov v3.s[1], w14 -; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: mov v1.s[2], w12 ; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: csel w8, w10, w8, lo ; CHECK-CVT-NEXT: mov v3.s[2], w9 -; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: csel w8, w10, w8, lo +; CHECK-CVT-NEXT: mov v1.s[3], w13 ; CHECK-CVT-NEXT: mov v3.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h +; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13: @@ -1926,43 +1926,43 @@ ; CHECK-CVT-NEXT: mov w8, #65535 ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: mov s4, v1.s[3] -; CHECK-CVT-NEXT: mov s5, v0.s[1] ; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] ; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] ; CHECK-CVT-NEXT: fcvtzu w12, s3 -; CHECK-CVT-NEXT: fcvtzu w13, s4 -; CHECK-CVT-NEXT: fcvtzu w14, s5 +; CHECK-CVT-NEXT: fcvtzu w13, s1 ; CHECK-CVT-NEXT: cmp w9, w8 ; CHECK-CVT-NEXT: csel w9, w9, w8, lo ; CHECK-CVT-NEXT: cmp w10, w8 ; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: fmov s1, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s2 ; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, w8 ; CHECK-CVT-NEXT: csel w13, w13, w8, lo -; CHECK-CVT-NEXT: cmp w14, w8 -; CHECK-CVT-NEXT: csel w14, w14, w8, lo -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w10 -; CHECK-CVT-NEXT: fcvtzu w10, s1 -; CHECK-CVT-NEXT: fmov s3, w11 -; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: mov s2, v0.s[2] ; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: csel w9, w10, w8, lo +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: csel w9, w11, w8, lo +; CHECK-CVT-NEXT: fmov s3, w9 +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: mov v3.s[1], w10 ; CHECK-CVT-NEXT: fcvtzu w10, s0 -; CHECK-CVT-NEXT: mov v3.s[1], w14 -; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: mov v1.s[2], w12 ; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: csel w8, w10, w8, lo ; CHECK-CVT-NEXT: mov v3.s[2], w9 -; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: csel w8, w10, w8, lo +; CHECK-CVT-NEXT: mov v1.s[3], w13 ; CHECK-CVT-NEXT: mov v3.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h +; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i16: @@ -1976,21 +1976,21 @@ define <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) { ; CHECK-LABEL: test_unsigned_v8f16_v8i19: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v2.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-NEXT: movi v1.4s, #7, msl #16 -; CHECK-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: movi v2.4s, #7, msl #16 +; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mov w1, v2.s[1] -; CHECK-NEXT: mov w2, v2.s[2] -; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w3, v2.s[3] -; CHECK-NEXT: mov w6, v0.s[2] -; CHECK-NEXT: mov w7, v0.s[3] -; CHECK-NEXT: fmov w4, s0 -; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: mov w6, v1.s[2] +; CHECK-NEXT: mov w1, v0.s[1] +; CHECK-NEXT: mov w2, v0.s[2] +; CHECK-NEXT: mov w7, v1.s[3] +; CHECK-NEXT: mov w3, v0.s[3] +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <8 x i19> @llvm.fptoui.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> %x @@ -2096,63 +2096,63 @@ ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i64: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-CVT-NEXT: mov h2, v0.h[1] +; CHECK-CVT-NEXT: fcvt s3, h0 ; CHECK-CVT-NEXT: mov h4, v0.h[2] -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: fcvt s2, h1 -; CHECK-CVT-NEXT: mov h3, v1.h[1] -; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: fcvt s5, h1 +; CHECK-CVT-NEXT: mov h7, v1.h[2] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: mov h6, v1.h[1] ; CHECK-CVT-NEXT: mov h1, v1.h[3] +; CHECK-CVT-NEXT: fcvtzu x8, s3 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h0 ; CHECK-CVT-NEXT: fcvtzu x9, s5 -; CHECK-CVT-NEXT: fcvtzu x8, s2 -; CHECK-CVT-NEXT: fcvt s2, h3 -; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s5, h7 +; CHECK-CVT-NEXT: fcvtzu x10, s2 +; CHECK-CVT-NEXT: fcvt s2, h6 ; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fmov d0, x8 +; CHECK-CVT-NEXT: fcvtzu x8, s3 +; CHECK-CVT-NEXT: fcvtzu x11, s5 +; CHECK-CVT-NEXT: mov v0.d[1], x10 ; CHECK-CVT-NEXT: fcvtzu x10, s2 -; CHECK-CVT-NEXT: fmov d2, x8 -; CHECK-CVT-NEXT: fcvtzu x8, s4 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: fmov d0, x9 -; CHECK-CVT-NEXT: mov v2.d[1], x10 -; CHECK-CVT-NEXT: fcvtzu x10, s5 +; CHECK-CVT-NEXT: fmov d2, x9 +; CHECK-CVT-NEXT: fcvtzu x9, s4 ; CHECK-CVT-NEXT: fmov d1, x8 -; CHECK-CVT-NEXT: fcvtzu x9, s3 -; CHECK-CVT-NEXT: fcvtzu x8, s4 -; CHECK-CVT-NEXT: fmov d3, x10 -; CHECK-CVT-NEXT: fcvtzu x10, s6 -; CHECK-CVT-NEXT: mov v0.d[1], x9 -; CHECK-CVT-NEXT: mov v1.d[1], x8 -; CHECK-CVT-NEXT: mov v3.d[1], x10 +; CHECK-CVT-NEXT: fcvtzu x8, s6 +; CHECK-CVT-NEXT: fmov d3, x11 +; CHECK-CVT-NEXT: mov v2.d[1], x10 +; CHECK-CVT-NEXT: mov v1.d[1], x9 +; CHECK-CVT-NEXT: mov v3.d[1], x8 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i64: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-FP16-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzu x8, h0 ; CHECK-FP16-NEXT: mov h3, v0.h[2] -; CHECK-FP16-NEXT: mov h5, v0.h[3] -; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] +; CHECK-FP16-NEXT: mov h4, v0.h[3] +; CHECK-FP16-NEXT: mov h5, v2.h[2] +; CHECK-FP16-NEXT: fmov d0, x8 ; CHECK-FP16-NEXT: fcvtzu x8, h1 -; CHECK-FP16-NEXT: mov h4, v1.h[2] -; CHECK-FP16-NEXT: mov h6, v1.h[3] -; CHECK-FP16-NEXT: fcvtzu x10, h2 -; CHECK-FP16-NEXT: fmov d2, x8 +; CHECK-FP16-NEXT: mov h1, v2.h[1] +; CHECK-FP16-NEXT: mov h6, v2.h[3] +; CHECK-FP16-NEXT: fcvtzu x9, h2 +; CHECK-FP16-NEXT: mov v0.d[1], x8 ; CHECK-FP16-NEXT: fcvtzu x8, h3 -; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fmov d0, x9 -; CHECK-FP16-NEXT: mov v2.d[1], x10 -; CHECK-FP16-NEXT: fcvtzu x10, h4 +; CHECK-FP16-NEXT: fcvtzu x11, h5 +; CHECK-FP16-NEXT: fcvtzu x10, h1 +; CHECK-FP16-NEXT: fmov d2, x9 +; CHECK-FP16-NEXT: fcvtzu x9, h4 ; CHECK-FP16-NEXT: fmov d1, x8 -; CHECK-FP16-NEXT: fcvtzu x9, h3 -; CHECK-FP16-NEXT: fcvtzu x8, h5 -; CHECK-FP16-NEXT: fmov d3, x10 -; CHECK-FP16-NEXT: fcvtzu x10, h6 -; CHECK-FP16-NEXT: mov v0.d[1], x9 -; CHECK-FP16-NEXT: mov v1.d[1], x8 -; CHECK-FP16-NEXT: mov v3.d[1], x10 +; CHECK-FP16-NEXT: fcvtzu x8, h6 +; CHECK-FP16-NEXT: fmov d3, x11 +; CHECK-FP16-NEXT: mov v2.d[1], x10 +; CHECK-FP16-NEXT: mov v1.d[1], x9 +; CHECK-FP16-NEXT: mov v3.d[1], x8 ; CHECK-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x @@ -2195,17 +2195,17 @@ ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #1904214015 ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov x21, #68719476735 -; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: mov x24, #68719476735 +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x10, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x20, x21, x8, gt +; CHECK-NEXT: csinv x8, x10, xzr, le +; CHECK-NEXT: csel x20, x24, x9, gt +; CHECK-NEXT: str x8, [sp, #24] // 8-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: str x9, [sp, #24] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -2214,7 +2214,7 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x23, x21, x8, gt +; CHECK-NEXT: csel x22, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti @@ -2226,7 +2226,7 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x24, x21, x9, gt +; CHECK-NEXT: csel x23, x24, x9, gt ; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti @@ -2238,7 +2238,7 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x26, x21, x9, gt +; CHECK-NEXT: csel x26, x24, x9, gt ; CHECK-NEXT: str x8, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti @@ -2249,8 +2249,9 @@ ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x29, x9, xzr, le -; CHECK-NEXT: csel x28, x21, x8, gt +; CHECK-NEXT: csinv x9, x9, xzr, le +; CHECK-NEXT: str x9, [sp] // 8-byte Folded Spill +; CHECK-NEXT: csel x29, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: fcmp s8, #0.0 @@ -2259,8 +2260,8 @@ ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x27, x9, xzr, le -; CHECK-NEXT: csel x22, x21, x8, gt +; CHECK-NEXT: csinv x25, x9, xzr, le +; CHECK-NEXT: csel x27, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload @@ -2270,58 +2271,56 @@ ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x25, x21, x9, gt -; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill +; CHECK-NEXT: csel x28, x24, x9, gt +; CHECK-NEXT: csinv x21, x8, xzr, le ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr x11, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: fmov d0, x27 -; CHECK-NEXT: fmov d1, x29 ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: lsr x10, x22, #28 -; CHECK-NEXT: stur x11, [x19, #75] -; CHECK-NEXT: lsr x11, x28, #28 -; CHECK-NEXT: mov v0.d[1], x22 -; CHECK-NEXT: ldr x12, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: mov v1.d[1], x28 +; CHECK-NEXT: fmov d0, x25 +; CHECK-NEXT: lsr x10, x27, #28 +; CHECK-NEXT: lsr x11, x29, #28 +; CHECK-NEXT: ldr d1, [sp] // 8-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: stur x12, [x19, #50] -; CHECK-NEXT: fmov x12, d0 -; CHECK-NEXT: fmov x13, d1 -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: ldp d0, d1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: csel x9, x21, x9, gt ; CHECK-NEXT: strb w10, [x19, #49] -; CHECK-NEXT: extr x10, x22, x12, #28 -; CHECK-NEXT: bfi x9, x12, #36, #28 -; CHECK-NEXT: stur x8, [x19, #25] -; CHECK-NEXT: extr x8, x28, x13, #28 -; CHECK-NEXT: mov v0.d[1], x23 ; CHECK-NEXT: strb w11, [x19, #24] -; CHECK-NEXT: mov v1.d[1], x20 +; CHECK-NEXT: mov v0.d[1], x27 +; CHECK-NEXT: csinv x8, x8, xzr, le +; CHECK-NEXT: stur x8, [x19, #25] +; CHECK-NEXT: csel x8, x24, x9, gt +; CHECK-NEXT: ldr x9, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: mov v1.d[1], x29 +; CHECK-NEXT: stur x9, [x19, #75] +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: bfi x8, x9, #36, #28 +; CHECK-NEXT: stur x10, [x19, #50] +; CHECK-NEXT: extr x10, x27, x9, #28 +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: stur x8, [x19, #33] ; CHECK-NEXT: stur x10, [x19, #41] -; CHECK-NEXT: stur x9, [x19, #33] -; CHECK-NEXT: bfi x25, x13, #36, #28 -; CHECK-NEXT: str x8, [x19, #16] -; CHECK-NEXT: lsr x9, x23, #28 +; CHECK-NEXT: lsr x8, x22, #28 +; CHECK-NEXT: extr x10, x29, x9, #28 +; CHECK-NEXT: bfi x28, x9, #36, #28 +; CHECK-NEXT: lsr x9, x20, #28 +; CHECK-NEXT: str x10, [x19, #16] +; CHECK-NEXT: ldp d0, d1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: strb w8, [x19, #99] +; CHECK-NEXT: strb w9, [x19, #74] +; CHECK-NEXT: stp x21, x28, [x19] +; CHECK-NEXT: mov v0.d[1], x22 +; CHECK-NEXT: mov v1.d[1], x20 ; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: ldr x12, [sp] // 8-byte Folded Reload -; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: lsr x10, x20, #28 -; CHECK-NEXT: strb w9, [x19, #99] -; CHECK-NEXT: stp x12, x25, [x19] -; CHECK-NEXT: extr x12, x23, x8, #28 +; CHECK-NEXT: extr x9, x22, x8, #28 ; CHECK-NEXT: bfi x26, x8, #36, #28 -; CHECK-NEXT: extr x8, x20, x11, #28 -; CHECK-NEXT: bfi x24, x11, #36, #28 -; CHECK-NEXT: strb w10, [x19, #74] -; CHECK-NEXT: stur x12, [x19, #91] +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: stur x9, [x19, #91] ; CHECK-NEXT: stur x26, [x19, #83] -; CHECK-NEXT: stur x8, [x19, #66] -; CHECK-NEXT: stur x24, [x19, #58] +; CHECK-NEXT: extr x9, x20, x8, #28 +; CHECK-NEXT: bfi x23, x8, #36, #28 +; CHECK-NEXT: stur x9, [x19, #66] +; CHECK-NEXT: stur x23, [x19, #58] ; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload @@ -2371,16 +2370,16 @@ ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #2139095039 ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x10, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csinv x8, x8, xzr, le +; CHECK-NEXT: csinv x11, x10, xzr, le +; CHECK-NEXT: csinv x8, x9, xzr, le +; CHECK-NEXT: stp x8, x11, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: stp x8, x9, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 @@ -2391,8 +2390,8 @@ ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x9, x9, xzr, le ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: stp x8, x9, [sp] // 16-byte Folded Spill +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 @@ -2514,9 +2513,9 @@ define <16 x i8> @test_unsigned_v16f32_v16i8(<16 x float> %f) { ; CHECK-LABEL: test_unsigned_v16f32_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v4.2d, #0x0000ff000000ff ; CHECK-NEXT: fcvtzu v3.4s, v3.4s ; CHECK-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-NEXT: movi v4.2d, #0x0000ff000000ff ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-NEXT: umin v3.4s, v3.4s, v4.4s @@ -2548,9 +2547,9 @@ define <16 x i16> @test_unsigned_v16f32_v16i16(<16 x float> %f) { ; CHECK-LABEL: test_unsigned_v16f32_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v4.2d, #0x00ffff0000ffff ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v4.2d, #0x00ffff0000ffff ; CHECK-NEXT: fcvtzu v3.4s, v3.4s ; CHECK-NEXT: fcvtzu v2.4s, v2.4s ; CHECK-NEXT: umin v1.4s, v1.4s, v4.4s @@ -2571,84 +2570,84 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h ; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl2 v5.4s, v0.8h -; CHECK-CVT-NEXT: mov w8, #255 -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w11, #255 ; CHECK-CVT-NEXT: mov s3, v2.s[1] ; CHECK-CVT-NEXT: mov s4, v2.s[2] -; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: fcvtzu w8, s2 ; CHECK-CVT-NEXT: mov s2, v2.s[3] ; CHECK-CVT-NEXT: fcvtzu w12, s1 -; CHECK-CVT-NEXT: fcvtzu w16, s5 -; CHECK-CVT-NEXT: fcvtzu w2, s0 -; CHECK-CVT-NEXT: fcvtzu w10, s3 +; CHECK-CVT-NEXT: fcvtzu w9, s3 ; CHECK-CVT-NEXT: mov s3, v1.s[1] -; CHECK-CVT-NEXT: fcvtzu w11, s4 -; CHECK-CVT-NEXT: mov s4, v1.s[2] -; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: fcvtzu w10, s4 +; CHECK-CVT-NEXT: fcvtl2 v4.4s, v0.8h ; CHECK-CVT-NEXT: fcvtzu w13, s2 -; CHECK-CVT-NEXT: cmp w10, #255 -; CHECK-CVT-NEXT: mov s2, v5.s[1] -; CHECK-CVT-NEXT: fcvtzu w14, s3 -; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: mov s2, v1.s[2] ; CHECK-CVT-NEXT: cmp w9, #255 -; CHECK-CVT-NEXT: fcvtzu w15, s4 -; CHECK-CVT-NEXT: csel w9, w9, w8, lo -; CHECK-CVT-NEXT: cmp w11, #255 -; CHECK-CVT-NEXT: csel w11, w11, w8, lo +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: csel w9, w9, w11, lo +; CHECK-CVT-NEXT: cmp w8, #255 +; CHECK-CVT-NEXT: csel w8, w8, w11, lo +; CHECK-CVT-NEXT: fcvtzu w14, s3 +; CHECK-CVT-NEXT: mov s3, v4.s[1] +; CHECK-CVT-NEXT: fcvtzu w16, s2 +; CHECK-CVT-NEXT: fmov s2, w8 +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: csel w10, w10, w11, lo ; CHECK-CVT-NEXT: cmp w13, #255 -; CHECK-CVT-NEXT: mov s3, v5.s[2] -; CHECK-CVT-NEXT: fcvtzu w17, s1 -; CHECK-CVT-NEXT: csel w13, w13, w8, lo +; CHECK-CVT-NEXT: fcvtzu w8, s1 +; CHECK-CVT-NEXT: fcvtzu w15, s4 +; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: csel w9, w13, w11, lo ; CHECK-CVT-NEXT: cmp w14, #255 -; CHECK-CVT-NEXT: mov s4, v5.s[3] -; CHECK-CVT-NEXT: fcvtzu w18, s2 -; CHECK-CVT-NEXT: csel w14, w14, w8, lo +; CHECK-CVT-NEXT: mov s1, v4.s[2] +; CHECK-CVT-NEXT: csel w13, w14, w11, lo ; CHECK-CVT-NEXT: cmp w12, #255 -; CHECK-CVT-NEXT: mov s1, v0.s[1] -; CHECK-CVT-NEXT: csel w12, w12, w8, lo -; CHECK-CVT-NEXT: cmp w15, #255 -; CHECK-CVT-NEXT: fcvtzu w0, s3 -; CHECK-CVT-NEXT: csel w15, w15, w8, lo -; CHECK-CVT-NEXT: cmp w17, #255 -; CHECK-CVT-NEXT: csel w17, w17, w8, lo -; CHECK-CVT-NEXT: cmp w18, #255 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: csel w9, w18, w8, lo -; CHECK-CVT-NEXT: fcvtzu w18, s4 +; CHECK-CVT-NEXT: fcvtzu w14, s3 +; CHECK-CVT-NEXT: csel w12, w12, w11, lo ; CHECK-CVT-NEXT: cmp w16, #255 -; CHECK-CVT-NEXT: fcvtzu w1, s1 -; CHECK-CVT-NEXT: csel w16, w16, w8, lo -; CHECK-CVT-NEXT: cmp w0, #255 -; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: csel w0, w0, w8, lo -; CHECK-CVT-NEXT: cmp w18, #255 -; CHECK-CVT-NEXT: mov v2.s[1], w10 -; CHECK-CVT-NEXT: csel w10, w18, w8, lo -; CHECK-CVT-NEXT: cmp w1, #255 +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: csel w16, w16, w11, lo +; CHECK-CVT-NEXT: cmp w8, #255 ; CHECK-CVT-NEXT: fmov s3, w12 -; CHECK-CVT-NEXT: csel w18, w1, w8, lo -; CHECK-CVT-NEXT: cmp w2, #255 -; CHECK-CVT-NEXT: csel w1, w2, w8, lo -; CHECK-CVT-NEXT: fmov s4, w16 -; CHECK-CVT-NEXT: mov v2.s[2], w11 -; CHECK-CVT-NEXT: fcvtzu w11, s1 +; CHECK-CVT-NEXT: mov s4, v4.s[3] +; CHECK-CVT-NEXT: csel w8, w8, w11, lo +; CHECK-CVT-NEXT: cmp w14, #255 +; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: csel w10, w14, w11, lo +; CHECK-CVT-NEXT: cmp w15, #255 +; CHECK-CVT-NEXT: fcvtzu w12, s1 +; CHECK-CVT-NEXT: mov s1, v0.s[1] +; CHECK-CVT-NEXT: fcvtzu w14, s4 +; CHECK-CVT-NEXT: mov v3.s[1], w13 +; CHECK-CVT-NEXT: csel w13, w15, w11, lo +; CHECK-CVT-NEXT: fmov s4, w13 +; CHECK-CVT-NEXT: cmp w12, #255 +; CHECK-CVT-NEXT: fcvtzu w13, s0 +; CHECK-CVT-NEXT: csel w12, w12, w11, lo +; CHECK-CVT-NEXT: cmp w14, #255 +; CHECK-CVT-NEXT: mov v4.s[1], w10 +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: csel w14, w14, w11, lo ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fmov s1, w1 -; CHECK-CVT-NEXT: mov v3.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, #255 -; CHECK-CVT-NEXT: mov v4.s[1], w9 -; CHECK-CVT-NEXT: csel w9, w11, w8, lo -; CHECK-CVT-NEXT: mov v1.s[1], w18 -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v3.s[2], w15 -; CHECK-CVT-NEXT: mov v4.s[2], w0 -; CHECK-CVT-NEXT: mov v1.s[2], w9 -; CHECK-CVT-NEXT: cmp w11, #255 -; CHECK-CVT-NEXT: csel w8, w11, w8, lo -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: mov v3.s[3], w17 -; CHECK-CVT-NEXT: mov v4.s[3], w10 +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: csel w10, w10, w11, lo +; CHECK-CVT-NEXT: cmp w13, #255 +; CHECK-CVT-NEXT: csel w13, w13, w11, lo +; CHECK-CVT-NEXT: fcvtzu w15, s1 +; CHECK-CVT-NEXT: fmov s1, w13 +; CHECK-CVT-NEXT: mov v3.s[2], w16 +; CHECK-CVT-NEXT: cmp w15, #255 +; CHECK-CVT-NEXT: mov v1.s[1], w10 +; CHECK-CVT-NEXT: csel w10, w15, w11, lo +; CHECK-CVT-NEXT: mov v4.s[2], w12 +; CHECK-CVT-NEXT: fcvtzu w12, s0 +; CHECK-CVT-NEXT: mov v3.s[3], w8 +; CHECK-CVT-NEXT: mov v1.s[2], w10 +; CHECK-CVT-NEXT: cmp w12, #255 +; CHECK-CVT-NEXT: csel w8, w12, w11, lo +; CHECK-CVT-NEXT: mov v2.s[3], w9 +; CHECK-CVT-NEXT: mov v4.s[3], w14 ; CHECK-CVT-NEXT: mov v1.s[3], w8 ; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h ; CHECK-CVT-NEXT: uzp1 v1.8h, v1.8h, v4.8h @@ -2673,86 +2672,86 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v5.4s, v1.8h -; CHECK-CVT-NEXT: mov w8, #65535 -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-NEXT: mov w11, #65535 ; CHECK-CVT-NEXT: mov s3, v2.s[1] ; CHECK-CVT-NEXT: mov s4, v2.s[2] -; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: fcvtzu w8, s2 ; CHECK-CVT-NEXT: mov s2, v2.s[3] ; CHECK-CVT-NEXT: fcvtzu w12, s0 -; CHECK-CVT-NEXT: fcvtzu w16, s5 -; CHECK-CVT-NEXT: fcvtzu w2, s1 -; CHECK-CVT-NEXT: fcvtzu w10, s3 +; CHECK-CVT-NEXT: fcvtzu w9, s3 ; CHECK-CVT-NEXT: mov s3, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w11, s4 -; CHECK-CVT-NEXT: mov s4, v0.s[2] -; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: fcvtzu w10, s4 +; CHECK-CVT-NEXT: fcvtl2 v4.4s, v1.8h ; CHECK-CVT-NEXT: fcvtzu w13, s2 -; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: mov s2, v5.s[1] +; CHECK-CVT-NEXT: mov s2, v0.s[2] +; CHECK-CVT-NEXT: cmp w9, w11 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: csel w9, w9, w11, lo +; CHECK-CVT-NEXT: cmp w8, w11 +; CHECK-CVT-NEXT: csel w8, w8, w11, lo ; CHECK-CVT-NEXT: fcvtzu w14, s3 -; CHECK-CVT-NEXT: csel w10, w10, w8, lo -; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: mov s3, v4.s[1] +; CHECK-CVT-NEXT: fcvtzu w16, s2 +; CHECK-CVT-NEXT: fmov s2, w8 +; CHECK-CVT-NEXT: cmp w10, w11 +; CHECK-CVT-NEXT: csel w10, w10, w11, lo +; CHECK-CVT-NEXT: cmp w13, w11 +; CHECK-CVT-NEXT: fcvtzu w8, s0 ; CHECK-CVT-NEXT: fcvtzu w15, s4 -; CHECK-CVT-NEXT: csel w9, w9, w8, lo -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: cmp w13, w8 -; CHECK-CVT-NEXT: mov s3, v5.s[2] -; CHECK-CVT-NEXT: fcvtzu w17, s0 -; CHECK-CVT-NEXT: csel w13, w13, w8, lo -; CHECK-CVT-NEXT: cmp w14, w8 -; CHECK-CVT-NEXT: mov s4, v5.s[3] -; CHECK-CVT-NEXT: fcvtzu w18, s2 -; CHECK-CVT-NEXT: csel w14, w14, w8, lo -; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: csel w9, w13, w11, lo +; CHECK-CVT-NEXT: cmp w14, w11 +; CHECK-CVT-NEXT: mov s0, v4.s[2] +; CHECK-CVT-NEXT: csel w13, w14, w11, lo +; CHECK-CVT-NEXT: cmp w12, w11 +; CHECK-CVT-NEXT: fcvtzu w14, s3 +; CHECK-CVT-NEXT: csel w12, w12, w11, lo +; CHECK-CVT-NEXT: cmp w16, w11 +; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-NEXT: csel w16, w16, w11, lo +; CHECK-CVT-NEXT: cmp w8, w11 +; CHECK-CVT-NEXT: fmov s3, w12 +; CHECK-CVT-NEXT: mov s4, v4.s[3] +; CHECK-CVT-NEXT: csel w8, w8, w11, lo +; CHECK-CVT-NEXT: cmp w14, w11 +; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: csel w10, w14, w11, lo +; CHECK-CVT-NEXT: cmp w15, w11 +; CHECK-CVT-NEXT: fcvtzu w12, s0 ; CHECK-CVT-NEXT: mov s0, v1.s[1] -; CHECK-CVT-NEXT: csel w12, w12, w8, lo -; CHECK-CVT-NEXT: cmp w15, w8 -; CHECK-CVT-NEXT: fcvtzu w0, s3 -; CHECK-CVT-NEXT: csel w15, w15, w8, lo -; CHECK-CVT-NEXT: cmp w17, w8 -; CHECK-CVT-NEXT: csel w17, w17, w8, lo -; CHECK-CVT-NEXT: cmp w18, w8 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: csel w9, w18, w8, lo -; CHECK-CVT-NEXT: fcvtzu w18, s4 -; CHECK-CVT-NEXT: cmp w16, w8 -; CHECK-CVT-NEXT: fcvtzu w1, s0 -; CHECK-CVT-NEXT: csel w16, w16, w8, lo -; CHECK-CVT-NEXT: cmp w0, w8 +; CHECK-CVT-NEXT: fcvtzu w14, s4 +; CHECK-CVT-NEXT: mov v3.s[1], w13 +; CHECK-CVT-NEXT: csel w13, w15, w11, lo +; CHECK-CVT-NEXT: fmov s4, w13 +; CHECK-CVT-NEXT: cmp w12, w11 +; CHECK-CVT-NEXT: fcvtzu w13, s1 +; CHECK-CVT-NEXT: csel w12, w12, w11, lo +; CHECK-CVT-NEXT: cmp w14, w11 +; CHECK-CVT-NEXT: mov v4.s[1], w10 +; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: csel w14, w14, w11, lo ; CHECK-CVT-NEXT: mov s0, v1.s[2] -; CHECK-CVT-NEXT: csel w0, w0, w8, lo -; CHECK-CVT-NEXT: cmp w18, w8 -; CHECK-CVT-NEXT: mov v2.s[1], w10 -; CHECK-CVT-NEXT: csel w10, w18, w8, lo -; CHECK-CVT-NEXT: cmp w1, w8 -; CHECK-CVT-NEXT: fmov s3, w12 -; CHECK-CVT-NEXT: csel w18, w1, w8, lo -; CHECK-CVT-NEXT: cmp w2, w8 -; CHECK-CVT-NEXT: csel w1, w2, w8, lo -; CHECK-CVT-NEXT: fmov s4, w16 -; CHECK-CVT-NEXT: mov v2.s[2], w11 -; CHECK-CVT-NEXT: fcvtzu w11, s0 +; CHECK-CVT-NEXT: mov v3.s[2], w16 +; CHECK-CVT-NEXT: cmp w10, w11 +; CHECK-CVT-NEXT: csel w10, w10, w11, lo +; CHECK-CVT-NEXT: cmp w13, w11 +; CHECK-CVT-NEXT: csel w13, w13, w11, lo +; CHECK-CVT-NEXT: fcvtzu w15, s0 +; CHECK-CVT-NEXT: fmov s5, w13 ; CHECK-CVT-NEXT: mov s0, v1.s[3] -; CHECK-CVT-NEXT: fmov s5, w1 -; CHECK-CVT-NEXT: mov v3.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: mov v4.s[1], w9 -; CHECK-CVT-NEXT: csel w9, w11, w8, lo -; CHECK-CVT-NEXT: mov v5.s[1], w18 -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v3.s[2], w15 -; CHECK-CVT-NEXT: mov v4.s[2], w0 -; CHECK-CVT-NEXT: mov v5.s[2], w9 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w8, w11, w8, lo -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: mov v3.s[3], w17 -; CHECK-CVT-NEXT: mov v4.s[3], w10 -; CHECK-CVT-NEXT: mov v5.s[3], w8 +; CHECK-CVT-NEXT: mov v4.s[2], w12 +; CHECK-CVT-NEXT: cmp w15, w11 +; CHECK-CVT-NEXT: mov v5.s[1], w10 +; CHECK-CVT-NEXT: csel w10, w15, w11, lo +; CHECK-CVT-NEXT: fcvtzu w12, s0 +; CHECK-CVT-NEXT: mov v3.s[3], w8 +; CHECK-CVT-NEXT: mov v2.s[3], w9 +; CHECK-CVT-NEXT: mov v5.s[2], w10 +; CHECK-CVT-NEXT: cmp w12, w11 +; CHECK-CVT-NEXT: csel w8, w12, w11, lo +; CHECK-CVT-NEXT: mov v4.s[3], w14 ; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h +; CHECK-CVT-NEXT: mov v5.s[3], w8 ; CHECK-CVT-NEXT: uzp1 v1.8h, v5.8h, v4.8h ; CHECK-CVT-NEXT: ret ; @@ -2769,45 +2768,45 @@ ; CHECK-LABEL: test_unsigned_v8f64_v8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov d4, v3.d[1] +; CHECK-NEXT: mov d5, v2.d[1] ; CHECK-NEXT: fcvtzu w10, d3 -; CHECK-NEXT: mov d3, v2.d[1] -; CHECK-NEXT: mov w8, #255 -; CHECK-NEXT: fcvtzu w12, d2 +; CHECK-NEXT: mov w9, #255 +; CHECK-NEXT: fcvtzu w11, d2 +; CHECK-NEXT: mov d2, v1.d[1] ; CHECK-NEXT: fcvtzu w13, d1 -; CHECK-NEXT: fcvtzu w9, d4 -; CHECK-NEXT: mov d4, v1.d[1] -; CHECK-NEXT: fcvtzu w11, d3 ; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: fcvtzu w8, d4 +; CHECK-NEXT: fcvtzu w12, d5 +; CHECK-NEXT: cmp w8, #255 +; CHECK-NEXT: csel w8, w8, w9, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w11, w11, w8, lo +; CHECK-NEXT: csel w10, w10, w9, lo ; CHECK-NEXT: cmp w12, #255 -; CHECK-NEXT: csel w12, w12, w8, lo -; CHECK-NEXT: fmov s19, w10 -; CHECK-NEXT: fcvtzu w10, d4 +; CHECK-NEXT: fmov s6, w10 +; CHECK-NEXT: fcvtzu w10, d2 +; CHECK-NEXT: csel w12, w12, w9, lo +; CHECK-NEXT: cmp w11, #255 +; CHECK-NEXT: mov v6.s[1], w8 +; CHECK-NEXT: csel w8, w11, w9, lo +; CHECK-NEXT: fmov s5, w8 ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v19.s[1], w9 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: fmov s18, w12 -; CHECK-NEXT: fcvtzu w9, d1 -; CHECK-NEXT: csel w12, w13, w8, lo -; CHECK-NEXT: fcvtzu w13, d0 -; CHECK-NEXT: mov v18.s[1], w11 -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: fmov s17, w12 -; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: csel w8, w10, w9, lo +; CHECK-NEXT: fcvtzu w10, d1 ; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: csel w8, w13, w8, lo -; CHECK-NEXT: mov v17.s[1], w10 -; CHECK-NEXT: fmov s16, w8 +; CHECK-NEXT: fcvtzu w11, d0 +; CHECK-NEXT: mov v5.s[1], w12 +; CHECK-NEXT: csel w12, w13, w9, lo +; CHECK-NEXT: fmov s4, w12 +; CHECK-NEXT: cmp w10, #255 +; CHECK-NEXT: csel w10, w10, w9, lo +; CHECK-NEXT: cmp w11, #255 +; CHECK-NEXT: mov v4.s[1], w8 +; CHECK-NEXT: csel w8, w11, w9, lo +; CHECK-NEXT: fmov s3, w8 ; CHECK-NEXT: adrp x8, .LCPI82_0 -; CHECK-NEXT: mov v16.s[1], w9 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI82_0] -; CHECK-NEXT: tbl v0.8b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.8b +; CHECK-NEXT: mov v3.s[1], w10 +; CHECK-NEXT: tbl v0.8b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.8b ; CHECK-NEXT: ret %x = call <8 x i8> @llvm.fptoui.sat.v8f64.v8i8(<8 x double> %f) ret <8 x i8> %x @@ -2820,8 +2819,7 @@ ; CHECK-NEXT: fcvtzu w10, d0 ; CHECK-NEXT: mov d0, v1.d[1] ; CHECK-NEXT: mov w8, #255 -; CHECK-NEXT: fcvtzu w12, d1 -; CHECK-NEXT: mov d1, v2.d[1] +; CHECK-NEXT: fcvtzu w12, d2 ; CHECK-NEXT: fcvtzu w9, d16 ; CHECK-NEXT: fcvtzu w11, d0 ; CHECK-NEXT: cmp w9, #255 @@ -2831,94 +2829,95 @@ ; CHECK-NEXT: cmp w11, #255 ; CHECK-NEXT: fmov s0, w10 ; CHECK-NEXT: csel w10, w11, w8, lo -; CHECK-NEXT: cmp w12, #255 -; CHECK-NEXT: csel w11, w12, w8, lo ; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: fcvtzu w9, d1 -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcvtzu w11, d2 +; CHECK-NEXT: mov d1, v2.d[1] ; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov d2, v3.d[1] -; CHECK-NEXT: mov w12, v0.s[1] ; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: fcvtzu w10, d2 -; CHECK-NEXT: mov d2, v4.d[1] -; CHECK-NEXT: mov v0.b[1], w12 -; CHECK-NEXT: fmov w13, s1 -; CHECK-NEXT: mov w12, v1.s[1] -; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: mov w11, v0.s[1] +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov d1, v3.d[1] +; CHECK-NEXT: mov v2.s[1], w10 +; CHECK-NEXT: cmp w9, #255 +; CHECK-NEXT: mov v0.b[1], w11 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: cmp w12, #255 +; CHECK-NEXT: csel w12, w12, w8, lo +; CHECK-NEXT: fmov w11, s2 +; CHECK-NEXT: mov w10, v2.s[1] +; CHECK-NEXT: fmov s2, w12 +; CHECK-NEXT: fcvtzu w12, d1 +; CHECK-NEXT: mov d1, v4.d[1] +; CHECK-NEXT: mov v0.b[2], w11 ; CHECK-NEXT: fcvtzu w11, d3 -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v0.b[2], w13 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: csel w9, w10, w8, lo +; CHECK-NEXT: mov v2.s[1], w9 +; CHECK-NEXT: cmp w12, #255 +; CHECK-NEXT: csel w9, w12, w8, lo ; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: fcvtzu w10, d2 +; CHECK-NEXT: mov v0.b[3], w10 ; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: mov d2, v5.d[1] -; CHECK-NEXT: mov v0.b[3], w12 -; CHECK-NEXT: fmov w12, s1 -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov w13, v1.s[1] -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcvtzu w11, d4 +; CHECK-NEXT: fmov w12, s2 +; CHECK-NEXT: mov w10, v2.s[1] +; CHECK-NEXT: fmov s2, w11 +; CHECK-NEXT: fcvtzu w11, d1 +; CHECK-NEXT: mov d1, v5.d[1] ; CHECK-NEXT: mov v0.b[4], w12 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: csel w9, w10, w8, lo +; CHECK-NEXT: fcvtzu w12, d4 +; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w10, w11, w8, lo -; CHECK-NEXT: mov v0.b[5], w13 -; CHECK-NEXT: fcvtzu w13, d2 -; CHECK-NEXT: fmov w11, s1 -; CHECK-NEXT: mov w12, v1.s[1] -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: fcvtzu w10, d5 -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: mov v0.b[6], w11 -; CHECK-NEXT: mov d2, v6.d[1] -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: csel w9, w13, w8, lo -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: fcvtzu w13, d6 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: mov v0.b[7], w12 -; CHECK-NEXT: fcvtzu w12, d2 -; CHECK-NEXT: fmov w11, s1 -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: mov w10, v1.s[1] +; CHECK-NEXT: csel w9, w11, w8, lo ; CHECK-NEXT: cmp w12, #255 -; CHECK-NEXT: mov d1, v7.d[1] -; CHECK-NEXT: mov v0.b[8], w11 +; CHECK-NEXT: mov v0.b[5], w10 +; CHECK-NEXT: csel w12, w12, w8, lo +; CHECK-NEXT: fmov w11, s2 +; CHECK-NEXT: mov w10, v2.s[1] +; CHECK-NEXT: fmov s2, w12 +; CHECK-NEXT: fcvtzu w12, d1 +; CHECK-NEXT: mov d1, v6.d[1] +; CHECK-NEXT: mov v0.b[6], w11 +; CHECK-NEXT: fcvtzu w11, d5 ; CHECK-NEXT: mov v2.s[1], w9 +; CHECK-NEXT: cmp w12, #255 ; CHECK-NEXT: csel w9, w12, w8, lo -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: csel w11, w13, w8, lo -; CHECK-NEXT: fcvtzu w13, d7 -; CHECK-NEXT: mov v0.b[9], w10 -; CHECK-NEXT: fmov w10, s2 -; CHECK-NEXT: fmov s3, w11 +; CHECK-NEXT: cmp w11, #255 +; CHECK-NEXT: mov v0.b[7], w10 +; CHECK-NEXT: csel w11, w11, w8, lo +; CHECK-NEXT: fmov w12, s2 +; CHECK-NEXT: mov w10, v2.s[1] +; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: fcvtzu w11, d1 -; CHECK-NEXT: mov w12, v2.s[1] -; CHECK-NEXT: mov v0.b[10], w10 -; CHECK-NEXT: mov v3.s[1], w9 +; CHECK-NEXT: mov d1, v7.d[1] +; CHECK-NEXT: mov v0.b[8], w12 +; CHECK-NEXT: fcvtzu w12, d6 +; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: cmp w11, #255 ; CHECK-NEXT: csel w9, w11, w8, lo -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: csel w8, w13, w8, lo -; CHECK-NEXT: mov v0.b[11], w12 -; CHECK-NEXT: fmov w10, s3 +; CHECK-NEXT: cmp w12, #255 +; CHECK-NEXT: mov v0.b[9], w10 +; CHECK-NEXT: fmov w11, s2 +; CHECK-NEXT: mov w10, v2.s[1] +; CHECK-NEXT: mov v0.b[10], w11 +; CHECK-NEXT: csel w11, w12, w8, lo +; CHECK-NEXT: fmov s2, w11 +; CHECK-NEXT: mov v2.s[1], w9 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov v0.b[11], w10 +; CHECK-NEXT: fcvtzu w10, d7 +; CHECK-NEXT: cmp w9, #255 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: fmov w12, s2 +; CHECK-NEXT: cmp w10, #255 +; CHECK-NEXT: mov w11, v2.s[1] +; CHECK-NEXT: csel w8, w10, w8, lo ; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, v3.s[1] -; CHECK-NEXT: mov v0.b[12], w10 +; CHECK-NEXT: mov v0.b[12], w12 ; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: mov v0.b[13], w8 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov w9, v1.s[1] -; CHECK-NEXT: mov v0.b[14], w8 -; CHECK-NEXT: mov v0.b[15], w9 +; CHECK-NEXT: mov v0.b[13], w11 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: mov w8, v1.s[1] +; CHECK-NEXT: mov v0.b[14], w9 +; CHECK-NEXT: mov v0.b[15], w8 ; CHECK-NEXT: ret %x = call <16 x i8> @llvm.fptoui.sat.v16f64.v16i8(<16 x double> %f) ret <16 x i8> %x @@ -2928,45 +2927,45 @@ ; CHECK-LABEL: test_unsigned_v8f64_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov d4, v3.d[1] +; CHECK-NEXT: mov d5, v2.d[1] ; CHECK-NEXT: fcvtzu w10, d3 -; CHECK-NEXT: mov d3, v2.d[1] -; CHECK-NEXT: mov w8, #65535 -; CHECK-NEXT: fcvtzu w12, d2 +; CHECK-NEXT: mov w9, #65535 +; CHECK-NEXT: fcvtzu w11, d2 +; CHECK-NEXT: mov d2, v1.d[1] ; CHECK-NEXT: fcvtzu w13, d1 -; CHECK-NEXT: fcvtzu w9, d4 -; CHECK-NEXT: mov d4, v1.d[1] -; CHECK-NEXT: fcvtzu w11, d3 ; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w10, w8 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w11, w8 -; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: csel w12, w12, w8, lo -; CHECK-NEXT: fmov s19, w10 -; CHECK-NEXT: fcvtzu w10, d4 -; CHECK-NEXT: cmp w10, w8 -; CHECK-NEXT: mov v19.s[1], w9 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w13, w8 -; CHECK-NEXT: fmov s18, w12 -; CHECK-NEXT: fcvtzu w9, d1 -; CHECK-NEXT: csel w12, w13, w8, lo -; CHECK-NEXT: fcvtzu w13, d0 -; CHECK-NEXT: mov v18.s[1], w11 -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: fmov s17, w12 -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w13, w8 -; CHECK-NEXT: csel w8, w13, w8, lo -; CHECK-NEXT: mov v17.s[1], w10 -; CHECK-NEXT: fmov s16, w8 +; CHECK-NEXT: fcvtzu w8, d4 +; CHECK-NEXT: fcvtzu w12, d5 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w8, w8, w9, lo +; CHECK-NEXT: cmp w10, w9 +; CHECK-NEXT: csel w10, w10, w9, lo +; CHECK-NEXT: cmp w12, w9 +; CHECK-NEXT: fmov s6, w10 +; CHECK-NEXT: fcvtzu w10, d2 +; CHECK-NEXT: csel w12, w12, w9, lo +; CHECK-NEXT: cmp w11, w9 +; CHECK-NEXT: mov v6.s[1], w8 +; CHECK-NEXT: csel w8, w11, w9, lo +; CHECK-NEXT: fmov s5, w8 +; CHECK-NEXT: cmp w10, w9 +; CHECK-NEXT: csel w8, w10, w9, lo +; CHECK-NEXT: fcvtzu w10, d1 +; CHECK-NEXT: cmp w13, w9 +; CHECK-NEXT: fcvtzu w11, d0 +; CHECK-NEXT: mov v5.s[1], w12 +; CHECK-NEXT: csel w12, w13, w9, lo +; CHECK-NEXT: fmov s4, w12 +; CHECK-NEXT: cmp w10, w9 +; CHECK-NEXT: csel w10, w10, w9, lo +; CHECK-NEXT: cmp w11, w9 +; CHECK-NEXT: mov v4.s[1], w8 +; CHECK-NEXT: csel w8, w11, w9, lo +; CHECK-NEXT: fmov s3, w8 ; CHECK-NEXT: adrp x8, .LCPI84_0 -; CHECK-NEXT: mov v16.s[1], w9 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI84_0] -; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b +; CHECK-NEXT: mov v3.s[1], w10 +; CHECK-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b ; CHECK-NEXT: ret %x = call <8 x i16> @llvm.fptoui.sat.v8f64.v8i16(<8 x double> %f) ret <8 x i16> %x @@ -2976,81 +2975,81 @@ ; CHECK-LABEL: test_unsigned_v16f64_v16i16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov d16, v3.d[1] -; CHECK-NEXT: fcvtzu w9, d3 -; CHECK-NEXT: mov d3, v2.d[1] +; CHECK-NEXT: mov d17, v2.d[1] +; CHECK-NEXT: fcvtzu w10, d3 ; CHECK-NEXT: mov w8, #65535 -; CHECK-NEXT: fcvtzu w10, d2 +; CHECK-NEXT: fcvtzu w11, d2 ; CHECK-NEXT: mov d2, v1.d[1] -; CHECK-NEXT: fcvtzu w11, d1 -; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzu w12, d16 -; CHECK-NEXT: fcvtzu w13, d0 -; CHECK-NEXT: fcvtzu w14, d3 -; CHECK-NEXT: mov d0, v7.d[1] -; CHECK-NEXT: fcvtzu w15, d2 -; CHECK-NEXT: fcvtzu w17, d6 -; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: fcvtzu w16, d1 -; CHECK-NEXT: csel w12, w12, w8, lo +; CHECK-NEXT: fcvtzu w9, d16 +; CHECK-NEXT: fcvtzu w12, d17 ; CHECK-NEXT: cmp w9, w8 ; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: csel w14, w14, w8, lo ; CHECK-NEXT: cmp w10, w8 ; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: fmov s19, w9 -; CHECK-NEXT: csel w9, w15, w8, lo +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: fmov s19, w10 +; CHECK-NEXT: csel w10, w12, w8, lo +; CHECK-NEXT: cmp w11, w8 +; CHECK-NEXT: fcvtzu w12, d2 +; CHECK-NEXT: mov d2, v7.d[1] +; CHECK-NEXT: mov v19.s[1], w9 +; CHECK-NEXT: csel w9, w11, w8, lo +; CHECK-NEXT: fcvtzu w11, d1 +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: fmov s18, w9 +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: csel w9, w12, w8, lo +; CHECK-NEXT: fcvtzu w12, d7 ; CHECK-NEXT: cmp w11, w8 -; CHECK-NEXT: fcvtzu w15, d0 +; CHECK-NEXT: mov v18.s[1], w10 +; CHECK-NEXT: csel w10, w11, w8, lo +; CHECK-NEXT: fmov s17, w10 +; CHECK-NEXT: fcvtzu w10, d1 +; CHECK-NEXT: fcvtzu w11, d2 +; CHECK-NEXT: mov v17.s[1], w9 +; CHECK-NEXT: fcvtzu w9, d0 +; CHECK-NEXT: cmp w10, w8 ; CHECK-NEXT: mov d0, v6.d[1] +; CHECK-NEXT: csel w10, w10, w8, lo +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: cmp w11, w8 ; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: mov v19.s[1], w12 -; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fcvtzu w12, d7 -; CHECK-NEXT: fmov s18, w10 -; CHECK-NEXT: csel w10, w16, w8, lo -; CHECK-NEXT: cmp w13, w8 -; CHECK-NEXT: fcvtzu w16, d0 -; CHECK-NEXT: csel w13, w13, w8, lo -; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: csel w15, w15, w8, lo ; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: fcvtzu w13, d0 ; CHECK-NEXT: mov d0, v5.d[1] -; CHECK-NEXT: csel w12, w12, w8, lo -; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: mov v18.s[1], w14 -; CHECK-NEXT: fmov s23, w12 -; CHECK-NEXT: csel w12, w16, w8, lo -; CHECK-NEXT: cmp w17, w8 -; CHECK-NEXT: fcvtzu w16, d0 +; CHECK-NEXT: fmov s16, w9 +; CHECK-NEXT: csel w9, w12, w8, lo +; CHECK-NEXT: fmov s23, w9 +; CHECK-NEXT: fcvtzu w9, d6 +; CHECK-NEXT: cmp w13, w8 +; CHECK-NEXT: fcvtzu w12, d0 ; CHECK-NEXT: mov d0, v4.d[1] -; CHECK-NEXT: csel w14, w17, w8, lo -; CHECK-NEXT: fcvtzu w17, d5 -; CHECK-NEXT: fmov s17, w11 -; CHECK-NEXT: mov v23.s[1], w15 -; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fmov s22, w14 -; CHECK-NEXT: csel w14, w16, w8, lo -; CHECK-NEXT: cmp w17, w8 -; CHECK-NEXT: fcvtzu w16, d0 -; CHECK-NEXT: csel w15, w17, w8, lo -; CHECK-NEXT: fcvtzu w11, d4 -; CHECK-NEXT: mov v22.s[1], w12 -; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fmov s21, w15 -; CHECK-NEXT: csel w12, w16, w8, lo +; CHECK-NEXT: mov v23.s[1], w11 +; CHECK-NEXT: csel w11, w13, w8, lo +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: fcvtzu w13, d5 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: fmov s22, w9 +; CHECK-NEXT: csel w9, w12, w8, lo +; CHECK-NEXT: cmp w13, w8 +; CHECK-NEXT: csel w12, w13, w8, lo +; CHECK-NEXT: fcvtzu w13, d4 +; CHECK-NEXT: mov v22.s[1], w11 +; CHECK-NEXT: fcvtzu w11, d0 +; CHECK-NEXT: fmov s21, w12 +; CHECK-NEXT: mov v16.s[1], w10 +; CHECK-NEXT: adrp x10, .LCPI85_0 ; CHECK-NEXT: cmp w11, w8 -; CHECK-NEXT: csel w8, w11, w8, lo -; CHECK-NEXT: mov v17.s[1], w9 -; CHECK-NEXT: adrp x9, .LCPI85_0 -; CHECK-NEXT: mov v21.s[1], w14 -; CHECK-NEXT: fmov s16, w13 +; CHECK-NEXT: ldr q1, [x10, :lo12:.LCPI85_0] +; CHECK-NEXT: mov v21.s[1], w9 +; CHECK-NEXT: csel w9, w11, w8, lo +; CHECK-NEXT: cmp w13, w8 +; CHECK-NEXT: csel w8, w13, w8, lo ; CHECK-NEXT: fmov s20, w8 -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI85_0] -; CHECK-NEXT: mov v16.s[1], w10 -; CHECK-NEXT: mov v20.s[1], w12 ; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b +; CHECK-NEXT: mov v20.s[1], w9 ; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b ; CHECK-NEXT: ret %x = call <16 x i16> @llvm.fptoui.sat.v16f64.v16i16(<16 x double> %f) Index: llvm/test/CodeGen/AArch64/funnel-shift-rot.ll =================================================================== --- llvm/test/CodeGen/AArch64/funnel-shift-rot.ll +++ llvm/test/CodeGen/AArch64/funnel-shift-rot.ll @@ -40,13 +40,13 @@ define i16 @rotl_i16(i16 %x, i16 %z) { ; CHECK-LABEL: rotl_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: and w9, w1, #0xf -; CHECK-NEXT: and w8, w8, #0xf -; CHECK-NEXT: and w10, w0, #0xffff -; CHECK-NEXT: lsl w9, w0, w9 -; CHECK-NEXT: lsr w8, w10, w8 -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: and w10, w1, #0xf +; CHECK-NEXT: and w9, w9, #0xf +; CHECK-NEXT: lsl w10, w0, w10 +; CHECK-NEXT: lsr w8, w8, w9 +; CHECK-NEXT: orr w0, w10, w8 ; CHECK-NEXT: ret %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z) ret i16 %f @@ -132,13 +132,13 @@ define i16 @rotr_i16(i16 %x, i16 %z) { ; CHECK-LABEL: rotr_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: and w9, w1, #0xf -; CHECK-NEXT: and w8, w8, #0xf -; CHECK-NEXT: and w10, w0, #0xffff -; CHECK-NEXT: lsr w9, w10, w9 -; CHECK-NEXT: lsl w8, w0, w8 -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: and w10, w1, #0xf +; CHECK-NEXT: and w9, w9, #0xf +; CHECK-NEXT: lsr w8, w8, w10 +; CHECK-NEXT: lsl w9, w0, w9 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z) ret i16 %f @@ -171,8 +171,8 @@ ; CHECK-NEXT: neg v3.4s, v1.4s ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-NEXT: neg v1.4s, v1.4s -; CHECK-NEXT: and v2.16b, v3.16b, v2.16b -; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s +; CHECK-NEXT: and v3.16b, v3.16b, v2.16b +; CHECK-NEXT: ushl v2.4s, v0.4s, v3.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/funnel-shift.ll =================================================================== --- llvm/test/CodeGen/AArch64/funnel-shift.ll +++ llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -19,11 +19,11 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: fshl_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: lsr w8, w1, #1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsr w9, w1, #1 +; CHECK-NEXT: mvn w9, w2 ; CHECK-NEXT: lsl w10, w0, w2 -; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: lsr w8, w8, w9 ; CHECK-NEXT: orr w0, w10, w8 ; CHECK-NEXT: ret %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z) @@ -33,10 +33,10 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) { ; CHECK-LABEL: fshl_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsr x9, x1, #1 +; CHECK-NEXT: lsr x8, x1, #1 +; CHECK-NEXT: mvn w9, w2 ; CHECK-NEXT: lsl x10, x0, x2 -; CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: lsr x8, x8, x9 ; CHECK-NEXT: orr x0, x10, x8 ; CHECK-NEXT: ret %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z) @@ -47,17 +47,17 @@ ; CHECK-LABEL: fshl_i128: ; CHECK: // %bb.0: ; CHECK-NEXT: tst x4, #0x40 -; CHECK-NEXT: mvn w8, w4 +; CHECK-NEXT: mvn w11, w4 +; CHECK-NEXT: csel x8, x3, x0, ne ; CHECK-NEXT: csel x9, x2, x3, ne -; CHECK-NEXT: csel x10, x3, x0, ne +; CHECK-NEXT: lsl x10, x8, x4 ; CHECK-NEXT: lsr x9, x9, #1 -; CHECK-NEXT: lsl x11, x10, x4 +; CHECK-NEXT: lsr x8, x8, #1 ; CHECK-NEXT: csel x12, x0, x1, ne -; CHECK-NEXT: lsr x10, x10, #1 -; CHECK-NEXT: lsr x9, x9, x8 +; CHECK-NEXT: lsr x9, x9, x11 ; CHECK-NEXT: lsl x12, x12, x4 -; CHECK-NEXT: lsr x8, x10, x8 -; CHECK-NEXT: orr x0, x11, x9 +; CHECK-NEXT: lsr x8, x8, x11 +; CHECK-NEXT: orr x0, x10, x9 ; CHECK-NEXT: orr x1, x12, x8 ; CHECK-NEXT: ret %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z) @@ -72,16 +72,16 @@ ; CHECK-NEXT: mov x9, #31883 ; CHECK-NEXT: and x8, x2, #0x1fffffffff ; CHECK-NEXT: movk x9, #3542, lsl #16 -; CHECK-NEXT: ubfiz x10, x1, #26, #37 ; CHECK-NEXT: movk x9, #51366, lsl #32 ; CHECK-NEXT: movk x9, #56679, lsl #48 ; CHECK-NEXT: umulh x8, x8, x9 ; CHECK-NEXT: mov w9, #37 ; CHECK-NEXT: ubfx x8, x8, #5, #27 ; CHECK-NEXT: msub w8, w8, w9, w2 -; CHECK-NEXT: mvn w9, w8 +; CHECK-NEXT: ubfiz x9, x1, #26, #37 +; CHECK-NEXT: mvn w10, w8 ; CHECK-NEXT: lsl x8, x0, x8 -; CHECK-NEXT: lsr x9, x10, x9 +; CHECK-NEXT: lsr x9, x9, x10 ; CHECK-NEXT: orr x0, x8, x9 ; CHECK-NEXT: ret %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z) @@ -178,11 +178,11 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: fshr_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: lsl w8, w0, #1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsl w9, w0, #1 +; CHECK-NEXT: mvn w9, w2 ; CHECK-NEXT: lsr w10, w1, w2 -; CHECK-NEXT: lsl w8, w9, w8 +; CHECK-NEXT: lsl w8, w8, w9 ; CHECK-NEXT: orr w0, w8, w10 ; CHECK-NEXT: ret %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) @@ -192,10 +192,10 @@ define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) { ; CHECK-LABEL: fshr_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsl x9, x0, #1 +; CHECK-NEXT: lsl x8, x0, #1 +; CHECK-NEXT: mvn w9, w2 ; CHECK-NEXT: lsr x10, x1, x2 -; CHECK-NEXT: lsl x8, x9, x8 +; CHECK-NEXT: lsl x8, x8, x9 ; CHECK-NEXT: orr x0, x8, x10 ; CHECK-NEXT: ret %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z) @@ -210,18 +210,18 @@ ; CHECK-NEXT: mov x9, #31883 ; CHECK-NEXT: and x8, x2, #0x1fffffffff ; CHECK-NEXT: movk x9, #3542, lsl #16 -; CHECK-NEXT: lsl x10, x1, #27 +; CHECK-NEXT: lsl x10, x0, #1 ; CHECK-NEXT: movk x9, #51366, lsl #32 -; CHECK-NEXT: lsl x11, x0, #1 ; CHECK-NEXT: movk x9, #56679, lsl #48 ; CHECK-NEXT: umulh x8, x8, x9 ; CHECK-NEXT: mov w9, #37 ; CHECK-NEXT: lsr x8, x8, #5 ; CHECK-NEXT: msub w8, w8, w9, w2 +; CHECK-NEXT: lsl x9, x1, #27 ; CHECK-NEXT: add w8, w8, #27 -; CHECK-NEXT: mvn w9, w8 -; CHECK-NEXT: lsr x8, x10, x8 -; CHECK-NEXT: lsl x9, x11, x9 +; CHECK-NEXT: mvn w11, w8 +; CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: lsl x9, x10, x11 ; CHECK-NEXT: orr x0, x9, x8 ; CHECK-NEXT: ret %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z) @@ -348,13 +348,13 @@ define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) { ; CHECK-LABEL: or_shl_fshl: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: mvn w9, w2 -; CHECK-NEXT: lsr w10, w1, #1 -; CHECK-NEXT: lsr w9, w10, w9 -; CHECK-NEXT: lsl w8, w0, w8 +; CHECK-NEXT: lsr w8, w1, #1 +; CHECK-NEXT: mov w9, w2 +; CHECK-NEXT: mvn w10, w2 +; CHECK-NEXT: lsl w9, w0, w9 +; CHECK-NEXT: lsr w8, w8, w10 ; CHECK-NEXT: lsl w10, w1, w2 -; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: orr w8, w9, w8 ; CHECK-NEXT: orr w0, w8, w10 ; CHECK-NEXT: ret %shy = shl i32 %y, %s @@ -380,13 +380,13 @@ define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) { ; CHECK-LABEL: or_shl_fshl_commute: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: mvn w9, w2 -; CHECK-NEXT: lsr w10, w1, #1 -; CHECK-NEXT: lsr w9, w10, w9 -; CHECK-NEXT: lsl w8, w0, w8 +; CHECK-NEXT: lsr w8, w1, #1 +; CHECK-NEXT: mov w9, w2 +; CHECK-NEXT: mvn w10, w2 +; CHECK-NEXT: lsl w9, w0, w9 +; CHECK-NEXT: lsr w8, w8, w10 ; CHECK-NEXT: lsl w10, w1, w2 -; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: orr w8, w9, w8 ; CHECK-NEXT: orr w0, w10, w8 ; CHECK-NEXT: ret %shy = shl i32 %y, %s @@ -412,13 +412,13 @@ define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) { ; CHECK-LABEL: or_lshr_fshr: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: mvn w9, w2 -; CHECK-NEXT: lsl w10, w1, #1 -; CHECK-NEXT: lsr w8, w0, w8 -; CHECK-NEXT: lsl w9, w10, w9 +; CHECK-NEXT: lsl w8, w1, #1 +; CHECK-NEXT: mov w9, w2 +; CHECK-NEXT: mvn w10, w2 +; CHECK-NEXT: lsr w9, w0, w9 +; CHECK-NEXT: lsl w8, w8, w10 ; CHECK-NEXT: lsr w10, w1, w2 -; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: orr w0, w8, w10 ; CHECK-NEXT: ret %shy = lshr i32 %y, %s @@ -430,9 +430,9 @@ define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) { ; CHECK-LABEL: or_lshr_rotr: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w0, w2 -; CHECK-NEXT: ror w9, w1, w2 -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: ror w8, w1, w2 +; CHECK-NEXT: lsr w9, w0, w2 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret %shx = lshr i32 %x, %s %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s) @@ -443,13 +443,13 @@ define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) { ; CHECK-LABEL: or_lshr_fshr_commute: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: mvn w9, w2 -; CHECK-NEXT: lsl w10, w1, #1 -; CHECK-NEXT: lsr w8, w0, w8 -; CHECK-NEXT: lsl w9, w10, w9 +; CHECK-NEXT: lsl w8, w1, #1 +; CHECK-NEXT: mov w9, w2 +; CHECK-NEXT: mvn w10, w2 +; CHECK-NEXT: lsr w9, w0, w9 +; CHECK-NEXT: lsl w8, w8, w10 ; CHECK-NEXT: lsr w10, w1, w2 -; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: orr w0, w10, w8 ; CHECK-NEXT: ret %shy = lshr i32 %y, %s @@ -461,9 +461,9 @@ define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) { ; CHECK-LABEL: or_lshr_rotr_commute: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w0, w2 -; CHECK-NEXT: ror w9, w1, w2 -; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ror w8, w1, w2 +; CHECK-NEXT: lsr w9, w0, w2 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %shx = lshr i32 %x, %s %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s) @@ -474,11 +474,11 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) { ; CHECK-LABEL: or_shl_fshl_simplify: ; CHECK: // %bb.0: +; CHECK-NEXT: lsr w8, w0, #1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsr w9, w0, #1 +; CHECK-NEXT: mvn w9, w2 ; CHECK-NEXT: lsl w10, w1, w2 -; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: lsr w8, w8, w9 ; CHECK-NEXT: orr w0, w10, w8 ; CHECK-NEXT: ret %shy = shl i32 %y, %s @@ -490,11 +490,11 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) { ; CHECK-LABEL: or_lshr_fshr_simplify: ; CHECK: // %bb.0: +; CHECK-NEXT: lsl w8, w0, #1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsl w9, w0, #1 +; CHECK-NEXT: mvn w9, w2 ; CHECK-NEXT: lsr w10, w1, w2 -; CHECK-NEXT: lsl w8, w9, w8 +; CHECK-NEXT: lsl w8, w8, w9 ; CHECK-NEXT: orr w0, w8, w10 ; CHECK-NEXT: ret %shy = lshr i32 %y, %s Index: llvm/test/CodeGen/AArch64/global-merge-3.ll =================================================================== --- llvm/test/CodeGen/AArch64/global-merge-3.ll +++ llvm/test/CodeGen/AArch64/global-merge-3.ll @@ -11,14 +11,14 @@ ;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_x@PAGE+12 ;CHECK-APPLE-IOS-NOT: adrp ;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_x@PAGEOFF+12 -;CHECK-APPLE-IOS: str w1, [x9, #400] ;CHECK-APPLE-IOS: str w0, [x9] +;CHECK-APPLE-IOS: str w1, [x9, #400] ;CHECK-APPLE-IOS: str w2, [x8, _z@PAGEOFF] ;CHECK: adrp x8, z ;CHECK: adrp x9, .L_MergedGlobals+12 ;CHECK: add x9, x9, :lo12:.L_MergedGlobals+12 -;CHECK: str w1, [x9, #400] ;CHECK: str w0, [x9] +;CHECK: str w1, [x9, #400] ;CHECK: str w2, [x8, :lo12:z] %x3 = getelementptr inbounds [100 x i32], [100 x i32]* @x, i32 0, i64 3 %y3 = getelementptr inbounds [100 x i32], [100 x i32]* @y, i32 0, i64 3 Index: llvm/test/CodeGen/AArch64/global-merge-group-by-use.ll =================================================================== --- llvm/test/CodeGen/AArch64/global-merge-group-by-use.ll +++ llvm/test/CodeGen/AArch64/global-merge-group-by-use.ll @@ -53,9 +53,9 @@ ; CHECK-LABEL: f3: ; CHECK: ; %bb.0: ; CHECK-NEXT: adrp x8, _m3@PAGE -; CHECK-NEXT: adrp x9, __MergedGlobals@PAGE ; CHECK-NEXT: str w0, [x8, _m3@PAGEOFF] -; CHECK-NEXT: str w1, [x9, __MergedGlobals@PAGEOFF] +; CHECK-NEXT: adrp x8, __MergedGlobals@PAGE +; CHECK-NEXT: str w1, [x8, __MergedGlobals@PAGEOFF] ; CHECK-NEXT: ret store i32 %a1, i32* @m3, align 4 store i32 %a2, i32* @n3, align 4 Index: llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll =================================================================== --- llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll +++ llvm/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll @@ -26,9 +26,9 @@ ; CHECK-LABEL: f2: define void @f2(i32 %a1, i32 %a2) nounwind { ; CHECK-NEXT: adrp x8, _m2@PAGE -; CHECK-NEXT: adrp x9, _n2@PAGE ; CHECK-NEXT: str w0, [x8, _m2@PAGEOFF] -; CHECK-NEXT: str w1, [x9, _n2@PAGEOFF] +; CHECK-NEXT: adrp x8, _n2@PAGE +; CHECK-NEXT: str w1, [x8, _n2@PAGEOFF] ; CHECK-NEXT: ret store i32 %a1, i32* @m2, align 4 store i32 %a2, i32* @n2, align 4 @@ -58,9 +58,9 @@ ; CHECK-LABEL: f4: define void @f4(i32 %a1, i32 %a2) nounwind { ; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8 -; CHECK-NEXT: adrp x9, _n4@PAGE ; CHECK-NEXT: str w0, [x8, [[SET]]@PAGEOFF+8] -; CHECK-NEXT: str w1, [x9, _n4@PAGEOFF] +; CHECK-NEXT: adrp x8, _n4@PAGE +; CHECK-NEXT: str w1, [x8, _n4@PAGEOFF] ; CHECK-NEXT: ret store i32 %a1, i32* @m3, align 4 store i32 %a2, i32* @n4, align 4 Index: llvm/test/CodeGen/AArch64/half.ll =================================================================== --- llvm/test/CodeGen/AArch64/half.ll +++ llvm/test/CodeGen/AArch64/half.ll @@ -99,16 +99,16 @@ define i16 @test_fccmp(i1 %a, i16 %in) { ; CHECK-LABEL: test_fccmp: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #24576 ; CHECK-NEXT: fmov s0, w1 +; CHECK-NEXT: mov w8, #24576 ; CHECK-NEXT: movk w8, #15974, lsl #16 -; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov w8, #16384 +; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: movk w8, #15428, lsl #16 -; CHECK-NEXT: fcmp s0, s1 ; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: fcmp s0, s1 ; CHECK-NEXT: fccmp s0, s2, #8, pl ; CHECK-NEXT: csinc w8, w8, wzr, mi ; CHECK-NEXT: fcmp s0, s1 Index: llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll =================================================================== --- llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -203,8 +203,8 @@ ; CHECK-LABEL: vec_4xi32_nonsplat_eq: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI13_0 -; CHECK-NEXT: neg v1.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: neg v1.4s, v1.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s ; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 Index: llvm/test/CodeGen/AArch64/i128-math.ll =================================================================== --- llvm/test/CodeGen/AArch64/i128-math.ll +++ llvm/test/CodeGen/AArch64/i128-math.ll @@ -275,16 +275,16 @@ ; CHECK-NEXT: madd x8, x1, x2, x8 ; CHECK-NEXT: umulh x10, x1, x2 ; CHECK-NEXT: adds x8, x9, x8 -; CHECK-NEXT: cset w9, hs +; CHECK-NEXT: umulh x9, x3, x0 +; CHECK-NEXT: cset w11, hs ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: ccmp xzr, x10, #0, eq -; CHECK-NEXT: umulh x10, x3, x0 ; CHECK-NEXT: mul x0, x0, x2 ; CHECK-NEXT: ccmp xzr, x10, #0, eq -; CHECK-NEXT: cset w10, ne -; CHECK-NEXT: orr w9, w10, w9 +; CHECK-NEXT: mov x1, x8 +; CHECK-NEXT: ccmp xzr, x9, #0, eq +; CHECK-NEXT: cset w9, ne +; CHECK-NEXT: orr w9, w9, w11 ; CHECK-NEXT: eor w2, w9, #0x1 ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) @@ -305,16 +305,16 @@ ; CHECK-NEXT: madd x8, x1, x2, x8 ; CHECK-NEXT: umulh x10, x1, x2 ; CHECK-NEXT: adds x8, x9, x8 -; CHECK-NEXT: cset w9, hs +; CHECK-NEXT: umulh x9, x3, x0 +; CHECK-NEXT: cset w11, hs ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ccmp x3, #0, #4, ne -; CHECK-NEXT: mov x1, x8 -; CHECK-NEXT: ccmp xzr, x10, #0, eq -; CHECK-NEXT: umulh x10, x3, x0 ; CHECK-NEXT: mul x0, x0, x2 ; CHECK-NEXT: ccmp xzr, x10, #0, eq -; CHECK-NEXT: cset w10, ne -; CHECK-NEXT: orr w2, w10, w9 +; CHECK-NEXT: mov x1, x8 +; CHECK-NEXT: ccmp xzr, x9, #0, eq +; CHECK-NEXT: cset w9, ne +; CHECK-NEXT: orr w2, w9, w11 ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) %2 = extractvalue { i128, i1 } %1, 0 @@ -333,17 +333,17 @@ ; CHECK-NEXT: madd x8, x1, x2, x8 ; CHECK-NEXT: umulh x10, x1, x2 ; CHECK-NEXT: adds x8, x9, x8 -; CHECK-NEXT: cset w9, hs +; CHECK-NEXT: umulh x9, x3, x0 +; CHECK-NEXT: cset w11, hs ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ccmp x3, #0, #4, ne ; CHECK-NEXT: ccmp xzr, x10, #0, eq -; CHECK-NEXT: umulh x10, x3, x0 -; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: ccmp xzr, x9, #0, eq +; CHECK-NEXT: mul x9, x0, x2 ; CHECK-NEXT: cset w10, ne -; CHECK-NEXT: orr w9, w10, w9 -; CHECK-NEXT: mul x10, x0, x2 -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csinv x0, x10, xzr, eq +; CHECK-NEXT: orr w10, w10, w11 +; CHECK-NEXT: cmp w10, #0 +; CHECK-NEXT: csinv x0, x9, xzr, eq ; CHECK-NEXT: csinv x1, x8, xzr, eq ; CHECK-NEXT: ret %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y) @@ -428,8 +428,8 @@ ; CHECK-NEXT: eor x9, x19, x20 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: asr x9, x9, #63 -; CHECK-NEXT: eor x10, x9, #0x7fffffffffffffff ; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: eor x10, x9, #0x7fffffffffffffff ; CHECK-NEXT: csinv x0, x0, x9, eq ; CHECK-NEXT: csel x1, x10, x1, ne ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload Index: llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll =================================================================== --- llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll +++ llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll @@ -9,9 +9,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, x ; CHECK-NEXT: add x8, x8, :lo12:x +; CHECK-NEXT: ldp x8, x9, [x8] ; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x10, x10, :lo12:y -; CHECK-NEXT: ldp x8, x9, [x8] ; CHECK-NEXT: stp x8, x9, [x10] ; CHECK-NEXT: ret %tmp = load volatile i128, i128* @x @@ -24,9 +24,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, x ; CHECK-NEXT: add x8, x8, :lo12:x +; CHECK-NEXT: ldp x8, x9, [x8, #504] ; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x10, x10, :lo12:y -; CHECK-NEXT: ldp x8, x9, [x8, #504] ; CHECK-NEXT: stp x8, x9, [x10, #504] ; CHECK-NEXT: ret %tmp = load volatile i128, i128* bitcast (i8* getelementptr (i8, i8* bitcast (i128* @x to i8*), i64 504) to i128*) @@ -42,8 +42,8 @@ ; CHECK-NEXT: add x8, x8, #512 ; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x10, x10, :lo12:y -; CHECK-NEXT: add x10, x10, #512 ; CHECK-NEXT: ldp x8, x9, [x8] +; CHECK-NEXT: add x10, x10, #512 ; CHECK-NEXT: stp x8, x9, [x10] ; CHECK-NEXT: ret %tmp = load volatile i128, i128* bitcast (i8* getelementptr (i8, i8* bitcast (i128* @x to i8*), i64 512) to i128*) @@ -56,9 +56,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, x ; CHECK-NEXT: add x8, x8, :lo12:x +; CHECK-NEXT: ldp x8, x9, [x8, #-512] ; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x10, x10, :lo12:y -; CHECK-NEXT: ldp x8, x9, [x8, #-512] ; CHECK-NEXT: stp x8, x9, [x10, #-512] ; CHECK-NEXT: ret %tmp = load volatile i128, i128* bitcast (i8* getelementptr (i8, i8* bitcast (i128* @x to i8*), i64 -512) to i128*) @@ -74,8 +74,8 @@ ; CHECK-NEXT: sub x8, x8, #520 ; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x10, x10, :lo12:y -; CHECK-NEXT: sub x10, x10, #520 ; CHECK-NEXT: ldp x8, x9, [x8] +; CHECK-NEXT: sub x10, x10, #520 ; CHECK-NEXT: stp x8, x9, [x10] ; CHECK-NEXT: ret %tmp = load volatile i128, i128* bitcast (i8* getelementptr (i8, i8* bitcast (i128* @x to i8*), i64 -520) to i128*) @@ -91,8 +91,8 @@ ; CHECK-NEXT: sub x8, x8, #520 ; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x10, x10, :lo12:y -; CHECK-NEXT: sub x10, x10, #520 ; CHECK-NEXT: ldp x8, x9, [x8] +; CHECK-NEXT: sub x10, x10, #520 ; CHECK-NEXT: stp x8, x9, [x10] ; CHECK-NEXT: ret %tmp = load volatile i128, i128* bitcast (i8* getelementptr (i8, i8* bitcast (i128* @x to i8*), i64 -520) to i128*) @@ -108,8 +108,8 @@ ; CHECK-NEXT: add x8, x8, #503 ; CHECK-NEXT: adrp x10, y ; CHECK-NEXT: add x10, x10, :lo12:y -; CHECK-NEXT: add x10, x10, #503 ; CHECK-NEXT: ldp x8, x9, [x8] +; CHECK-NEXT: add x10, x10, #503 ; CHECK-NEXT: stp x8, x9, [x10] ; CHECK-NEXT: ret %tmp = load volatile i128, i128* bitcast (i8* getelementptr (i8, i8* bitcast (i128* @x to i8*), i64 503) to i128*) Index: llvm/test/CodeGen/AArch64/i256-math.ll =================================================================== --- llvm/test/CodeGen/AArch64/i256-math.ll +++ llvm/test/CodeGen/AArch64/i256-math.ll @@ -214,13 +214,13 @@ ; CHECK-NEXT: adcs x9, x1, x5 ; CHECK-NEXT: adcs x10, x2, x6 ; CHECK-NEXT: adcs x11, x3, x7 -; CHECK-NEXT: cset w12, vs -; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: cmp w12, #0 -; CHECK-NEXT: csel x0, x13, x8, ne -; CHECK-NEXT: eor x8, x13, #0x8000000000000000 -; CHECK-NEXT: csel x1, x13, x9, ne -; CHECK-NEXT: csel x2, x13, x10, ne +; CHECK-NEXT: asr x12, x11, #63 +; CHECK-NEXT: cset w13, vs +; CHECK-NEXT: cmp w13, #0 +; CHECK-NEXT: csel x0, x12, x8, ne +; CHECK-NEXT: eor x8, x12, #0x8000000000000000 +; CHECK-NEXT: csel x1, x12, x9, ne +; CHECK-NEXT: csel x2, x12, x10, ne ; CHECK-NEXT: csel x3, x8, x11, ne ; CHECK-NEXT: ret %1 = tail call i256 @llvm.sadd.sat.i256(i256 %x, i256 %y) @@ -292,13 +292,13 @@ ; CHECK-NEXT: sbcs x9, x1, x5 ; CHECK-NEXT: sbcs x10, x2, x6 ; CHECK-NEXT: sbcs x11, x3, x7 -; CHECK-NEXT: cset w12, vs -; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: cmp w12, #0 -; CHECK-NEXT: csel x0, x13, x8, ne -; CHECK-NEXT: eor x8, x13, #0x8000000000000000 -; CHECK-NEXT: csel x1, x13, x9, ne -; CHECK-NEXT: csel x2, x13, x10, ne +; CHECK-NEXT: asr x12, x11, #63 +; CHECK-NEXT: cset w13, vs +; CHECK-NEXT: cmp w13, #0 +; CHECK-NEXT: csel x0, x12, x8, ne +; CHECK-NEXT: eor x8, x12, #0x8000000000000000 +; CHECK-NEXT: csel x1, x12, x9, ne +; CHECK-NEXT: csel x2, x12, x10, ne ; CHECK-NEXT: csel x3, x8, x11, ne ; CHECK-NEXT: ret %1 = tail call i256 @llvm.ssub.sat.i256(i256 %x, i256 %y) Index: llvm/test/CodeGen/AArch64/icmp-shift-opt.ll =================================================================== --- llvm/test/CodeGen/AArch64/icmp-shift-opt.ll +++ llvm/test/CodeGen/AArch64/icmp-shift-opt.ll @@ -136,12 +136,12 @@ define i1 @opt_setcc_shl_ne_zero_i256(i256 %a) nounwind { ; CHECK-LABEL: opt_setcc_shl_ne_zero_i256: ; CHECK: // %bb.0: -; CHECK-NEXT: orr x8, x2, x0 -; CHECK-NEXT: extr x9, x3, x2, #47 -; CHECK-NEXT: extr x10, x1, x0, #47 -; CHECK-NEXT: extr x8, x8, x1, #47 -; CHECK-NEXT: orr x9, x10, x9 -; CHECK-NEXT: orr x8, x8, x9 +; CHECK-NEXT: extr x8, x3, x2, #47 +; CHECK-NEXT: extr x9, x1, x0, #47 +; CHECK-NEXT: orr x10, x2, x0 +; CHECK-NEXT: extr x10, x10, x1, #47 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: orr x8, x10, x8 ; CHECK-NEXT: cmp x8, #0 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/insert-extend.ll =================================================================== --- llvm/test/CodeGen/AArch64/insert-extend.ll +++ llvm/test/CodeGen/AArch64/insert-extend.ll @@ -48,24 +48,24 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $w3 killed $w3 def $x3 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w3 -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: add x10, x2, x8 -; CHECK-NEXT: add x11, x0, x9 +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: sxtw x9, w3 +; CHECK-NEXT: add x10, x0, x8 +; CHECK-NEXT: add x11, x2, x9 ; CHECK-NEXT: add x12, x10, x8 ; CHECK-NEXT: add x13, x11, x9 -; CHECK-NEXT: add x8, x12, x8 ; CHECK-NEXT: add x9, x13, x9 -; CHECK-NEXT: ldp s0, s6, [x11] -; CHECK-NEXT: ldp s3, s7, [x10] -; CHECK-NEXT: ldp s1, s5, [x8] -; CHECK-NEXT: ldp s2, s4, [x9] -; CHECK-NEXT: ld1 { v1.s }[1], [x12], #4 -; CHECK-NEXT: ld1 { v2.s }[1], [x13], #4 +; CHECK-NEXT: add x8, x12, x8 +; CHECK-NEXT: ldp s0, s6, [x10] +; CHECK-NEXT: ldp s1, s5, [x9] +; CHECK-NEXT: ldp s2, s4, [x8] +; CHECK-NEXT: ldp s3, s7, [x11] +; CHECK-NEXT: ld1 { v1.s }[1], [x13], #4 +; CHECK-NEXT: ld1 { v2.s }[1], [x12], #4 ; CHECK-NEXT: ld1 { v3.s }[1], [x2], #4 ; CHECK-NEXT: ld1 { v0.s }[1], [x0], #4 -; CHECK-NEXT: ld1 { v5.s }[1], [x12] -; CHECK-NEXT: ld1 { v4.s }[1], [x13] +; CHECK-NEXT: ld1 { v5.s }[1], [x13] +; CHECK-NEXT: ld1 { v4.s }[1], [x12] ; CHECK-NEXT: ld1 { v7.s }[1], [x2] ; CHECK-NEXT: ld1 { v6.s }[1], [x0] ; CHECK-NEXT: usubl v0.8h, v0.8b, v3.8b @@ -91,69 +91,69 @@ ; CHECK-NEXT: ext v18.16b, v7.16b, v7.16b, #8 ; CHECK-NEXT: sub v2.4s, v2.4s, v5.4s ; CHECK-NEXT: zip1 v5.4s, v0.4s, v3.4s +; CHECK-NEXT: sub v1.4s, v1.4s, v4.4s ; CHECK-NEXT: uzp2 v19.4s, v7.4s, v16.4s ; CHECK-NEXT: uzp1 v7.4s, v7.4s, v16.4s -; CHECK-NEXT: sub v1.4s, v1.4s, v4.4s -; CHECK-NEXT: uzp1 v6.4s, v18.4s, v16.4s ; CHECK-NEXT: zip2 v4.4s, v2.4s, v1.4s -; CHECK-NEXT: uzp2 v16.4s, v18.4s, v16.4s ; CHECK-NEXT: mov v2.s[1], v1.s[0] -; CHECK-NEXT: ext v1.16b, v0.16b, v5.16b, #8 +; CHECK-NEXT: uzp1 v1.4s, v18.4s, v16.4s +; CHECK-NEXT: uzp2 v6.4s, v18.4s, v16.4s +; CHECK-NEXT: ext v5.16b, v0.16b, v5.16b, #8 ; CHECK-NEXT: mov v0.s[3], v3.s[2] ; CHECK-NEXT: add v7.4s, v19.4s, v7.4s -; CHECK-NEXT: sub v3.4s, v6.4s, v16.4s -; CHECK-NEXT: rev64 v5.4s, v7.4s -; CHECK-NEXT: mov v2.d[1], v1.d[1] +; CHECK-NEXT: sub v1.4s, v1.4s, v6.4s +; CHECK-NEXT: mov v2.d[1], v5.d[1] ; CHECK-NEXT: mov v4.d[1], v0.d[1] -; CHECK-NEXT: rev64 v6.4s, v3.4s -; CHECK-NEXT: sub v0.4s, v7.4s, v5.4s +; CHECK-NEXT: rev64 v3.4s, v7.4s +; CHECK-NEXT: rev64 v6.4s, v1.4s ; CHECK-NEXT: add v5.4s, v4.4s, v2.4s ; CHECK-NEXT: sub v2.4s, v2.4s, v4.4s -; CHECK-NEXT: sub v1.4s, v3.4s, v6.4s +; CHECK-NEXT: sub v0.4s, v7.4s, v3.4s +; CHECK-NEXT: sub v3.4s, v1.4s, v6.4s ; CHECK-NEXT: rev64 v4.4s, v5.4s ; CHECK-NEXT: addp v6.4s, v7.4s, v5.4s ; CHECK-NEXT: rev64 v7.4s, v2.4s -; CHECK-NEXT: addp v3.4s, v3.4s, v2.4s +; CHECK-NEXT: addp v1.4s, v1.4s, v2.4s ; CHECK-NEXT: sub v4.4s, v5.4s, v4.4s ; CHECK-NEXT: zip1 v16.4s, v6.4s, v6.4s ; CHECK-NEXT: sub v2.4s, v2.4s, v7.4s -; CHECK-NEXT: ext v17.16b, v1.16b, v3.16b, #8 -; CHECK-NEXT: ext v5.16b, v3.16b, v2.16b, #4 -; CHECK-NEXT: ext v7.16b, v6.16b, v4.16b, #4 +; CHECK-NEXT: ext v17.16b, v3.16b, v1.16b, #8 +; CHECK-NEXT: ext v5.16b, v6.16b, v4.16b, #4 +; CHECK-NEXT: ext v7.16b, v1.16b, v2.16b, #4 ; CHECK-NEXT: ext v18.16b, v0.16b, v6.16b, #4 ; CHECK-NEXT: trn2 v0.4s, v16.4s, v0.4s -; CHECK-NEXT: ext v16.16b, v17.16b, v1.16b, #4 -; CHECK-NEXT: zip2 v5.4s, v5.4s, v3.4s -; CHECK-NEXT: zip2 v7.4s, v7.4s, v6.4s -; CHECK-NEXT: ext v18.16b, v18.16b, v18.16b, #4 -; CHECK-NEXT: mov v1.s[2], v3.s[1] -; CHECK-NEXT: ext v5.16b, v2.16b, v5.16b, #12 -; CHECK-NEXT: ext v7.16b, v4.16b, v7.16b, #12 -; CHECK-NEXT: mov v2.s[2], v3.s[3] -; CHECK-NEXT: mov v4.s[2], v6.s[3] +; CHECK-NEXT: ext v16.16b, v17.16b, v3.16b, #4 +; CHECK-NEXT: zip2 v5.4s, v5.4s, v6.4s +; CHECK-NEXT: zip2 v7.4s, v7.4s, v1.4s +; CHECK-NEXT: mov v3.s[2], v1.s[1] ; CHECK-NEXT: uzp2 v16.4s, v17.4s, v16.4s -; CHECK-NEXT: sub v19.4s, v0.4s, v18.4s -; CHECK-NEXT: mov v18.s[0], v6.s[1] -; CHECK-NEXT: sub v17.4s, v2.4s, v5.4s -; CHECK-NEXT: sub v20.4s, v4.4s, v7.4s -; CHECK-NEXT: sub v21.4s, v1.4s, v16.4s -; CHECK-NEXT: mov v2.s[1], v3.s[2] +; CHECK-NEXT: ext v5.16b, v4.16b, v5.16b, #12 +; CHECK-NEXT: mov v4.s[2], v6.s[3] +; CHECK-NEXT: ext v7.16b, v2.16b, v7.16b, #12 +; CHECK-NEXT: mov v2.s[2], v1.s[3] +; CHECK-NEXT: ext v18.16b, v18.16b, v18.16b, #4 +; CHECK-NEXT: sub v17.4s, v3.4s, v16.4s +; CHECK-NEXT: sub v19.4s, v4.4s, v5.4s ; CHECK-NEXT: mov v4.s[1], v6.s[2] -; CHECK-NEXT: mov v1.s[1], v3.s[0] +; CHECK-NEXT: sub v20.4s, v2.4s, v7.4s +; CHECK-NEXT: mov v2.s[1], v1.s[2] +; CHECK-NEXT: sub v21.4s, v0.4s, v18.4s +; CHECK-NEXT: mov v18.s[0], v6.s[1] +; CHECK-NEXT: mov v3.s[1], v1.s[0] +; CHECK-NEXT: add v1.4s, v2.4s, v7.4s +; CHECK-NEXT: add v2.4s, v4.4s, v5.4s ; CHECK-NEXT: add v0.4s, v0.4s, v18.4s -; CHECK-NEXT: add v2.4s, v2.4s, v5.4s -; CHECK-NEXT: add v3.4s, v4.4s, v7.4s -; CHECK-NEXT: add v1.4s, v1.4s, v16.4s -; CHECK-NEXT: mov v0.d[1], v19.d[1] -; CHECK-NEXT: mov v1.d[1], v21.d[1] -; CHECK-NEXT: mov v2.d[1], v17.d[1] -; CHECK-NEXT: mov v3.d[1], v20.d[1] +; CHECK-NEXT: add v3.4s, v3.4s, v16.4s +; CHECK-NEXT: mov v0.d[1], v21.d[1] +; CHECK-NEXT: mov v3.d[1], v17.d[1] +; CHECK-NEXT: mov v1.d[1], v20.d[1] +; CHECK-NEXT: mov v2.d[1], v19.d[1] ; CHECK-NEXT: movi v4.8h, #1 ; CHECK-NEXT: movi v17.2d, #0x00ffff0000ffff -; CHECK-NEXT: ushr v5.4s, v2.4s, #15 +; CHECK-NEXT: ushr v5.4s, v1.4s, #15 ; CHECK-NEXT: ushr v6.4s, v0.4s, #15 -; CHECK-NEXT: ushr v7.4s, v3.4s, #15 -; CHECK-NEXT: ushr v16.4s, v1.4s, #15 +; CHECK-NEXT: ushr v7.4s, v2.4s, #15 +; CHECK-NEXT: ushr v16.4s, v3.4s, #15 ; CHECK-NEXT: and v6.16b, v6.16b, v4.16b ; CHECK-NEXT: and v16.16b, v16.16b, v4.16b ; CHECK-NEXT: and v7.16b, v7.16b, v4.16b @@ -163,15 +163,15 @@ ; CHECK-NEXT: mul v4.4s, v4.4s, v17.4s ; CHECK-NEXT: mul v7.4s, v7.4s, v17.4s ; CHECK-NEXT: add v0.4s, v5.4s, v0.4s -; CHECK-NEXT: add v1.4s, v6.4s, v1.4s -; CHECK-NEXT: add v2.4s, v4.4s, v2.4s -; CHECK-NEXT: add v3.4s, v7.4s, v3.4s -; CHECK-NEXT: eor v2.16b, v2.16b, v4.16b -; CHECK-NEXT: eor v3.16b, v3.16b, v7.16b -; CHECK-NEXT: eor v1.16b, v1.16b, v6.16b +; CHECK-NEXT: add v3.4s, v6.4s, v3.4s +; CHECK-NEXT: add v1.4s, v4.4s, v1.4s +; CHECK-NEXT: add v2.4s, v7.4s, v2.4s +; CHECK-NEXT: eor v1.16b, v1.16b, v4.16b +; CHECK-NEXT: eor v2.16b, v2.16b, v7.16b +; CHECK-NEXT: eor v3.16b, v3.16b, v6.16b ; CHECK-NEXT: eor v0.16b, v0.16b, v5.16b -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: add v1.4s, v3.4s, v2.4s +; CHECK-NEXT: add v0.4s, v0.4s, v3.4s +; CHECK-NEXT: add v1.4s, v2.4s, v1.4s ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 Index: llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll =================================================================== --- llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll +++ llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll @@ -95,15 +95,15 @@ ; CHECK-NEXT: cnth x8 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: sub x8, x8, #8 -; CHECK-NEXT: mov w9, #8 -; CHECK-NEXT: cmp x8, #8 ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: cmp x8, #8 ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: lsl x8, x8, #1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: lsl x8, x8, #1 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 @@ -139,15 +139,15 @@ ; CHECK-NEXT: cntw x8 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: sub x8, x8, #4 -; CHECK-NEXT: mov w9, #4 -; CHECK-NEXT: cmp x8, #4 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] ; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: mov w9, #4 +; CHECK-NEXT: cmp x8, #4 ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: lsl x8, x8, #2 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: lsl x8, x8, #2 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 @@ -183,15 +183,15 @@ ; CHECK-NEXT: cntd x8 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: sub x8, x8, #2 -; CHECK-NEXT: mov w9, #2 -; CHECK-NEXT: cmp x8, #2 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] ; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: cmp x8, #2 ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 @@ -208,18 +208,18 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p1.s, vl8 -; CHECK-NEXT: subs x8, x8, #8 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1] -; CHECK-NEXT: csel x8, xzr, x8, lo +; CHECK-NEXT: cntd x8 ; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: subs x8, x8, #8 +; CHECK-NEXT: csel x8, xzr, x8, lo ; CHECK-NEXT: cmp x8, #8 +; CHECK-NEXT: uunpklo z1.d, z1.s ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: uunpklo z1.d, z1.s ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x9, x8, lsl #3] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] Index: llvm/test/CodeGen/AArch64/insert-subvector.ll =================================================================== --- llvm/test/CodeGen/AArch64/insert-subvector.ll +++ llvm/test/CodeGen/AArch64/insert-subvector.ll @@ -49,8 +49,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3 -; CHECK-NEXT: mov v3.16b, v1.16b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: mov v3.16b, v1.16b ; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b ; CHECK-NEXT: ret %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -148,8 +148,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI13_0 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3 -; CHECK-NEXT: mov v3.16b, v1.16b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: mov v3.16b, v1.16b ; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b ; CHECK-NEXT: ret %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -272,8 +272,8 @@ define <16 x i8> @load_v16i8_4_15(float %tmp, <16 x i8> %b, <4 x i8> *%a) { ; CHECK-LABEL: load_v16i8_4_15: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1 +; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_0] ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b @@ -338,8 +338,8 @@ ; CHECK-LABEL: load_v8i8_4_2: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, d1 -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: mov v0.s[1], v1.s[0] +; CHECK-NEXT: ldr s2, [x0] +; CHECK-NEXT: mov v0.s[1], v2.s[0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %l = load <4 x i8>, <4 x i8> *%a @@ -364,9 +364,9 @@ define <16 x i8> @load_v16i8_8_2(float %tmp, <16 x i8> %b, <8 x i8> *%a) { ; CHECK-LABEL: load_v16i8_8_2: ; CHECK: // %bb.0: +; CHECK-NEXT: ldr d2, [x0] ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: mov v0.d[1], v2.d[0] ; CHECK-NEXT: ret %l = load <8 x i8>, <8 x i8> *%a %s1 = shufflevector <8 x i8> %l, <8 x i8> poison, <16 x i32> @@ -379,13 +379,13 @@ define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, <2 x i16> *%a) { ; CHECK-LABEL: load_v8i16_2_1: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: add x8, x0, #2 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: add x9, x0, #2 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ld1 { v0.h }[2], [x9] +; CHECK-NEXT: xtn v2.4h, v0.4s ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: ld1 { v2.h }[2], [x8] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: mov v0.s[0], v1.s[0] +; CHECK-NEXT: mov v0.s[0], v2.s[0] ; CHECK-NEXT: ret %l = load <2 x i16>, <2 x i16> *%a %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> @@ -396,13 +396,13 @@ define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, <2 x i16> *%a) { ; CHECK-LABEL: load_v8i16_2_15: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: add x8, x0, #2 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: add x9, x0, #2 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1 -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: ld1 { v2.h }[2], [x8] +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: adrp x8, .LCPI33_0 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI33_0] +; CHECK-NEXT: ld1 { v2.h }[2], [x9] ; CHECK-NEXT: xtn v0.4h, v2.4s ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v3.16b ; CHECK-NEXT: ret @@ -415,13 +415,13 @@ define <8 x i16> @load_v8i16_2_2(float %tmp, <8 x i16> %b, <2 x i16> *%a) { ; CHECK-LABEL: load_v8i16_2_2: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: add x8, x0, #2 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: add x9, x0, #2 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ld1 { v0.h }[2], [x9] +; CHECK-NEXT: xtn v2.4h, v0.4s ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: ld1 { v2.h }[2], [x8] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: mov v0.s[1], v1.s[0] +; CHECK-NEXT: mov v0.s[1], v2.s[0] ; CHECK-NEXT: ret %l = load <2 x i16>, <2 x i16> *%a %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> @@ -432,13 +432,13 @@ define <8 x i16> @load_v8i16_2_3(float %tmp, <8 x i16> %b, <2 x i16> *%a) { ; CHECK-LABEL: load_v8i16_2_3: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: add x8, x0, #2 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: add x9, x0, #2 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ld1 { v0.h }[2], [x9] +; CHECK-NEXT: xtn v2.4h, v0.4s ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: ld1 { v2.h }[2], [x8] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: mov v0.s[2], v1.s[0] +; CHECK-NEXT: mov v0.s[2], v2.s[0] ; CHECK-NEXT: ret %l = load <2 x i16>, <2 x i16> *%a %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> @@ -449,13 +449,13 @@ define <8 x i16> @load_v8i16_2_4(float %tmp, <8 x i16> %b, <2 x i16> *%a) { ; CHECK-LABEL: load_v8i16_2_4: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: add x8, x0, #2 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: add x9, x0, #2 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ld1 { v0.h }[2], [x9] +; CHECK-NEXT: xtn v2.4h, v0.4s ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: ld1 { v2.h }[2], [x8] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: mov v0.s[3], v1.s[0] +; CHECK-NEXT: mov v0.s[3], v2.s[0] ; CHECK-NEXT: ret %l = load <2 x i16>, <2 x i16> *%a %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> @@ -513,9 +513,9 @@ define <8 x i16> @load_v8i16_4_2(float %tmp, <8 x i16> %b, <4 x i16> *%a) { ; CHECK-LABEL: load_v8i16_4_2: ; CHECK: // %bb.0: +; CHECK-NEXT: ldr d2, [x0] ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: mov v0.d[1], v2.d[0] ; CHECK-NEXT: ret %l = load <4 x i16>, <4 x i16> *%a %s1 = shufflevector <4 x i16> %l, <4 x i16> poison, <8 x i32> @@ -541,9 +541,9 @@ define <4 x i32> @load_v4i32_2_2(float %tmp, <4 x i32> %b, <2 x i32> *%a) { ; CHECK-LABEL: load_v4i32_2_2: ; CHECK: // %bb.0: +; CHECK-NEXT: ldr d2, [x0] ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: mov v0.d[1], v2.d[0] ; CHECK-NEXT: ret %l = load <2 x i32>, <2 x i32> *%a %s1 = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> @@ -640,12 +640,12 @@ define void @loads_before_stores(i8* %i44) { ; CHECK-LABEL: loads_before_stores: ; CHECK: // %bb.0: // %bb -; CHECK-NEXT: add x8, x0, #20 ; CHECK-NEXT: ldr s0, [x0, #28] -; CHECK-NEXT: ldrh w9, [x0, #26] +; CHECK-NEXT: add x9, x0, #20 +; CHECK-NEXT: ldrh w8, [x0, #26] ; CHECK-NEXT: ldrh w10, [x0, #24] -; CHECK-NEXT: ld1 { v0.s }[1], [x8] -; CHECK-NEXT: strh w9, [x0, #20] +; CHECK-NEXT: ld1 { v0.s }[1], [x9] +; CHECK-NEXT: strh w8, [x0, #20] ; CHECK-NEXT: strh w10, [x0, #30] ; CHECK-NEXT: stur d0, [x0, #22] ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/isinf.ll =================================================================== --- llvm/test/CodeGen/AArch64/isinf.ll +++ llvm/test/CodeGen/AArch64/isinf.ll @@ -26,8 +26,8 @@ define i32 @replace_isinf_call_f32(float %x) { ; CHECK-LABEL: replace_isinf_call_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2139095040 ; CHECK-NEXT: fabs s0, s0 +; CHECK-NEXT: mov w8, #2139095040 ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: fcmp s0, s1 ; CHECK-NEXT: cset w0, eq @@ -42,8 +42,8 @@ define i32 @replace_isinf_call_f64(double %x) { ; CHECK-LABEL: replace_isinf_call_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9218868437227405312 ; CHECK-NEXT: fabs d0, d0 +; CHECK-NEXT: mov x8, #9218868437227405312 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: cset w0, eq Index: llvm/test/CodeGen/AArch64/known-never-nan.ll =================================================================== --- llvm/test/CodeGen/AArch64/known-never-nan.ll +++ llvm/test/CodeGen/AArch64/known-never-nan.ll @@ -28,13 +28,13 @@ define float @not_fmaxnm_maybe_nan(i32 %i1, i32 %i2) #0 { ; CHECK-LABEL: not_fmaxnm_maybe_nan: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-8388608 ; CHECK-NEXT: ucvtf s0, w0 ; CHECK-NEXT: ucvtf s1, w1 -; CHECK-NEXT: fmov s3, #17.00000000 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fadd s1, s1, s3 -; CHECK-NEXT: fmul s0, s0, s2 +; CHECK-NEXT: mov w8, #-8388608 +; CHECK-NEXT: fmov s2, #17.00000000 +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: fadd s1, s1, s2 +; CHECK-NEXT: fmul s0, s0, s3 ; CHECK-NEXT: fcmp s0, s1 ; CHECK-NEXT: fcsel s0, s0, s1, pl ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll =================================================================== --- llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll +++ llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll @@ -10,12 +10,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sunpkhi z2.h, z1.b ; CHECK-NEXT: sunpkhi z3.h, z0.b -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sunpklo z1.h, z1.b ; CHECK-NEXT: sunpkhi z4.s, z2.h ; CHECK-NEXT: sunpkhi z5.s, z3.h +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: sunpklo z2.s, z2.h ; CHECK-NEXT: sunpklo z3.s, z3.h +; CHECK-NEXT: sunpklo z1.h, z1.b ; CHECK-NEXT: sunpklo z0.h, z0.b ; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s @@ -36,9 +36,9 @@ define @sdiv_i16( %a, %b) { ; CHECK-LABEL: sdiv_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: sunpkhi z2.s, z1.h ; CHECK-NEXT: sunpkhi z3.s, z0.h +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: sunpklo z1.s, z1.h ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s @@ -112,22 +112,22 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sunpkhi z2.h, z1.b ; CHECK-NEXT: sunpkhi z3.h, z0.b +; CHECK-NEXT: sunpkhi z4.s, z2.h +; CHECK-NEXT: sunpkhi z5.s, z3.h ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sunpkhi z5.s, z2.h -; CHECK-NEXT: sunpkhi z6.s, z3.h ; CHECK-NEXT: sunpklo z2.s, z2.h ; CHECK-NEXT: sunpklo z3.s, z3.h -; CHECK-NEXT: sunpklo z4.h, z1.b +; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: sunpklo z3.h, z0.b -; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s -; CHECK-NEXT: sunpkhi z6.s, z4.h -; CHECK-NEXT: sunpkhi z7.s, z3.h -; CHECK-NEXT: sunpklo z4.s, z4.h +; CHECK-NEXT: sunpklo z3.h, z1.b +; CHECK-NEXT: sunpklo z5.h, z0.b +; CHECK-NEXT: sunpkhi z6.s, z3.h +; CHECK-NEXT: sunpkhi z7.s, z5.h ; CHECK-NEXT: sunpklo z3.s, z3.h +; CHECK-NEXT: sunpklo z5.s, z5.h ; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s -; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s -; CHECK-NEXT: uzp1 z2.h, z2.h, z5.h +; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z5.s +; CHECK-NEXT: uzp1 z2.h, z2.h, z4.h ; CHECK-NEXT: uzp1 z3.h, z3.h, z6.h ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b @@ -140,9 +140,9 @@ define @srem_i16( %a, %b) { ; CHECK-LABEL: srem_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: sunpkhi z2.s, z1.h ; CHECK-NEXT: sunpkhi z3.s, z0.h +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: sunpklo z4.s, z1.h ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; CHECK-NEXT: sunpklo z5.s, z0.h @@ -189,12 +189,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: uunpkhi z2.h, z1.b ; CHECK-NEXT: uunpkhi z3.h, z0.b -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: uunpklo z1.h, z1.b ; CHECK-NEXT: uunpkhi z4.s, z2.h ; CHECK-NEXT: uunpkhi z5.s, z3.h +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: uunpklo z1.h, z1.b ; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s @@ -215,9 +215,9 @@ define @udiv_i16( %a, %b) { ; CHECK-LABEL: udiv_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uunpkhi z2.s, z1.h ; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s @@ -262,9 +262,9 @@ define @udiv_widen_i32( %a, %b) { ; CHECK-LABEL: udiv_widen_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z1.d, z1.d, #0xffffffff ; CHECK-NEXT: and z0.d, z0.d, #0xffffffff +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %div = udiv %a, %b @@ -292,22 +292,22 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: uunpkhi z2.h, z1.b ; CHECK-NEXT: uunpkhi z3.h, z0.b +; CHECK-NEXT: uunpkhi z4.s, z2.h +; CHECK-NEXT: uunpkhi z5.s, z3.h ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: uunpkhi z5.s, z2.h -; CHECK-NEXT: uunpkhi z6.s, z3.h ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: uunpklo z4.h, z1.b +; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: uunpklo z3.h, z0.b -; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s -; CHECK-NEXT: uunpkhi z6.s, z4.h -; CHECK-NEXT: uunpkhi z7.s, z3.h -; CHECK-NEXT: uunpklo z4.s, z4.h +; CHECK-NEXT: uunpklo z3.h, z1.b +; CHECK-NEXT: uunpklo z5.h, z0.b +; CHECK-NEXT: uunpkhi z6.s, z3.h +; CHECK-NEXT: uunpkhi z7.s, z5.h ; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: uunpklo z5.s, z5.h ; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s -; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s -; CHECK-NEXT: uzp1 z2.h, z2.h, z5.h +; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z5.s +; CHECK-NEXT: uzp1 z2.h, z2.h, z4.h ; CHECK-NEXT: uzp1 z3.h, z3.h, z6.h ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b @@ -320,9 +320,9 @@ define @urem_i16( %a, %b) { ; CHECK-LABEL: urem_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uunpkhi z2.s, z1.h ; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uunpklo z4.s, z1.h ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; CHECK-NEXT: uunpklo z5.s, z0.h @@ -424,9 +424,9 @@ ; CHECK-LABEL: smin_split_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: smin z2.h, p0/m, z2.h, z6.h ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z4.h ; CHECK-NEXT: smin z1.h, p0/m, z1.h, z5.h +; CHECK-NEXT: smin z2.h, p0/m, z2.h, z6.h ; CHECK-NEXT: smin z3.h, p0/m, z3.h, z7.h ; CHECK-NEXT: ret %cmp = icmp slt %a, %b @@ -560,9 +560,9 @@ define @umin_promote_i8( %a, %b) { ; CHECK-LABEL: umin_promote_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: and z0.h, z0.h, #0xff +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %cmp = icmp ult %a, %b @@ -706,9 +706,9 @@ define @umax_promote_i32( %a, %b) { ; CHECK-LABEL: umax_promote_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z1.d, z1.d, #0xffffffff ; CHECK-NEXT: and z0.d, z0.d, #0xffffffff +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %cmp = icmp ugt %a, %b @@ -885,8 +885,8 @@ define @lsl_promote_i16( %a, %b){ ; CHECK-LABEL: lsl_promote_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: and z1.s, z1.s, #0xffff +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %shl = shl %a, %b @@ -984,9 +984,9 @@ define @lsr_promote_i8( %a, %b){ ; CHECK-LABEL: lsr_promote_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: and z0.h, z0.h, #0xff +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %shr = lshr %a, %b @@ -1067,9 +1067,9 @@ ; CHECK-LABEL: cmp_split_64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p3.b -; CHECK-NEXT: cmpgt p2.b, p3/z, z2.b, z6.b ; CHECK-NEXT: cmpgt p0.b, p3/z, z0.b, z4.b ; CHECK-NEXT: cmpgt p1.b, p3/z, z1.b, z5.b +; CHECK-NEXT: cmpgt p2.b, p3/z, z2.b, z6.b ; CHECK-NEXT: cmpgt p3.b, p3/z, z3.b, z7.b ; CHECK-NEXT: ret %cmp = icmp sgt %a, %b @@ -1083,13 +1083,13 @@ define @fshl_i64( %a, %b, %c){ ; CHECK-LABEL: fshl_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z3.d, #63 // =0x3f -; CHECK-NEXT: mov z4.d, z2.d -; CHECK-NEXT: bic z2.d, z3.d, z2.d -; CHECK-NEXT: and z4.d, z4.d, #0x3f +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z4.d, #63 // =0x3f +; CHECK-NEXT: bic z2.d, z4.d, z2.d +; CHECK-NEXT: and z3.d, z3.d, #0x3f ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: lsr z1.d, z1.d, #1 -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z4.d +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z3.d ; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: ret @@ -1103,15 +1103,15 @@ ; CHECK-NEXT: mov z6.d, #63 // =0x3f ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: bic z7.d, z6.d, z4.d -; CHECK-NEXT: lsr z2.d, z2.d, #1 -; CHECK-NEXT: bic z6.d, z6.d, z5.d ; CHECK-NEXT: and z4.d, z4.d, #0x3f +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z4.d +; CHECK-NEXT: lsr z2.d, z2.d, #1 +; CHECK-NEXT: bic z4.d, z6.d, z5.d ; CHECK-NEXT: and z5.d, z5.d, #0x3f ; CHECK-NEXT: lsr z3.d, z3.d, #1 ; CHECK-NEXT: lsr z2.d, p0/m, z2.d, z7.d -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z4.d ; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z5.d -; CHECK-NEXT: lsr z3.d, p0/m, z3.d, z6.d +; CHECK-NEXT: lsr z3.d, p0/m, z3.d, z4.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: orr z1.d, z1.d, z3.d ; CHECK-NEXT: ret @@ -1124,12 +1124,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: subr z1.d, z1.d, #0 // =0x0 +; CHECK-NEXT: and z2.d, z2.d, #0x3f ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z1.d, z1.d, #0x3f -; CHECK-NEXT: and z2.d, z2.d, #0x3f -; CHECK-NEXT: lsrr z1.d, p0/m, z1.d, z0.d -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: lslr z2.d, p0/m, z2.d, z0.d +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: orr z0.d, z2.d, z0.d ; CHECK-NEXT: ret %fshl = call @llvm.fshl.nxv2i64( %a, %a, %b) ret %fshl @@ -1141,20 +1141,19 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z4.d, z2.d ; CHECK-NEXT: subr z2.d, z2.d, #0 // =0x0 -; CHECK-NEXT: mov z5.d, z3.d -; CHECK-NEXT: subr z3.d, z3.d, #0 // =0x0 +; CHECK-NEXT: and z4.d, z4.d, #0x3f ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z2.d, z2.d, #0x3f -; CHECK-NEXT: and z4.d, z4.d, #0x3f +; CHECK-NEXT: lslr z4.d, p0/m, z4.d, z0.d +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: subr z3.d, z3.d, #0 // =0x0 +; CHECK-NEXT: and z2.d, z2.d, #0x3f ; CHECK-NEXT: and z3.d, z3.d, #0x3f -; CHECK-NEXT: lsrr z2.d, p0/m, z2.d, z0.d -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z4.d -; CHECK-NEXT: and z5.d, z5.d, #0x3f -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: lsl z4.d, p0/m, z4.d, z5.d +; CHECK-NEXT: lslr z2.d, p0/m, z2.d, z1.d ; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: orr z0.d, z0.d, z2.d -; CHECK-NEXT: orr z1.d, z4.d, z1.d +; CHECK-NEXT: orr z0.d, z4.d, z0.d +; CHECK-NEXT: orr z1.d, z2.d, z1.d ; CHECK-NEXT: ret %fshl = call @llvm.fshl.nxv4i64( %a, %a, %b) ret %fshl @@ -1176,13 +1175,13 @@ define @fshr_i64( %a, %b, %c){ ; CHECK-LABEL: fshr_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z3.d, #63 // =0x3f -; CHECK-NEXT: mov z4.d, z2.d -; CHECK-NEXT: bic z2.d, z3.d, z2.d -; CHECK-NEXT: and z4.d, z4.d, #0x3f +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z4.d, #63 // =0x3f +; CHECK-NEXT: bic z2.d, z4.d, z2.d +; CHECK-NEXT: and z3.d, z3.d, #0x3f ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: lsl z0.d, z0.d, #1 -; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z4.d +; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: ret @@ -1195,12 +1194,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: subr z1.d, z1.d, #0 // =0x0 +; CHECK-NEXT: and z2.d, z2.d, #0x3f ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z1.d, z1.d, #0x3f -; CHECK-NEXT: and z2.d, z2.d, #0x3f -; CHECK-NEXT: lslr z1.d, p0/m, z1.d, z0.d -; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: lsrr z2.d, p0/m, z2.d, z0.d +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: orr z0.d, z2.d, z0.d ; CHECK-NEXT: ret %fshr = call @llvm.fshr.nxv2i64( %a, %a, %b) ret %fshr Index: llvm/test/CodeGen/AArch64/logic-reassociate.ll =================================================================== --- llvm/test/CodeGen/AArch64/logic-reassociate.ll +++ llvm/test/CodeGen/AArch64/logic-reassociate.ll @@ -75,9 +75,9 @@ define <8 x i64> @or_commute3(<8 x i64> %x, <8 x i64> %y) { ; CHECK-LABEL: or_commute3: ; CHECK: // %bb.0: -; CHECK-NEXT: orr v2.16b, v6.16b, v2.16b ; CHECK-NEXT: orr v0.16b, v4.16b, v0.16b ; CHECK-NEXT: orr v1.16b, v5.16b, v1.16b +; CHECK-NEXT: orr v2.16b, v6.16b, v2.16b ; CHECK-NEXT: orr v3.16b, v7.16b, v3.16b ; CHECK-NEXT: ret %b = or <8 x i64> %y, %x Index: llvm/test/CodeGen/AArch64/logic-shift.ll =================================================================== --- llvm/test/CodeGen/AArch64/logic-shift.ll +++ llvm/test/CodeGen/AArch64/logic-shift.ll @@ -200,10 +200,10 @@ define i64 @or_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) { ; CHECK-LABEL: or_lshr_mix_shift_amount: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr x9, x0, x2 -; CHECK-NEXT: lsr x8, x1, x4 -; CHECK-NEXT: orr x9, x9, x3 -; CHECK-NEXT: orr x0, x9, x8 +; CHECK-NEXT: lsr x8, x0, x2 +; CHECK-NEXT: lsr x9, x1, x4 +; CHECK-NEXT: orr x8, x8, x3 +; CHECK-NEXT: orr x0, x8, x9 ; CHECK-NEXT: ret %sh1 = lshr i64 %x0, %y %sh2 = lshr i64 %x1, %w @@ -428,10 +428,10 @@ define i64 @xor_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) { ; CHECK-LABEL: xor_lshr_mix_shift_amount: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr x9, x0, x2 -; CHECK-NEXT: lsr x8, x1, x4 -; CHECK-NEXT: eor x9, x9, x3 -; CHECK-NEXT: eor x0, x9, x8 +; CHECK-NEXT: lsr x8, x0, x2 +; CHECK-NEXT: lsr x9, x1, x4 +; CHECK-NEXT: eor x8, x8, x3 +; CHECK-NEXT: eor x0, x8, x9 ; CHECK-NEXT: ret %sh1 = lshr i64 %x0, %y %sh2 = lshr i64 %x1, %w @@ -656,10 +656,10 @@ define i64 @and_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) { ; CHECK-LABEL: and_lshr_mix_shift_amount: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr x9, x0, x2 -; CHECK-NEXT: lsr x8, x1, x4 -; CHECK-NEXT: and x9, x9, x3 -; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: lsr x8, x0, x2 +; CHECK-NEXT: lsr x9, x1, x4 +; CHECK-NEXT: and x8, x8, x3 +; CHECK-NEXT: and x0, x8, x9 ; CHECK-NEXT: ret %sh1 = lshr i64 %x0, %y %sh2 = lshr i64 %x1, %w @@ -788,9 +788,10 @@ define i16 @or_fshr_commute2(i16 %x, i16 %y) { ; CHECK-LABEL: or_fshr_commute2: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w0, w1 -; CHECK-NEXT: lsl w0, w0, #9 -; CHECK-NEXT: bfxil w0, w8, #7, #9 +; CHECK-NEXT: lsl w8, w0, #9 +; CHECK-NEXT: orr w9, w0, w1 +; CHECK-NEXT: bfxil w8, w9, #7, #9 +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %or1 = or i16 %x, %y %sh1 = shl i16 %x, 9 @@ -802,9 +803,10 @@ define i8 @or_fshr_commute3(i8 %x, i8 %y) { ; CHECK-LABEL: or_fshr_commute3: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w1, w0 -; CHECK-NEXT: lsl w0, w0, #2 -; CHECK-NEXT: bfxil w0, w8, #6, #2 +; CHECK-NEXT: lsl w8, w0, #2 +; CHECK-NEXT: orr w9, w1, w0 +; CHECK-NEXT: bfxil w8, w9, #6, #2 +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %or1 = or i8 %y, %x %sh1 = shl i8 %x, 2 Index: llvm/test/CodeGen/AArch64/logical_shifted_reg.ll =================================================================== --- llvm/test/CodeGen/AArch64/logical_shifted_reg.ll +++ llvm/test/CodeGen/AArch64/logical_shifted_reg.ll @@ -18,37 +18,37 @@ ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: and w11, w10, w9 ; CHECK-NEXT: bic w12, w10, w9 -; CHECK-NEXT: orr w13, w10, w9 ; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: orn w11, w10, w9 +; CHECK-NEXT: orr w11, w10, w9 ; CHECK-NEXT: str w12, [x8] -; CHECK-NEXT: eor w12, w10, w9 -; CHECK-NEXT: str w13, [x8] -; CHECK-NEXT: eon w13, w9, w10 +; CHECK-NEXT: orn w12, w10, w9 ; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: and w11, w10, w9, lsl #31 +; CHECK-NEXT: eor w11, w10, w9 ; CHECK-NEXT: str w12, [x8] -; CHECK-NEXT: bic w12, w10, w9, lsl #31 -; CHECK-NEXT: str w13, [x8] -; CHECK-NEXT: orr w13, w10, w9, lsl #31 +; CHECK-NEXT: and w12, w10, w9, lsl #31 +; CHECK-NEXT: str w11, [x8] +; CHECK-NEXT: eon w11, w9, w10 +; CHECK-NEXT: str w11, [x8] +; CHECK-NEXT: bic w11, w10, w9, lsl #31 +; CHECK-NEXT: str w12, [x8] +; CHECK-NEXT: orr w12, w10, w9, lsl #31 ; CHECK-NEXT: str w11, [x8] ; CHECK-NEXT: orn w11, w10, w9, lsl #31 ; CHECK-NEXT: str w12, [x8] ; CHECK-NEXT: eor w12, w10, w9, lsl #31 -; CHECK-NEXT: str w13, [x8] -; CHECK-NEXT: eon w13, w10, w9, lsl #31 ; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: bic w11, w10, w9, asr #10 +; CHECK-NEXT: eon w11, w10, w9, lsl #31 +; CHECK-NEXT: str w12, [x8] +; CHECK-NEXT: bic w12, w10, w9, asr #10 +; CHECK-NEXT: str w11, [x8] +; CHECK-NEXT: eor w11, w10, w9, asr #10 ; CHECK-NEXT: str w12, [x8] -; CHECK-NEXT: eor w12, w10, w9, asr #10 -; CHECK-NEXT: str w13, [x8] -; CHECK-NEXT: orn w13, w10, w9, lsr #1 +; CHECK-NEXT: orn w12, w10, w9, lsr #1 ; CHECK-NEXT: str w11, [x8] ; CHECK-NEXT: eor w11, w10, w9, lsr #1 ; CHECK-NEXT: str w12, [x8] ; CHECK-NEXT: eon w12, w10, w9, ror #20 ; CHECK-NEXT: and w9, w10, w9, ror #20 -; CHECK-NEXT: str w13, [x8] ; CHECK-NEXT: str w11, [x8] ; CHECK-NEXT: str w12, [x8] ; CHECK-NEXT: str w9, [x8] @@ -134,37 +134,37 @@ ; CHECK-NEXT: ldr x9, [x9] ; CHECK-NEXT: and x11, x10, x9 ; CHECK-NEXT: bic x12, x10, x9 -; CHECK-NEXT: orr x13, x10, x9 ; CHECK-NEXT: str x11, [x8] -; CHECK-NEXT: orn x11, x10, x9 +; CHECK-NEXT: orr x11, x10, x9 ; CHECK-NEXT: str x12, [x8] -; CHECK-NEXT: eor x12, x10, x9 -; CHECK-NEXT: str x13, [x8] -; CHECK-NEXT: eon x13, x9, x10 +; CHECK-NEXT: orn x12, x10, x9 ; CHECK-NEXT: str x11, [x8] -; CHECK-NEXT: and x11, x10, x9, lsl #63 +; CHECK-NEXT: eor x11, x10, x9 ; CHECK-NEXT: str x12, [x8] -; CHECK-NEXT: bic x12, x10, x9, lsl #63 -; CHECK-NEXT: str x13, [x8] -; CHECK-NEXT: orr x13, x10, x9, lsl #63 +; CHECK-NEXT: and x12, x10, x9, lsl #63 +; CHECK-NEXT: str x11, [x8] +; CHECK-NEXT: eon x11, x9, x10 +; CHECK-NEXT: str x11, [x8] +; CHECK-NEXT: bic x11, x10, x9, lsl #63 +; CHECK-NEXT: str x12, [x8] +; CHECK-NEXT: orr x12, x10, x9, lsl #63 ; CHECK-NEXT: str x11, [x8] ; CHECK-NEXT: orn x11, x10, x9, lsl #63 ; CHECK-NEXT: str x12, [x8] ; CHECK-NEXT: eor x12, x10, x9, lsl #63 -; CHECK-NEXT: str x13, [x8] -; CHECK-NEXT: eon x13, x10, x9, lsl #63 ; CHECK-NEXT: str x11, [x8] -; CHECK-NEXT: bic x11, x10, x9, asr #10 +; CHECK-NEXT: eon x11, x10, x9, lsl #63 +; CHECK-NEXT: str x12, [x8] +; CHECK-NEXT: bic x12, x10, x9, asr #10 +; CHECK-NEXT: str x11, [x8] +; CHECK-NEXT: eor x11, x10, x9, asr #10 ; CHECK-NEXT: str x12, [x8] -; CHECK-NEXT: eor x12, x10, x9, asr #10 -; CHECK-NEXT: str x13, [x8] -; CHECK-NEXT: orn x13, x10, x9, lsr #1 +; CHECK-NEXT: orn x12, x10, x9, lsr #1 ; CHECK-NEXT: str x11, [x8] ; CHECK-NEXT: eor x11, x10, x9, lsr #1 ; CHECK-NEXT: str x12, [x8] ; CHECK-NEXT: eon x12, x10, x9, ror #20 ; CHECK-NEXT: and x9, x10, x9, ror #20 -; CHECK-NEXT: str x13, [x8] ; CHECK-NEXT: str x11, [x8] ; CHECK-NEXT: str x12, [x8] ; CHECK-NEXT: str x9, [x8] @@ -254,9 +254,9 @@ ; CHECK-NEXT: tst x9, x10 ; CHECK-NEXT: b.gt .LBB2_2 ; CHECK-NEXT: // %bb.1: // %test2 +; CHECK-NEXT: and x11, x9, x10, asr #12 ; CHECK-NEXT: tst x9, x10, lsl #63 -; CHECK-NEXT: and x10, x9, x10, asr #12 -; CHECK-NEXT: ccmp x10, #1, #0, ge +; CHECK-NEXT: ccmp x11, #1, #0, ge ; CHECK-NEXT: b.lt .LBB2_3 ; CHECK-NEXT: .LBB2_2: // %common.ret ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/lowerMUL-newload.ll =================================================================== --- llvm/test/CodeGen/AArch64/lowerMUL-newload.ll +++ llvm/test/CodeGen/AArch64/lowerMUL-newload.ll @@ -39,9 +39,9 @@ define void @mlai16_loadstore(i16* %a, i16* %b, i16* %c) { ; CHECK-LABEL: mlai16_loadstore: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr d0, [x0, #16] -; CHECK-NEXT: ldr d1, [x1, #16] -; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h +; CHECK-NEXT: ldr d0, [x1, #16] +; CHECK-NEXT: ldr d1, [x0, #16] +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h ; CHECK-NEXT: ldr d1, [x2, #16] ; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h ; CHECK-NEXT: xtn v0.4h, v0.4s @@ -90,9 +90,9 @@ ; CHECK-LABEL: addmuli16_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h -; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff ; CHECK-NEXT: smlal v1.4s, v0.4h, v2.4h -; CHECK-NEXT: and v0.16b, v1.16b, v3.16b +; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret entry: %v0 = sext <4 x i16> %vec0 to <4 x i32> @@ -175,9 +175,9 @@ define void @mlai32_loadstore(i32* %a, i32* %b, i32* %c) { ; CHECK-LABEL: mlai32_loadstore: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr d0, [x0, #32] -; CHECK-NEXT: ldr d1, [x1, #32] -; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s +; CHECK-NEXT: ldr d0, [x1, #32] +; CHECK-NEXT: ldr d1, [x0, #32] +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s ; CHECK-NEXT: ldr d1, [x2, #32] ; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s ; CHECK-NEXT: xtn v0.2s, v0.2d @@ -226,9 +226,9 @@ ; CHECK-LABEL: addmuli32_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v1.2d, v1.2s, v2.2s -; CHECK-NEXT: movi v3.2d, #0x000000ffffffff ; CHECK-NEXT: smlal v1.2d, v0.2s, v2.2s -; CHECK-NEXT: and v0.16b, v1.16b, v3.16b +; CHECK-NEXT: movi v0.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret entry: %v0 = sext <2 x i32> %vec0 to <2 x i64> Index: llvm/test/CodeGen/AArch64/machine-combiner-copy.ll =================================================================== --- llvm/test/CodeGen/AArch64/machine-combiner-copy.ll +++ llvm/test/CodeGen/AArch64/machine-combiner-copy.ll @@ -7,8 +7,8 @@ ; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-NEXT: cbz w2, .LBB0_8 ; CHECK-NEXT: // %bb.1: // %for.body.preheader -; CHECK-NEXT: mov w8, w2 ; CHECK-NEXT: cmp w2, #15 +; CHECK-NEXT: mov w8, w2 ; CHECK-NEXT: b.hi .LBB0_3 ; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: mov x9, xzr Index: llvm/test/CodeGen/AArch64/machine-combiner-subadd.ll =================================================================== --- llvm/test/CodeGen/AArch64/machine-combiner-subadd.ll +++ llvm/test/CodeGen/AArch64/machine-combiner-subadd.ll @@ -11,10 +11,10 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: add w9, w0, #100 ; CHECK-NEXT: orr w8, w2, #0x80 +; CHECK-NEXT: eor w10, w1, w9, lsl #8 ; CHECK-NEXT: sub w8, w8, w9 -; CHECK-NEXT: eor w9, w1, w9, lsl #8 -; CHECK-NEXT: sub w8, w8, w9 -; CHECK-NEXT: eor w0, w8, w9, asr #13 +; CHECK-NEXT: sub w8, w8, w10 +; CHECK-NEXT: eor w0, w8, w10, asr #13 ; CHECK-NEXT: ret entry: %c1 = or i32 %c, 128 @@ -34,10 +34,10 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: add x9, x0, #100 ; CHECK-NEXT: orr x8, x2, #0x80 +; CHECK-NEXT: eor x10, x1, x9, lsl #8 ; CHECK-NEXT: sub x8, x8, x9 -; CHECK-NEXT: eor x9, x1, x9, lsl #8 -; CHECK-NEXT: sub x8, x8, x9 -; CHECK-NEXT: eor x0, x8, x9, asr #13 +; CHECK-NEXT: sub x8, x8, x10 +; CHECK-NEXT: eor x0, x8, x10, asr #13 ; CHECK-NEXT: ret entry: %c1 = or i64 %c, 128 @@ -57,10 +57,10 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: add w9, w0, #100 ; CHECK-NEXT: orr w8, w2, #0x80 +; CHECK-NEXT: eor w10, w1, w9, lsl #8 ; CHECK-NEXT: add w8, w8, w9 -; CHECK-NEXT: eor w9, w1, w9, lsl #8 -; CHECK-NEXT: sub w8, w9, w8 -; CHECK-NEXT: eor w0, w8, w9, asr #13 +; CHECK-NEXT: sub w8, w10, w8 +; CHECK-NEXT: eor w0, w8, w10, asr #13 ; CHECK-NEXT: ret entry: %c1 = or i32 %c, 128 Index: llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll =================================================================== --- llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll +++ llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll @@ -131,23 +131,23 @@ ; CHECK-NEXT: b.lt .LBB2_3 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: adrp x8, A -; CHECK-NEXT: mov w9, #42 -; CHECK-NEXT: mov w20, w19 -; CHECK-NEXT: ldr w21, [x8, :lo12:A] -; CHECK-NEXT: str w9, [x0] +; CHECK-NEXT: mov w21, w19 +; CHECK-NEXT: ldr w20, [x8, :lo12:A] +; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: .LBB2_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: mov w0, w21 +; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: bl _Z3usei -; CHECK-NEXT: sdiv w20, w20, w0 +; CHECK-NEXT: sdiv w21, w21, w0 ; CHECK-NEXT: subs w19, w19, #1 ; CHECK-NEXT: b.ne .LBB2_2 ; CHECK-NEXT: b .LBB2_4 ; CHECK-NEXT: .LBB2_3: -; CHECK-NEXT: mov w20, w19 +; CHECK-NEXT: mov w21, w19 ; CHECK-NEXT: .LBB2_4: // %for.cond.cleanup -; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov w0, w21 ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: Index: llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll =================================================================== --- llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll +++ llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll @@ -15,9 +15,9 @@ ; CHECK-LABEL: test_memcpy: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp w9, w10, [x1] +; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: add w0, w9, w10 -; CHECK-NEXT: ldr q0, [x8, #16] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret %p0 = bitcast i32* %p to i8* @@ -38,9 +38,9 @@ ; CHECK-LABEL: test_memcpy_inline: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp w9, w10, [x1] +; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: add w0, w9, w10 -; CHECK-NEXT: ldr q0, [x8, #16] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret %p0 = bitcast i32* %p to i8* @@ -61,9 +61,9 @@ ; CHECK-LABEL: test_memmove: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp w9, w10, [x1] +; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: add w0, w9, w10 -; CHECK-NEXT: ldr q0, [x8, #16] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret %p0 = bitcast i32* %p to i8* @@ -84,11 +84,11 @@ define i32 @test_memset(i32* nocapture %p, i32* nocapture readonly %q) { ; CHECK-LABEL: test_memset: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp w10, w11, [x1] +; CHECK-NEXT: ldp w9, w10, [x1] ; CHECK-NEXT: mov x8, x0 -; CHECK-NEXT: mov x9, #-6148914691236517206 -; CHECK-NEXT: add w0, w10, w11 -; CHECK-NEXT: stp x9, x9, [x8] +; CHECK-NEXT: mov x11, #-6148914691236517206 +; CHECK-NEXT: stp x11, x11, [x8] +; CHECK-NEXT: add w0, w9, w10 ; CHECK-NEXT: ret %p0 = bitcast i32* %p to i8* tail call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8 170, i64 16, i1 false), !alias.scope !2, !noalias !4 @@ -106,9 +106,9 @@ ; CHECK-LABEL: test_mempcpy: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp w9, w10, [x1] +; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: add w0, w9, w10 -; CHECK-NEXT: ldr q0, [x8, #16] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret %p0 = bitcast i32* %p to i8* Index: llvm/test/CodeGen/AArch64/merge-trunc-store.ll =================================================================== --- llvm/test/CodeGen/AArch64/merge-trunc-store.ll +++ llvm/test/CodeGen/AArch64/merge-trunc-store.ll @@ -524,11 +524,11 @@ define void @be_i64_to_i16(i64 %x, i16* %p0) { ; LE-LABEL: be_i64_to_i16: ; LE: // %bb.0: -; LE-NEXT: lsr x8, x0, #32 -; LE-NEXT: ror w9, w0, #16 +; LE-NEXT: ror w8, w0, #16 +; LE-NEXT: lsr x9, x0, #32 ; LE-NEXT: lsr x10, x0, #48 -; LE-NEXT: strh w8, [x1, #2] -; LE-NEXT: str w9, [x1, #4] +; LE-NEXT: str w8, [x1, #4] +; LE-NEXT: strh w9, [x1, #2] ; LE-NEXT: strh w10, [x1] ; LE-NEXT: ret ; @@ -749,16 +749,16 @@ ; CHECK-NEXT: lsr x8, x0, #56 ; CHECK-NEXT: lsr x9, x0, #48 ; CHECK-NEXT: lsr x10, x0, #40 -; CHECK-NEXT: lsr x11, x0, #32 ; CHECK-NEXT: strb w0, [x1, #7] ; CHECK-NEXT: strb w8, [x1] -; CHECK-NEXT: lsr x8, x0, #16 +; CHECK-NEXT: lsr x8, x0, #32 ; CHECK-NEXT: strb w9, [x1, #1] -; CHECK-NEXT: lsr x9, x0, #8 +; CHECK-NEXT: lsr x9, x0, #16 ; CHECK-NEXT: strb w10, [x1, #2] -; CHECK-NEXT: strb w11, [x1, #3] -; CHECK-NEXT: strb w8, [x1, #5] -; CHECK-NEXT: strb w9, [x1, #6] +; CHECK-NEXT: lsr x10, x0, #8 +; CHECK-NEXT: strb w8, [x1, #3] +; CHECK-NEXT: strb w9, [x1, #5] +; CHECK-NEXT: strb w10, [x1, #6] ; CHECK-NEXT: ret %sh1 = lshr i64 %x, 8 %sh2 = lshr i64 %x, 16 Index: llvm/test/CodeGen/AArch64/midpoint-int.ll =================================================================== --- llvm/test/CodeGen/AArch64/midpoint-int.ll +++ llvm/test/CodeGen/AArch64/midpoint-int.ll @@ -14,12 +14,12 @@ ; CHECK-LABEL: scalar_i32_signed_reg_reg: ; CHECK: // %bb.0: ; CHECK-NEXT: cmp w0, w1 -; CHECK-NEXT: mov w10, #-1 ; CHECK-NEXT: csel w8, w1, w0, gt ; CHECK-NEXT: csel w9, w0, w1, gt ; CHECK-NEXT: sub w8, w9, w8 -; CHECK-NEXT: cneg w9, w10, le +; CHECK-NEXT: mov w9, #-1 ; CHECK-NEXT: lsr w8, w8, #1 +; CHECK-NEXT: cneg w9, w9, le ; CHECK-NEXT: madd w0, w8, w9, w0 ; CHECK-NEXT: ret %t3 = icmp sgt i32 %a1, %a2 ; signed @@ -37,12 +37,12 @@ ; CHECK-LABEL: scalar_i32_unsigned_reg_reg: ; CHECK: // %bb.0: ; CHECK-NEXT: cmp w0, w1 -; CHECK-NEXT: mov w10, #-1 ; CHECK-NEXT: csel w8, w1, w0, hi ; CHECK-NEXT: csel w9, w0, w1, hi ; CHECK-NEXT: sub w8, w9, w8 -; CHECK-NEXT: cneg w9, w10, ls +; CHECK-NEXT: mov w9, #-1 ; CHECK-NEXT: lsr w8, w8, #1 +; CHECK-NEXT: cneg w9, w9, ls ; CHECK-NEXT: madd w0, w8, w9, w0 ; CHECK-NEXT: ret %t3 = icmp ugt i32 %a1, %a2 @@ -61,15 +61,15 @@ define i32 @scalar_i32_signed_mem_reg(i32* %a1_addr, i32 %a2) nounwind { ; CHECK-LABEL: scalar_i32_signed_mem_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: cmp w9, w1 -; CHECK-NEXT: csel w10, w1, w9, gt -; CHECK-NEXT: csel w11, w9, w1, gt -; CHECK-NEXT: sub w10, w11, w10 -; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: lsr w10, w10, #1 -; CHECK-NEXT: madd w0, w10, w8, w9 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: cmp w8, w1 +; CHECK-NEXT: csel w9, w1, w8, gt +; CHECK-NEXT: csel w10, w8, w1, gt +; CHECK-NEXT: sub w9, w10, w9 +; CHECK-NEXT: mov w10, #-1 +; CHECK-NEXT: lsr w9, w9, #1 +; CHECK-NEXT: cneg w10, w10, le +; CHECK-NEXT: madd w0, w9, w10, w8 ; CHECK-NEXT: ret %a1 = load i32, i32* %a1_addr %t3 = icmp sgt i32 %a1, %a2 ; signed @@ -86,15 +86,15 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, i32* %a2_addr) nounwind { ; CHECK-LABEL: scalar_i32_signed_reg_mem: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: cmp w0, w9 -; CHECK-NEXT: csel w10, w9, w0, gt -; CHECK-NEXT: csel w9, w0, w9, gt -; CHECK-NEXT: sub w9, w9, w10 -; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: lsr w9, w9, #1 -; CHECK-NEXT: madd w0, w9, w8, w0 +; CHECK-NEXT: ldr w8, [x1] +; CHECK-NEXT: cmp w0, w8 +; CHECK-NEXT: csel w9, w8, w0, gt +; CHECK-NEXT: csel w8, w0, w8, gt +; CHECK-NEXT: sub w8, w8, w9 +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: lsr w8, w8, #1 +; CHECK-NEXT: cneg w9, w9, le +; CHECK-NEXT: madd w0, w8, w9, w0 ; CHECK-NEXT: ret %a2 = load i32, i32* %a2_addr %t3 = icmp sgt i32 %a1, %a2 ; signed @@ -111,16 +111,16 @@ define i32 @scalar_i32_signed_mem_mem(i32* %a1_addr, i32* %a2_addr) nounwind { ; CHECK-LABEL: scalar_i32_signed_mem_mem: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: ldr w10, [x1] -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: csel w11, w10, w9, gt -; CHECK-NEXT: csel w10, w9, w10, gt -; CHECK-NEXT: sub w10, w10, w11 -; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: lsr w10, w10, #1 -; CHECK-NEXT: madd w0, w10, w8, w9 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w10, w9, w8, gt +; CHECK-NEXT: csel w9, w8, w9, gt +; CHECK-NEXT: sub w9, w9, w10 +; CHECK-NEXT: mov w10, #-1 +; CHECK-NEXT: lsr w9, w9, #1 +; CHECK-NEXT: cneg w10, w10, le +; CHECK-NEXT: madd w0, w9, w10, w8 ; CHECK-NEXT: ret %a1 = load i32, i32* %a1_addr %a2 = load i32, i32* %a2_addr @@ -145,12 +145,12 @@ ; CHECK-LABEL: scalar_i64_signed_reg_reg: ; CHECK: // %bb.0: ; CHECK-NEXT: cmp x0, x1 -; CHECK-NEXT: mov x10, #-1 ; CHECK-NEXT: csel x8, x1, x0, gt ; CHECK-NEXT: csel x9, x0, x1, gt ; CHECK-NEXT: sub x8, x9, x8 -; CHECK-NEXT: cneg x9, x10, le +; CHECK-NEXT: mov x9, #-1 ; CHECK-NEXT: lsr x8, x8, #1 +; CHECK-NEXT: cneg x9, x9, le ; CHECK-NEXT: madd x0, x8, x9, x0 ; CHECK-NEXT: ret %t3 = icmp sgt i64 %a1, %a2 ; signed @@ -168,12 +168,12 @@ ; CHECK-LABEL: scalar_i64_unsigned_reg_reg: ; CHECK: // %bb.0: ; CHECK-NEXT: cmp x0, x1 -; CHECK-NEXT: mov x10, #-1 ; CHECK-NEXT: csel x8, x1, x0, hi ; CHECK-NEXT: csel x9, x0, x1, hi ; CHECK-NEXT: sub x8, x9, x8 -; CHECK-NEXT: cneg x9, x10, ls +; CHECK-NEXT: mov x9, #-1 ; CHECK-NEXT: lsr x8, x8, #1 +; CHECK-NEXT: cneg x9, x9, ls ; CHECK-NEXT: madd x0, x8, x9, x0 ; CHECK-NEXT: ret %t3 = icmp ugt i64 %a1, %a2 @@ -192,15 +192,15 @@ define i64 @scalar_i64_signed_mem_reg(i64* %a1_addr, i64 %a2) nounwind { ; CHECK-LABEL: scalar_i64_signed_mem_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: csel x10, x1, x9, gt -; CHECK-NEXT: csel x11, x9, x1, gt -; CHECK-NEXT: sub x10, x11, x10 -; CHECK-NEXT: cneg x8, x8, le -; CHECK-NEXT: lsr x10, x10, #1 -; CHECK-NEXT: madd x0, x10, x8, x9 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: cmp x8, x1 +; CHECK-NEXT: csel x9, x1, x8, gt +; CHECK-NEXT: csel x10, x8, x1, gt +; CHECK-NEXT: sub x9, x10, x9 +; CHECK-NEXT: mov x10, #-1 +; CHECK-NEXT: lsr x9, x9, #1 +; CHECK-NEXT: cneg x10, x10, le +; CHECK-NEXT: madd x0, x9, x10, x8 ; CHECK-NEXT: ret %a1 = load i64, i64* %a1_addr %t3 = icmp sgt i64 %a1, %a2 ; signed @@ -217,15 +217,15 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, i64* %a2_addr) nounwind { ; CHECK-LABEL: scalar_i64_signed_reg_mem: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x9, [x1] -; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: cmp x0, x9 -; CHECK-NEXT: csel x10, x9, x0, gt -; CHECK-NEXT: csel x9, x0, x9, gt -; CHECK-NEXT: sub x9, x9, x10 -; CHECK-NEXT: cneg x8, x8, le -; CHECK-NEXT: lsr x9, x9, #1 -; CHECK-NEXT: madd x0, x9, x8, x0 +; CHECK-NEXT: ldr x8, [x1] +; CHECK-NEXT: cmp x0, x8 +; CHECK-NEXT: csel x9, x8, x0, gt +; CHECK-NEXT: csel x8, x0, x8, gt +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: lsr x8, x8, #1 +; CHECK-NEXT: cneg x9, x9, le +; CHECK-NEXT: madd x0, x8, x9, x0 ; CHECK-NEXT: ret %a2 = load i64, i64* %a2_addr %t3 = icmp sgt i64 %a1, %a2 ; signed @@ -242,16 +242,16 @@ define i64 @scalar_i64_signed_mem_mem(i64* %a1_addr, i64* %a2_addr) nounwind { ; CHECK-LABEL: scalar_i64_signed_mem_mem: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: ldr x10, [x1] -; CHECK-NEXT: cmp x9, x10 -; CHECK-NEXT: csel x11, x10, x9, gt -; CHECK-NEXT: csel x10, x9, x10, gt -; CHECK-NEXT: sub x10, x10, x11 -; CHECK-NEXT: cneg x8, x8, le -; CHECK-NEXT: lsr x10, x10, #1 -; CHECK-NEXT: madd x0, x10, x8, x9 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: ldr x9, [x1] +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: csel x10, x9, x8, gt +; CHECK-NEXT: csel x9, x8, x9, gt +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov x10, #-1 +; CHECK-NEXT: lsr x9, x9, #1 +; CHECK-NEXT: cneg x10, x10, le +; CHECK-NEXT: madd x0, x9, x10, x8 ; CHECK-NEXT: ret %a1 = load i64, i64* %a1_addr %a2 = load i64, i64* %a2_addr @@ -276,13 +276,13 @@ ; CHECK-LABEL: scalar_i16_signed_reg_reg: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: mov w10, #-1 ; CHECK-NEXT: cmp w8, w1, sxth ; CHECK-NEXT: csel w8, w1, w0, gt ; CHECK-NEXT: csel w9, w0, w1, gt ; CHECK-NEXT: sub w8, w9, w8 -; CHECK-NEXT: cneg w9, w10, le +; CHECK-NEXT: mov w9, #-1 ; CHECK-NEXT: ubfx w8, w8, #1, #15 +; CHECK-NEXT: cneg w9, w9, le ; CHECK-NEXT: madd w0, w8, w9, w0 ; CHECK-NEXT: ret %t3 = icmp sgt i16 %a1, %a2 ; signed @@ -300,13 +300,13 @@ ; CHECK-LABEL: scalar_i16_unsigned_reg_reg: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: mov w10, #-1 ; CHECK-NEXT: cmp w8, w1, uxth ; CHECK-NEXT: csel w8, w1, w0, hi ; CHECK-NEXT: csel w9, w0, w1, hi ; CHECK-NEXT: sub w8, w9, w8 -; CHECK-NEXT: cneg w9, w10, ls +; CHECK-NEXT: mov w9, #-1 ; CHECK-NEXT: ubfx w8, w8, #1, #15 +; CHECK-NEXT: cneg w9, w9, ls ; CHECK-NEXT: madd w0, w8, w9, w0 ; CHECK-NEXT: ret %t3 = icmp ugt i16 %a1, %a2 @@ -325,15 +325,15 @@ define i16 @scalar_i16_signed_mem_reg(i16* %a1_addr, i16 %a2) nounwind { ; CHECK-LABEL: scalar_i16_signed_mem_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsh w9, [x0] -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: cmp w9, w1, sxth -; CHECK-NEXT: csel w10, w1, w9, gt -; CHECK-NEXT: csel w11, w9, w1, gt -; CHECK-NEXT: sub w10, w11, w10 -; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: ubfx w10, w10, #1, #15 -; CHECK-NEXT: madd w0, w10, w8, w9 +; CHECK-NEXT: ldrsh w8, [x0] +; CHECK-NEXT: cmp w8, w1, sxth +; CHECK-NEXT: csel w9, w1, w8, gt +; CHECK-NEXT: csel w10, w8, w1, gt +; CHECK-NEXT: sub w9, w10, w9 +; CHECK-NEXT: mov w10, #-1 +; CHECK-NEXT: ubfx w9, w9, #1, #15 +; CHECK-NEXT: cneg w10, w10, le +; CHECK-NEXT: madd w0, w9, w10, w8 ; CHECK-NEXT: ret %a1 = load i16, i16* %a1_addr %t3 = icmp sgt i16 %a1, %a2 ; signed @@ -350,15 +350,15 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, i16* %a2_addr) nounwind { ; CHECK-LABEL: scalar_i16_signed_reg_mem: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsh w9, [x1] -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: mov w10, #-1 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: csel w8, w9, w0, gt -; CHECK-NEXT: csel w9, w0, w9, gt -; CHECK-NEXT: sub w8, w9, w8 -; CHECK-NEXT: cneg w9, w10, le +; CHECK-NEXT: ldrsh w8, [x1] +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w9, w8, w0, gt +; CHECK-NEXT: csel w8, w0, w8, gt +; CHECK-NEXT: sub w8, w8, w9 +; CHECK-NEXT: mov w9, #-1 ; CHECK-NEXT: ubfx w8, w8, #1, #15 +; CHECK-NEXT: cneg w9, w9, le ; CHECK-NEXT: madd w0, w8, w9, w0 ; CHECK-NEXT: ret %a2 = load i16, i16* %a2_addr @@ -376,16 +376,16 @@ define i16 @scalar_i16_signed_mem_mem(i16* %a1_addr, i16* %a2_addr) nounwind { ; CHECK-LABEL: scalar_i16_signed_mem_mem: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsh w9, [x0] -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: ldrsh w10, [x1] -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: csel w11, w10, w9, gt -; CHECK-NEXT: csel w10, w9, w10, gt -; CHECK-NEXT: sub w10, w10, w11 -; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: ubfx w10, w10, #1, #15 -; CHECK-NEXT: madd w0, w10, w8, w9 +; CHECK-NEXT: ldrsh w8, [x0] +; CHECK-NEXT: ldrsh w9, [x1] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w10, w9, w8, gt +; CHECK-NEXT: csel w9, w8, w9, gt +; CHECK-NEXT: sub w9, w9, w10 +; CHECK-NEXT: mov w10, #-1 +; CHECK-NEXT: ubfx w9, w9, #1, #15 +; CHECK-NEXT: cneg w10, w10, le +; CHECK-NEXT: madd w0, w9, w10, w8 ; CHECK-NEXT: ret %a1 = load i16, i16* %a1_addr %a2 = load i16, i16* %a2_addr @@ -410,13 +410,13 @@ ; CHECK-LABEL: scalar_i8_signed_reg_reg: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: mov w10, #-1 ; CHECK-NEXT: cmp w8, w1, sxtb ; CHECK-NEXT: csel w8, w1, w0, gt ; CHECK-NEXT: csel w9, w0, w1, gt ; CHECK-NEXT: sub w8, w9, w8 -; CHECK-NEXT: cneg w9, w10, le +; CHECK-NEXT: mov w9, #-1 ; CHECK-NEXT: ubfx w8, w8, #1, #7 +; CHECK-NEXT: cneg w9, w9, le ; CHECK-NEXT: madd w0, w8, w9, w0 ; CHECK-NEXT: ret %t3 = icmp sgt i8 %a1, %a2 ; signed @@ -434,13 +434,13 @@ ; CHECK-LABEL: scalar_i8_unsigned_reg_reg: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: mov w10, #-1 ; CHECK-NEXT: cmp w8, w1, uxtb ; CHECK-NEXT: csel w8, w1, w0, hi ; CHECK-NEXT: csel w9, w0, w1, hi ; CHECK-NEXT: sub w8, w9, w8 -; CHECK-NEXT: cneg w9, w10, ls +; CHECK-NEXT: mov w9, #-1 ; CHECK-NEXT: ubfx w8, w8, #1, #7 +; CHECK-NEXT: cneg w9, w9, ls ; CHECK-NEXT: madd w0, w8, w9, w0 ; CHECK-NEXT: ret %t3 = icmp ugt i8 %a1, %a2 @@ -459,15 +459,15 @@ define i8 @scalar_i8_signed_mem_reg(i8* %a1_addr, i8 %a2) nounwind { ; CHECK-LABEL: scalar_i8_signed_mem_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsb w9, [x0] -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: cmp w9, w1, sxtb -; CHECK-NEXT: csel w10, w1, w9, gt -; CHECK-NEXT: csel w11, w9, w1, gt -; CHECK-NEXT: sub w10, w11, w10 -; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: ubfx w10, w10, #1, #7 -; CHECK-NEXT: madd w0, w10, w8, w9 +; CHECK-NEXT: ldrsb w8, [x0] +; CHECK-NEXT: cmp w8, w1, sxtb +; CHECK-NEXT: csel w9, w1, w8, gt +; CHECK-NEXT: csel w10, w8, w1, gt +; CHECK-NEXT: sub w9, w10, w9 +; CHECK-NEXT: mov w10, #-1 +; CHECK-NEXT: ubfx w9, w9, #1, #7 +; CHECK-NEXT: cneg w10, w10, le +; CHECK-NEXT: madd w0, w9, w10, w8 ; CHECK-NEXT: ret %a1 = load i8, i8* %a1_addr %t3 = icmp sgt i8 %a1, %a2 ; signed @@ -484,15 +484,15 @@ define i8 @scalar_i8_signed_reg_mem(i8 %a1, i8* %a2_addr) nounwind { ; CHECK-LABEL: scalar_i8_signed_reg_mem: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsb w9, [x1] -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: mov w10, #-1 -; CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: csel w8, w9, w0, gt -; CHECK-NEXT: csel w9, w0, w9, gt -; CHECK-NEXT: sub w8, w9, w8 -; CHECK-NEXT: cneg w9, w10, le +; CHECK-NEXT: ldrsb w8, [x1] +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w9, w8, w0, gt +; CHECK-NEXT: csel w8, w0, w8, gt +; CHECK-NEXT: sub w8, w8, w9 +; CHECK-NEXT: mov w9, #-1 ; CHECK-NEXT: ubfx w8, w8, #1, #7 +; CHECK-NEXT: cneg w9, w9, le ; CHECK-NEXT: madd w0, w8, w9, w0 ; CHECK-NEXT: ret %a2 = load i8, i8* %a2_addr @@ -510,16 +510,16 @@ define i8 @scalar_i8_signed_mem_mem(i8* %a1_addr, i8* %a2_addr) nounwind { ; CHECK-LABEL: scalar_i8_signed_mem_mem: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsb w9, [x0] -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: ldrsb w10, [x1] -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: csel w11, w10, w9, gt -; CHECK-NEXT: csel w10, w9, w10, gt -; CHECK-NEXT: sub w10, w10, w11 -; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: ubfx w10, w10, #1, #7 -; CHECK-NEXT: madd w0, w10, w8, w9 +; CHECK-NEXT: ldrsb w8, [x0] +; CHECK-NEXT: ldrsb w9, [x1] +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w10, w9, w8, gt +; CHECK-NEXT: csel w9, w8, w9, gt +; CHECK-NEXT: sub w9, w9, w10 +; CHECK-NEXT: mov w10, #-1 +; CHECK-NEXT: ubfx w9, w9, #1, #7 +; CHECK-NEXT: cneg w10, w10, le +; CHECK-NEXT: madd w0, w9, w10, w8 ; CHECK-NEXT: ret %a1 = load i8, i8* %a1_addr %a2 = load i8, i8* %a2_addr Index: llvm/test/CodeGen/AArch64/minmax-of-minmax.ll =================================================================== --- llvm/test/CodeGen/AArch64/minmax-of-minmax.ll +++ llvm/test/CodeGen/AArch64/minmax-of-minmax.ll @@ -10,9 +10,9 @@ define <4 x i32> @smin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -26,9 +26,9 @@ define <4 x i32> @smin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -74,9 +74,9 @@ define <4 x i32> @smin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -90,9 +90,9 @@ define <4 x i32> @smin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -138,9 +138,9 @@ define <4 x i32> @smin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -154,9 +154,9 @@ define <4 x i32> @smin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -202,9 +202,9 @@ define <4 x i32> @smin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -218,9 +218,9 @@ define <4 x i32> @smin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -266,9 +266,9 @@ define <4 x i32> @smax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -282,9 +282,9 @@ define <4 x i32> @smax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -330,9 +330,9 @@ define <4 x i32> @smax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -346,9 +346,9 @@ define <4 x i32> @smax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -394,9 +394,9 @@ define <4 x i32> @smax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -410,9 +410,9 @@ define <4 x i32> @smax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -458,9 +458,9 @@ define <4 x i32> @smax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -474,9 +474,9 @@ define <4 x i32> @smax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -522,9 +522,9 @@ define <4 x i32> @umin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -538,9 +538,9 @@ define <4 x i32> @umin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -586,9 +586,9 @@ define <4 x i32> @umin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -602,9 +602,9 @@ define <4 x i32> @umin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -650,9 +650,9 @@ define <4 x i32> @umin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -666,9 +666,9 @@ define <4 x i32> @umin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -714,9 +714,9 @@ define <4 x i32> @umin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -730,9 +730,9 @@ define <4 x i32> @umin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -778,9 +778,9 @@ define <4 x i32> @umax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -794,9 +794,9 @@ define <4 x i32> @umax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -842,9 +842,9 @@ define <4 x i32> @umax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -858,9 +858,9 @@ define <4 x i32> @umax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -906,9 +906,9 @@ define <4 x i32> @umax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -922,9 +922,9 @@ define <4 x i32> @umax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -970,9 +970,9 @@ define <4 x i32> @umax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -986,9 +986,9 @@ define <4 x i32> @umax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -1034,8 +1034,8 @@ define <4 x i32> @notted_smin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s @@ -1056,8 +1056,8 @@ define <4 x i32> @notted_smin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s @@ -1078,8 +1078,8 @@ define <4 x i32> @notted_smin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ab: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s @@ -1100,8 +1100,8 @@ define <4 x i32> @notted_smin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ba: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s @@ -1122,8 +1122,8 @@ define <4 x i32> @notted_smin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s @@ -1144,8 +1144,8 @@ define <4 x i32> @notted_smin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s @@ -1166,8 +1166,8 @@ define <4 x i32> @notted_smin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ab_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s @@ -1188,8 +1188,8 @@ define <4 x i32> @notted_smin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ba_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s @@ -1210,8 +1210,8 @@ define <4 x i32> @notted_smin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s @@ -1232,8 +1232,8 @@ define <4 x i32> @notted_smin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s @@ -1254,8 +1254,8 @@ define <4 x i32> @notted_smin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ab_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s @@ -1276,8 +1276,8 @@ define <4 x i32> @notted_smin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ba_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s @@ -1298,8 +1298,8 @@ define <4 x i32> @notted_smin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s @@ -1320,8 +1320,8 @@ define <4 x i32> @notted_smin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s @@ -1342,8 +1342,8 @@ define <4 x i32> @notted_smin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s @@ -1364,8 +1364,8 @@ define <4 x i32> @notted_smin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s @@ -1386,8 +1386,8 @@ define <4 x i32> @notted_smax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s @@ -1408,8 +1408,8 @@ define <4 x i32> @notted_smax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s @@ -1430,8 +1430,8 @@ define <4 x i32> @notted_smax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ab: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -1452,8 +1452,8 @@ define <4 x i32> @notted_smax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ba: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s @@ -1474,8 +1474,8 @@ define <4 x i32> @notted_smax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s @@ -1496,8 +1496,8 @@ define <4 x i32> @notted_smax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s @@ -1518,8 +1518,8 @@ define <4 x i32> @notted_smax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ab_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -1540,8 +1540,8 @@ define <4 x i32> @notted_smax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ba_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s @@ -1562,8 +1562,8 @@ define <4 x i32> @notted_smax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s @@ -1584,8 +1584,8 @@ define <4 x i32> @notted_smax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s @@ -1606,8 +1606,8 @@ define <4 x i32> @notted_smax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ab_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -1628,8 +1628,8 @@ define <4 x i32> @notted_smax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ba_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s @@ -1650,8 +1650,8 @@ define <4 x i32> @notted_smax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s @@ -1672,8 +1672,8 @@ define <4 x i32> @notted_smax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s @@ -1694,8 +1694,8 @@ define <4 x i32> @notted_smax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -1716,8 +1716,8 @@ define <4 x i32> @notted_smax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s @@ -1738,8 +1738,8 @@ define <4 x i32> @notted_umin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s @@ -1760,8 +1760,8 @@ define <4 x i32> @notted_umin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s @@ -1782,8 +1782,8 @@ define <4 x i32> @notted_umin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ab: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s @@ -1804,8 +1804,8 @@ define <4 x i32> @notted_umin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ba: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s @@ -1826,8 +1826,8 @@ define <4 x i32> @notted_umin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s @@ -1848,8 +1848,8 @@ define <4 x i32> @notted_umin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s @@ -1870,8 +1870,8 @@ define <4 x i32> @notted_umin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ab_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s @@ -1892,8 +1892,8 @@ define <4 x i32> @notted_umin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ba_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s @@ -1914,8 +1914,8 @@ define <4 x i32> @notted_umin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s @@ -1936,8 +1936,8 @@ define <4 x i32> @notted_umin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s @@ -1958,8 +1958,8 @@ define <4 x i32> @notted_umin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ab_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s @@ -1980,8 +1980,8 @@ define <4 x i32> @notted_umin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ba_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s @@ -2002,8 +2002,8 @@ define <4 x i32> @notted_umin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s @@ -2024,8 +2024,8 @@ define <4 x i32> @notted_umin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s @@ -2046,8 +2046,8 @@ define <4 x i32> @notted_umin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s @@ -2068,8 +2068,8 @@ define <4 x i32> @notted_umin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s @@ -2090,8 +2090,8 @@ define <4 x i32> @notted_umax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s @@ -2112,8 +2112,8 @@ define <4 x i32> @notted_umax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s @@ -2134,8 +2134,8 @@ define <4 x i32> @notted_umax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ab: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s @@ -2156,8 +2156,8 @@ define <4 x i32> @notted_umax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ba: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s @@ -2178,8 +2178,8 @@ define <4 x i32> @notted_umax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s @@ -2200,8 +2200,8 @@ define <4 x i32> @notted_umax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s @@ -2222,8 +2222,8 @@ define <4 x i32> @notted_umax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ab_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s @@ -2244,8 +2244,8 @@ define <4 x i32> @notted_umax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ba_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s @@ -2266,8 +2266,8 @@ define <4 x i32> @notted_umax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s @@ -2288,8 +2288,8 @@ define <4 x i32> @notted_umax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s @@ -2310,8 +2310,8 @@ define <4 x i32> @notted_umax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ab_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s @@ -2332,8 +2332,8 @@ define <4 x i32> @notted_umax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ba_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s @@ -2354,8 +2354,8 @@ define <4 x i32> @notted_umax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s @@ -2376,8 +2376,8 @@ define <4 x i32> @notted_umax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s @@ -2398,8 +2398,8 @@ define <4 x i32> @notted_umax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s @@ -2420,8 +2420,8 @@ define <4 x i32> @notted_umax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s Index: llvm/test/CodeGen/AArch64/minmax.ll =================================================================== --- llvm/test/CodeGen/AArch64/minmax.ll +++ llvm/test/CodeGen/AArch64/minmax.ll @@ -108,9 +108,9 @@ define <16 x i32> @t11(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: t11: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v2.4s, v6.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v4.4s ; CHECK-NEXT: smin v1.4s, v1.4s, v5.4s +; CHECK-NEXT: smin v2.4s, v2.4s, v6.4s ; CHECK-NEXT: smin v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %t1 = icmp sle <16 x i32> %a, %b Index: llvm/test/CodeGen/AArch64/misched-fusion-lit.ll =================================================================== --- llvm/test/CodeGen/AArch64/misched-fusion-lit.ll +++ llvm/test/CodeGen/AArch64/misched-fusion-lit.ll @@ -32,7 +32,7 @@ ; CHECK-LABEL: litp_tune_generic: ; CHECK: adrp [[R:x[0-9]+]], litp_tune_generic -; CHECK-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp_tune_generic +; CHECKFUSE-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp_tune_generic } define dso_local i32 @liti(i32 %a, i32 %b) { Index: llvm/test/CodeGen/AArch64/mul_pow2.ll =================================================================== --- llvm/test/CodeGen/AArch64/mul_pow2.ll +++ llvm/test/CodeGen/AArch64/mul_pow2.ll @@ -723,8 +723,8 @@ define <4 x i32> @muladd_demand_commute(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: muladd_demand_commute: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1, msl #16 ; CHECK-NEXT: shl v0.4s, v0.4s, #6 +; CHECK-NEXT: movi v2.4s, #1, msl #16 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll =================================================================== --- llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll +++ llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll @@ -39,9 +39,8 @@ define <8 x i32> @splice_v8i32_idx(<8 x i32> %a, <8 x i32> %b) #0 { ; CHECK-LABEL: splice_v8i32_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v3.16b, v2.16b, v3.16b, #4 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #4 -; CHECK-NEXT: mov v1.16b, v3.16b +; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #4 ; CHECK-NEXT: ret %res = call <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32> %a, <8 x i32> %b, i32 5) ret <8 x i32> %res @@ -51,12 +50,10 @@ define <16 x float> @splice_v16f32_idx(<16 x float> %a, <16 x float> %b) #0 { ; CHECK-LABEL: splice_v16f32_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v5.16b, v4.16b, v5.16b, #12 -; CHECK-NEXT: ext v6.16b, v2.16b, v3.16b, #12 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #12 +; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #12 ; CHECK-NEXT: ext v2.16b, v3.16b, v4.16b, #12 -; CHECK-NEXT: mov v3.16b, v5.16b -; CHECK-NEXT: mov v1.16b, v6.16b +; CHECK-NEXT: ext v3.16b, v4.16b, v5.16b, #12 ; CHECK-NEXT: ret %res = call <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float> %a, <16 x float> %b, i32 7) ret <16 x float> %res @@ -98,9 +95,8 @@ define <8 x i32> @splice_v8i32(<8 x i32> %a, <8 x i32> %b) #0 { ; CHECK-LABEL: splice_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v3.16b, v2.16b, v3.16b, #4 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #4 -; CHECK-NEXT: mov v1.16b, v3.16b +; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #4 ; CHECK-NEXT: ret %res = call <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32> %a, <8 x i32> %b, i32 -3) ret <8 x i32> %res @@ -110,12 +106,10 @@ define <16 x float> @splice_v16f32(<16 x float> %a, <16 x float> %b) #0 { ; CHECK-LABEL: splice_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v5.16b, v4.16b, v5.16b, #12 -; CHECK-NEXT: ext v6.16b, v2.16b, v3.16b, #12 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #12 +; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #12 ; CHECK-NEXT: ext v2.16b, v3.16b, v4.16b, #12 -; CHECK-NEXT: mov v3.16b, v5.16b -; CHECK-NEXT: mov v1.16b, v6.16b +; CHECK-NEXT: ext v3.16b, v4.16b, v5.16b, #12 ; CHECK-NEXT: ret %res = call <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float> %a, <16 x float> %b, i32 -9) ret <16 x float> %res Index: llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll =================================================================== --- llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll +++ llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll @@ -259,9 +259,9 @@ ; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 -; CHECK-NEXT: ptrue p2.d +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z1.d, z1.d, #0x1 -; CHECK-NEXT: cmpne p0.d, p2/z, z1.d, #0 +; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv2i1( %a, %b, i32 1) ret %res @@ -274,9 +274,9 @@ ; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 -; CHECK-NEXT: ptrue p2.s +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: and z1.s, z1.s, #0x1 -; CHECK-NEXT: cmpne p0.s, p2/z, z1.s, #0 +; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv4i1( %a, %b, i32 2) ret %res @@ -289,9 +289,9 @@ ; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 -; CHECK-NEXT: ptrue p2.h +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: and z1.h, z1.h, #0x1 -; CHECK-NEXT: cmpne p0.h, p2/z, z1.h, #0 +; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv8i1( %a, %b, i32 4) ret %res @@ -304,9 +304,9 @@ ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 -; CHECK-NEXT: ptrue p2.b +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: and z1.b, z1.b, #0x1 -; CHECK-NEXT: cmpne p0.b, p2/z, z1.b, #0 +; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv16i1( %a, %b, i32 8) ret %res @@ -328,13 +328,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: orr x8, x8, #0x8 +; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] +; CHECK-NEXT: orr x8, x8, #0x8 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #4 @@ -352,18 +352,18 @@ ; CHECK-NEXT: addvl sp, sp, #-8 ; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov w9, #16 +; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: cmp x8, #16 ; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: add x10, x9, x8, lsl #2 ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] -; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: cmp x8, #16 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1w { z7.s }, p0, [sp, #7, mul vl] ; CHECK-NEXT: st1w { z4.s }, p0, [sp, #4, mul vl] -; CHECK-NEXT: add x10, x9, x8, lsl #2 ; CHECK-NEXT: st1w { z5.s }, p0, [sp, #5, mul vl] ; CHECK-NEXT: st1w { z6.s }, p0, [sp, #6, mul vl] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2] @@ -452,16 +452,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: rdvl x9, #1 -; CHECK-NEXT: cmp x9, #17 -; CHECK-NEXT: mov w10, #17 -; CHECK-NEXT: csel x9, x9, x10, lo +; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: mov w9, #17 +; CHECK-NEXT: cmp x8, #17 +; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: addvl x9, x10, #1 ; CHECK-NEXT: st1b { z0.b }, p0, [sp] -; CHECK-NEXT: sub x8, x8, x9 ; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -497,16 +497,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: rdvl x9, #1 -; CHECK-NEXT: cmp x9, #18 -; CHECK-NEXT: mov w10, #18 -; CHECK-NEXT: csel x9, x9, x10, lo +; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: mov w9, #18 +; CHECK-NEXT: cmp x8, #18 +; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: addvl x9, x10, #1 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: sub x8, x8, x9 ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -608,16 +608,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: rdvl x9, #1 -; CHECK-NEXT: cmp x9, #18 -; CHECK-NEXT: mov w10, #18 -; CHECK-NEXT: csel x9, x9, x10, lo +; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: mov w9, #18 +; CHECK-NEXT: cmp x8, #18 +; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: addvl x9, x10, #1 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: sub x8, x8, x9 ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -698,9 +698,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p2.d, vl1 ; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1 -; CHECK-NEXT: rev p2.d, p2.d ; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 -; CHECK-NEXT: splice z1.d, p2, z1.d, z0.d +; CHECK-NEXT: rev p0.d, p2.d +; CHECK-NEXT: splice z1.d, p0, z1.d, z0.d ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 @@ -715,9 +715,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p2.s, vl1 ; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1 -; CHECK-NEXT: rev p2.s, p2.s ; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1 -; CHECK-NEXT: splice z1.s, p2, z1.s, z0.s +; CHECK-NEXT: rev p0.s, p2.s +; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: and z1.s, z1.s, #0x1 ; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 @@ -732,9 +732,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p2.h, vl1 ; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1 -; CHECK-NEXT: rev p2.h, p2.h ; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1 -; CHECK-NEXT: splice z1.h, p2, z1.h, z0.h +; CHECK-NEXT: rev p0.h, p2.h +; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: and z1.h, z1.h, #0x1 ; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0 @@ -749,9 +749,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p2.b, vl1 ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 -; CHECK-NEXT: rev p2.b, p2.b ; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1 -; CHECK-NEXT: splice z1.b, p2, z1.b, z0.b +; CHECK-NEXT: rev p0.b, p2.b +; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: and z1.b, z1.b, #0x1 ; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0 @@ -779,14 +779,14 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: mov x9, #-8 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] -; CHECK-NEXT: sub x10, x8, #32 ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] +; CHECK-NEXT: mov x9, #-8 +; CHECK-NEXT: sub x10, x8, #32 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #4 @@ -802,15 +802,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-8 -; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: rdvl x8, #4 -; CHECK-NEXT: cmp x8, #68 ; CHECK-NEXT: mov w9, #68 -; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: cmp x8, #68 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: addvl x9, x10, #4 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: addvl x9, x9, #4 ; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] -; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] @@ -818,6 +817,7 @@ ; CHECK-NEXT: st1w { z4.s }, p0, [sp, #4, mul vl] ; CHECK-NEXT: st1w { z5.s }, p0, [sp, #5, mul vl] ; CHECK-NEXT: st1w { z6.s }, p0, [sp, #6, mul vl] +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl] ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x8, #2, mul vl] Index: llvm/test/CodeGen/AArch64/neg-imm.ll =================================================================== --- llvm/test/CodeGen/AArch64/neg-imm.ll +++ llvm/test/CodeGen/AArch64/neg-imm.ll @@ -20,9 +20,8 @@ ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_1: // %for.inc ; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: add w8, w20, #1 ; CHECK-NEXT: cmp w20, w19 -; CHECK-NEXT: mov w20, w8 +; CHECK-NEXT: add w20, w20, #1 ; CHECK-NEXT: b.gt .LBB0_4 ; CHECK-NEXT: .LBB0_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/AArch64/neon-abd.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-abd.ll +++ llvm/test/CodeGen/AArch64/neon-abd.ll @@ -147,20 +147,20 @@ ; CHECK-NEXT: mov x8, v0.d[1] ; CHECK-NEXT: fmov x10, d0 ; CHECK-NEXT: mov x9, v1.d[1] -; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: asr x12, x10, #63 -; CHECK-NEXT: asr x14, x8, #63 -; CHECK-NEXT: asr x15, x9, #63 +; CHECK-NEXT: fmov x13, d1 +; CHECK-NEXT: asr x11, x10, #63 +; CHECK-NEXT: asr x12, x8, #63 +; CHECK-NEXT: asr x14, x9, #63 +; CHECK-NEXT: asr x15, x13, #63 ; CHECK-NEXT: subs x8, x8, x9 -; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: sbc x9, x14, x15 -; CHECK-NEXT: subs x10, x10, x11 +; CHECK-NEXT: sbc x9, x12, x14 +; CHECK-NEXT: subs x10, x10, x13 +; CHECK-NEXT: sbc x11, x11, x15 ; CHECK-NEXT: asr x9, x9, #63 -; CHECK-NEXT: sbc x11, x12, x13 -; CHECK-NEXT: eor x8, x8, x9 ; CHECK-NEXT: asr x11, x11, #63 -; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: eor x8, x8, x9 ; CHECK-NEXT: eor x10, x10, x11 +; CHECK-NEXT: sub x8, x8, x9 ; CHECK-NEXT: sub x10, x10, x11 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fmov d0, x10 Index: llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -896,9 +896,9 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: vselect_equivalent_shuffle_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI89_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: adrp x8, .LCPI89_0 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI89_0] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b @@ -919,8 +919,8 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8_zero(<8 x i8> %a) { ; CHECK-LABEL: vselect_equivalent_shuffle_v8i8_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI90_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: adrp x8, .LCPI90_0 ; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI90_0] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b @@ -941,8 +941,8 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8_zeroswap(<8 x i8> %a) { ; CHECK-LABEL: vselect_equivalent_shuffle_v8i8_zeroswap: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI91_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: adrp x8, .LCPI91_0 ; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI91_0] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b @@ -973,8 +973,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI92_0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI92_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> Index: llvm/test/CodeGen/AArch64/neon-dotreduce.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-dotreduce.ll +++ llvm/test/CodeGen/AArch64/neon-dotreduce.ll @@ -88,11 +88,11 @@ define i32 @test_udot_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_udot_v16i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: udot v0.4s, v1.16b, v2.16b -; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: udot v1.4s, v0.16b, v2.16b +; CHECK-NEXT: addv s0, v1.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret @@ -130,11 +130,11 @@ define i32 @test_sdot_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_sdot_v16i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: sdot v0.4s, v1.16b, v2.16b -; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: sdot v1.4s, v0.16b, v2.16b +; CHECK-NEXT: addv s0, v1.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/neon-extadd.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-extadd.ll +++ llvm/test/CodeGen/AArch64/neon-extadd.ll @@ -451,60 +451,60 @@ ; CHECK-NEXT: add x9, sp, #112 ; CHECK-NEXT: ld1 { v2.b }[1], [x10] ; CHECK-NEXT: add x10, sp, #8 -; CHECK-NEXT: add x11, sp, #128 -; CHECK-NEXT: add x12, sp, #184 +; CHECK-NEXT: add x12, sp, #176 +; CHECK-NEXT: ld1 { v3.b }[1], [x10] ; CHECK-NEXT: mov v1.b[1], w1 -; CHECK-NEXT: add x13, sp, #192 +; CHECK-NEXT: add x11, sp, #128 ; CHECK-NEXT: ld1 { v0.b }[2], [x9] ; CHECK-NEXT: add x9, sp, #120 -; CHECK-NEXT: ld1 { v3.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #16 +; CHECK-NEXT: ld1 { v2.b }[2], [x12] +; CHECK-NEXT: add x12, sp, #184 ; CHECK-NEXT: ldr b4, [sp, #224] +; CHECK-NEXT: add x10, sp, #144 ; CHECK-NEXT: mov v1.b[2], w2 ; CHECK-NEXT: ldr b5, [sp, #64] ; CHECK-NEXT: ld1 { v0.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #176 -; CHECK-NEXT: ld1 { v3.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #24 -; CHECK-NEXT: ld1 { v2.b }[2], [x9] ; CHECK-NEXT: add x9, sp, #136 -; CHECK-NEXT: ld1 { v0.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #144 -; CHECK-NEXT: ld1 { v3.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #32 -; CHECK-NEXT: mov v1.b[3], w3 ; CHECK-NEXT: ld1 { v2.b }[3], [x12] -; CHECK-NEXT: add x12, sp, #200 +; CHECK-NEXT: add x12, sp, #192 +; CHECK-NEXT: mov v1.b[3], w3 +; CHECK-NEXT: ld1 { v0.b }[4], [x11] +; CHECK-NEXT: add x11, sp, #16 +; CHECK-NEXT: ld1 { v3.b }[2], [x11] +; CHECK-NEXT: add x11, sp, #152 +; CHECK-NEXT: ld1 { v2.b }[4], [x12] +; CHECK-NEXT: mov v1.b[4], w4 ; CHECK-NEXT: ld1 { v0.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #152 +; CHECK-NEXT: add x9, sp, #24 +; CHECK-NEXT: ld1 { v3.b }[3], [x9] +; CHECK-NEXT: add x9, sp, #232 +; CHECK-NEXT: ld1 { v4.b }[1], [x9] +; CHECK-NEXT: add x9, sp, #200 +; CHECK-NEXT: ld1 { v2.b }[5], [x9] +; CHECK-NEXT: add x9, sp, #40 +; CHECK-NEXT: ld1 { v0.b }[6], [x10] +; CHECK-NEXT: add x10, sp, #32 ; CHECK-NEXT: ld1 { v3.b }[4], [x10] ; CHECK-NEXT: add x10, sp, #72 -; CHECK-NEXT: mov v1.b[4], w4 -; CHECK-NEXT: ld1 { v2.b }[4], [x13] -; CHECK-NEXT: add x13, sp, #232 -; CHECK-NEXT: ld1 { v0.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #40 ; CHECK-NEXT: ld1 { v5.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #80 -; CHECK-NEXT: ld1 { v4.b }[1], [x13] -; CHECK-NEXT: ld1 { v2.b }[5], [x12] -; CHECK-NEXT: add x12, sp, #240 -; CHECK-NEXT: ld1 { v0.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #208 -; CHECK-NEXT: ld1 { v3.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #216 +; CHECK-NEXT: add x10, sp, #208 +; CHECK-NEXT: ld1 { v2.b }[6], [x10] +; CHECK-NEXT: add x10, sp, #48 ; CHECK-NEXT: mov v1.b[5], w5 -; CHECK-NEXT: ld1 { v4.b }[2], [x12] -; CHECK-NEXT: ld1 { v2.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #48 -; CHECK-NEXT: ld1 { v5.b }[2], [x10] -; CHECK-NEXT: add x12, sp, #248 +; CHECK-NEXT: ld1 { v0.b }[7], [x11] +; CHECK-NEXT: ld1 { v3.b }[5], [x9] +; CHECK-NEXT: add x9, sp, #240 +; CHECK-NEXT: ld1 { v4.b }[2], [x9] +; CHECK-NEXT: add x9, sp, #80 +; CHECK-NEXT: ld1 { v5.b }[2], [x9] +; CHECK-NEXT: add x9, sp, #216 +; CHECK-NEXT: ld1 { v2.b }[7], [x9] +; CHECK-NEXT: add x9, sp, #248 +; CHECK-NEXT: ld1 { v3.b }[6], [x10] ; CHECK-NEXT: add x10, sp, #56 -; CHECK-NEXT: ld1 { v3.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #88 ; CHECK-NEXT: mov v1.b[6], w6 -; CHECK-NEXT: ld1 { v4.b }[3], [x12] -; CHECK-NEXT: ld1 { v2.b }[7], [x11] +; CHECK-NEXT: ld1 { v4.b }[3], [x9] +; CHECK-NEXT: add x9, sp, #88 ; CHECK-NEXT: ld1 { v5.b }[3], [x9] ; CHECK-NEXT: ld1 { v3.b }[7], [x10] ; CHECK-NEXT: mov v1.b[7], w7 @@ -532,83 +532,83 @@ define <16 x i32> @i12(<16 x i12> %s0, <16 x i12> %s1) { ; CHECK-LABEL: i12: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr w12, [sp, #32] -; CHECK-NEXT: fmov s5, w0 -; CHECK-NEXT: ldr w15, [sp] +; CHECK-NEXT: ldr w14, [sp, #32] ; CHECK-NEXT: fmov s4, w4 -; CHECK-NEXT: ldr w14, [sp, #40] -; CHECK-NEXT: fmov s0, w12 -; CHECK-NEXT: ldr w16, [sp, #48] -; CHECK-NEXT: fmov s1, w15 -; CHECK-NEXT: ldr w15, [sp, #8] -; CHECK-NEXT: ldr w18, [sp, #16] -; CHECK-NEXT: mov v0.h[1], w14 -; CHECK-NEXT: ldr w17, [sp, #56] -; CHECK-NEXT: mov v1.h[1], w15 +; CHECK-NEXT: ldr w15, [sp, #40] +; CHECK-NEXT: fmov s5, w0 +; CHECK-NEXT: ldr w16, [sp, #8] +; CHECK-NEXT: fmov s0, w14 +; CHECK-NEXT: ldr w14, [sp, #48] +; CHECK-NEXT: ldr w17, [sp, #16] +; CHECK-NEXT: mov v4.h[1], w5 ; CHECK-NEXT: ldr w0, [sp, #24] +; CHECK-NEXT: mov v0.h[1], w15 +; CHECK-NEXT: ldr w15, [sp] ; CHECK-NEXT: mov v5.h[1], w1 -; CHECK-NEXT: ldr w13, [sp, #64] +; CHECK-NEXT: ldr w11, [sp, #64] ; CHECK-NEXT: ldr w1, [sp, #128] -; CHECK-NEXT: mov v0.h[2], w16 -; CHECK-NEXT: ldr w16, [sp, #96] -; CHECK-NEXT: mov v1.h[2], w18 +; CHECK-NEXT: fmov s1, w15 ; CHECK-NEXT: ldr w10, [sp, #72] +; CHECK-NEXT: mov v0.h[2], w14 +; CHECK-NEXT: ldr w14, [sp, #56] +; CHECK-NEXT: mov v4.h[2], w6 +; CHECK-NEXT: ldr w15, [sp, #104] +; CHECK-NEXT: mov v1.h[1], w16 +; CHECK-NEXT: ldr w16, [sp, #96] ; CHECK-NEXT: mov v5.h[2], w2 ; CHECK-NEXT: ldr w2, [sp, #160] -; CHECK-NEXT: mov v4.h[1], w5 +; CHECK-NEXT: mov v0.h[3], w14 +; CHECK-NEXT: ldr w4, [sp, #136] +; CHECK-NEXT: mov v4.h[3], w7 ; CHECK-NEXT: ldr w5, [sp, #168] -; CHECK-NEXT: mov v0.h[3], w17 -; CHECK-NEXT: ldr w14, [sp, #104] -; CHECK-NEXT: mov v1.h[3], w0 -; CHECK-NEXT: ldr w18, [sp, #136] -; CHECK-NEXT: fmov s6, w1 -; CHECK-NEXT: ldr w0, [sp, #176] -; CHECK-NEXT: fmov s7, w16 -; CHECK-NEXT: fmov s16, w13 -; CHECK-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-NEXT: mov v1.h[2], w17 ; CHECK-NEXT: ldr w9, [sp, #80] +; CHECK-NEXT: mov v5.h[3], w3 +; CHECK-NEXT: ldr w13, [sp, #112] +; CHECK-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-NEXT: ldr w18, [sp, #144] ; CHECK-NEXT: movi v0.4s, #15, msl #8 -; CHECK-NEXT: ldr w12, [sp, #112] -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: ldr w17, [sp, #144] -; CHECK-NEXT: mov v6.h[1], w18 -; CHECK-NEXT: ldr w4, [sp, #184] -; CHECK-NEXT: mov v7.h[1], w14 ; CHECK-NEXT: ldr w8, [sp, #88] +; CHECK-NEXT: mov v1.h[3], w0 +; CHECK-NEXT: ldr w0, [sp, #176] +; CHECK-NEXT: fmov s6, w1 +; CHECK-NEXT: ldr w12, [sp, #120] +; CHECK-NEXT: fmov s7, w16 +; CHECK-NEXT: fmov s16, w11 ; CHECK-NEXT: and v3.16b, v2.16b, v0.16b -; CHECK-NEXT: ldr w11, [sp, #120] +; CHECK-NEXT: ldr w14, [sp, #152] +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: ldr w17, [sp, #184] +; CHECK-NEXT: mov v6.h[1], w4 ; CHECK-NEXT: and v2.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr w15, [sp, #152] -; CHECK-NEXT: fmov s1, w2 +; CHECK-NEXT: ushll v1.4s, v4.4h, #0 +; CHECK-NEXT: ushll v4.4s, v5.4h, #0 +; CHECK-NEXT: fmov s5, w2 +; CHECK-NEXT: mov v7.h[1], w15 ; CHECK-NEXT: mov v16.h[1], w10 -; CHECK-NEXT: mov v4.h[2], w6 -; CHECK-NEXT: mov v1.h[1], w5 -; CHECK-NEXT: mov v6.h[2], w17 -; CHECK-NEXT: mov v7.h[2], w12 +; CHECK-NEXT: mov v5.h[1], w5 +; CHECK-NEXT: mov v6.h[2], w18 +; CHECK-NEXT: mov v7.h[2], w13 ; CHECK-NEXT: mov v16.h[2], w9 -; CHECK-NEXT: mov v1.h[2], w0 -; CHECK-NEXT: mov v4.h[3], w7 -; CHECK-NEXT: mov v5.h[3], w3 -; CHECK-NEXT: mov v6.h[3], w15 -; CHECK-NEXT: mov v1.h[3], w4 -; CHECK-NEXT: mov v7.h[3], w11 +; CHECK-NEXT: mov v5.h[2], w0 +; CHECK-NEXT: mov v6.h[3], w14 +; CHECK-NEXT: mov v7.h[3], w12 ; CHECK-NEXT: mov v16.h[3], w8 -; CHECK-NEXT: ushll v4.4s, v4.4h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: ushll v5.4s, v5.4h, #0 +; CHECK-NEXT: mov v5.h[3], w17 ; CHECK-NEXT: ushll v6.4s, v6.4h, #0 -; CHECK-NEXT: and v17.16b, v1.16b, v0.16b -; CHECK-NEXT: ushll v1.4s, v7.4h, #0 -; CHECK-NEXT: ushll v7.4s, v16.4h, #0 +; CHECK-NEXT: ushll v7.4s, v7.4h, #0 +; CHECK-NEXT: ushll v16.4s, v16.4h, #0 +; CHECK-NEXT: ushll v5.4s, v5.4h, #0 +; CHECK-NEXT: and v1.16b, v1.16b, v0.16b ; CHECK-NEXT: and v4.16b, v4.16b, v0.16b ; CHECK-NEXT: and v5.16b, v5.16b, v0.16b ; CHECK-NEXT: and v6.16b, v6.16b, v0.16b -; CHECK-NEXT: and v1.16b, v1.16b, v0.16b -; CHECK-NEXT: and v0.16b, v7.16b, v0.16b -; CHECK-NEXT: add v0.4s, v5.4s, v0.4s -; CHECK-NEXT: add v1.4s, v4.4s, v1.4s +; CHECK-NEXT: and v7.16b, v7.16b, v0.16b +; CHECK-NEXT: and v0.16b, v16.16b, v0.16b +; CHECK-NEXT: add v0.4s, v4.4s, v0.4s +; CHECK-NEXT: add v1.4s, v1.4s, v7.4s ; CHECK-NEXT: add v2.4s, v2.4s, v6.4s -; CHECK-NEXT: add v3.4s, v3.4s, v17.4s +; CHECK-NEXT: add v3.4s, v3.4s, v5.4s ; CHECK-NEXT: ret entry: %s0s = zext <16 x i12> %s0 to <16 x i32> Index: llvm/test/CodeGen/AArch64/neon-extracttruncate.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-extracttruncate.ll +++ llvm/test/CodeGen/AArch64/neon-extracttruncate.ll @@ -41,16 +41,16 @@ ; CHECK-NEXT: mov w9, v0.s[2] ; CHECK-NEXT: mov w10, v0.s[3] ; CHECK-NEXT: mov v0.b[1], w8 -; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov w8, v1.s[1] ; CHECK-NEXT: mov v0.b[2], w9 -; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: fmov w9, s1 ; CHECK-NEXT: mov v0.b[3], w10 -; CHECK-NEXT: mov v0.b[4], w8 -; CHECK-NEXT: mov w8, v1.s[2] -; CHECK-NEXT: mov v0.b[5], w9 -; CHECK-NEXT: mov w9, v1.s[3] -; CHECK-NEXT: mov v0.b[6], w8 -; CHECK-NEXT: mov v0.b[7], w9 +; CHECK-NEXT: mov v0.b[4], w9 +; CHECK-NEXT: mov w9, v1.s[2] +; CHECK-NEXT: mov v0.b[5], w8 +; CHECK-NEXT: mov w8, v1.s[3] +; CHECK-NEXT: mov v0.b[6], w9 +; CHECK-NEXT: mov v0.b[7], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: @@ -268,11 +268,11 @@ define <16 x i8> @extract_4_v4i32_badindex(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) { ; CHECK-LABEL: extract_4_v4i32_badindex: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: adrp x8, .LCPI5_0 +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI5_0] ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI5_0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/neon-mla-mls.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-mla-mls.ll +++ llvm/test/CodeGen/AArch64/neon-mla-mls.ll @@ -138,8 +138,9 @@ define <8 x i8> @mls2v8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { ; CHECK-LABEL: mls2v8xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.8b, v0.8b, v1.8b -; CHECK-NEXT: sub v0.8b, v0.8b, v2.8b +; CHECK-NEXT: neg v2.8b, v2.8b +; CHECK-NEXT: mla v2.8b, v0.8b, v1.8b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <8 x i8> %A, %B; %tmp2 = sub <8 x i8> %tmp1, %C; @@ -149,8 +150,9 @@ define <16 x i8> @mls2v16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { ; CHECK-LABEL: mls2v16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.16b, v0.16b, v1.16b -; CHECK-NEXT: sub v0.16b, v0.16b, v2.16b +; CHECK-NEXT: neg v2.16b, v2.16b +; CHECK-NEXT: mla v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp1 = mul <16 x i8> %A, %B; %tmp2 = sub <16 x i8> %tmp1, %C; @@ -160,8 +162,9 @@ define <4 x i16> @mls2v4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) { ; CHECK-LABEL: mls2v4xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h -; CHECK-NEXT: sub v0.4h, v0.4h, v2.4h +; CHECK-NEXT: neg v2.4h, v2.4h +; CHECK-NEXT: mla v2.4h, v0.4h, v1.4h +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <4 x i16> %A, %B; %tmp2 = sub <4 x i16> %tmp1, %C; @@ -171,8 +174,9 @@ define <8 x i16> @mls2v8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) { ; CHECK-LABEL: mls2v8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h -; CHECK-NEXT: sub v0.8h, v0.8h, v2.8h +; CHECK-NEXT: neg v2.8h, v2.8h +; CHECK-NEXT: mla v2.8h, v0.8h, v1.8h +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp1 = mul <8 x i16> %A, %B; %tmp2 = sub <8 x i16> %tmp1, %C; @@ -182,8 +186,9 @@ define <2 x i32> @mls2v2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) { ; CHECK-LABEL: mls2v2xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: sub v0.2s, v0.2s, v2.2s +; CHECK-NEXT: neg v2.2s, v2.2s +; CHECK-NEXT: mla v2.2s, v0.2s, v1.2s +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <2 x i32> %A, %B; %tmp2 = sub <2 x i32> %tmp1, %C; @@ -193,8 +198,9 @@ define <4 x i32> @mls2v4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) { ; CHECK-LABEL: mls2v4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s +; CHECK-NEXT: neg v2.4s, v2.4s +; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp1 = mul <4 x i32> %A, %B; %tmp2 = sub <4 x i32> %tmp1, %C; Index: llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll +++ llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll @@ -48,8 +48,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -83,8 +83,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: Index: llvm/test/CodeGen/AArch64/neon-stepvector.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-stepvector.ll +++ llvm/test/CodeGen/AArch64/neon-stepvector.ll @@ -126,12 +126,12 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: adrp x9, .LCPI5_1 -; CHECK-NEXT: adrp x10, .LCPI5_2 -; CHECK-NEXT: adrp x11, .LCPI5_3 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: adrp x8, .LCPI5_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_2] +; CHECK-NEXT: adrp x8, .LCPI5_3 ; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI5_1] -; CHECK-NEXT: ldr q2, [x10, :lo12:.LCPI5_2] -; CHECK-NEXT: ldr q3, [x11, :lo12:.LCPI5_3] +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI5_3] ; CHECK-NEXT: ret entry: %0 = call <16 x i32> @llvm.experimental.stepvector.v16i32() Index: llvm/test/CodeGen/AArch64/neon-truncstore.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-truncstore.ll +++ llvm/test/CodeGen/AArch64/neon-truncstore.ll @@ -140,11 +140,11 @@ define void @v32i32_v32i8(<32 x i32> %a, <32 x i8>* %result) { ; CHECK-LABEL: v32i32_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: uzp1 v6.8h, v6.8h, v7.8h ; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h -; CHECK-NEXT: uzp1 v3.8h, v4.8h, v5.8h +; CHECK-NEXT: uzp1 v3.8h, v6.8h, v7.8h +; CHECK-NEXT: uzp1 v4.8h, v4.8h, v5.8h ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-NEXT: uzp1 v1.16b, v3.16b, v6.16b +; CHECK-NEXT: uzp1 v1.16b, v4.16b, v3.16b ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/neon-wide-splat.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-wide-splat.ll +++ llvm/test/CodeGen/AArch64/neon-wide-splat.ll @@ -131,8 +131,8 @@ define <8 x i8> @shuffle_not4(<8 x i8> %v) { ; CHECK-LABEL: shuffle_not4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI11_0] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b Index: llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll +++ llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll @@ -138,8 +138,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -153,8 +153,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI13_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -168,8 +168,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: Index: llvm/test/CodeGen/AArch64/nontemporal.ll =================================================================== --- llvm/test/CodeGen/AArch64/nontemporal.ll +++ llvm/test/CodeGen/AArch64/nontemporal.ll @@ -450,43 +450,43 @@ define void @test_stnp_v17f32(<17 x float> %v, <17 x float>* %ptr) { ; CHECK-LABEL: test_stnp_v17f32: ; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; kill: def $s5 killed $s5 def $q5 +; CHECK-NEXT: ; kill: def $s4 killed $s4 def $q4 ; CHECK-NEXT: ldr s16, [sp, #16] ; CHECK-NEXT: add x8, sp, #20 -; CHECK-NEXT: ldr s17, [sp] -; CHECK-NEXT: add x9, sp, #4 -; CHECK-NEXT: ; kill: def $s4 killed $s4 def $q4 +; CHECK-NEXT: mov.s v4[1], v5[0] +; CHECK-NEXT: ldr s5, [sp] ; CHECK-NEXT: ; kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ; kill: def $s5 killed $s5 def $q5 ; CHECK-NEXT: ; kill: def $s1 killed $s1 def $q1 ; CHECK-NEXT: ; kill: def $s6 killed $s6 def $q6 ; CHECK-NEXT: ; kill: def $s2 killed $s2 def $q2 ; CHECK-NEXT: ; kill: def $s7 killed $s7 def $q7 ; CHECK-NEXT: ; kill: def $s3 killed $s3 def $q3 ; CHECK-NEXT: ld1.s { v16 }[1], [x8] -; CHECK-NEXT: add x8, sp, #24 -; CHECK-NEXT: ld1.s { v17 }[1], [x9] -; CHECK-NEXT: add x9, sp, #8 -; CHECK-NEXT: mov.s v4[1], v5[0] +; CHECK-NEXT: add x8, sp, #4 ; CHECK-NEXT: mov.s v0[1], v1[0] -; CHECK-NEXT: ld1.s { v16 }[2], [x8] -; CHECK-NEXT: add x8, sp, #28 -; CHECK-NEXT: ld1.s { v17 }[2], [x9] -; CHECK-NEXT: add x9, sp, #12 +; CHECK-NEXT: ld1.s { v5 }[1], [x8] ; CHECK-NEXT: mov.s v4[2], v6[0] +; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: ld1.s { v16 }[2], [x8] +; CHECK-NEXT: add x8, sp, #8 ; CHECK-NEXT: mov.s v0[2], v2[0] -; CHECK-NEXT: ld1.s { v16 }[3], [x8] -; CHECK-NEXT: ld1.s { v17 }[3], [x9] +; CHECK-NEXT: ld1.s { v5 }[2], [x8] ; CHECK-NEXT: mov.s v4[3], v7[0] +; CHECK-NEXT: add x8, sp, #28 +; CHECK-NEXT: ld1.s { v16 }[3], [x8] +; CHECK-NEXT: add x8, sp, #12 ; CHECK-NEXT: mov.s v0[3], v3[0] -; CHECK-NEXT: mov d1, v16[1] -; CHECK-NEXT: mov d2, v17[1] -; CHECK-NEXT: mov d3, v4[1] -; CHECK-NEXT: mov d5, v0[1] -; CHECK-NEXT: stnp d16, d1, [x0, #48] +; CHECK-NEXT: ld1.s { v5 }[3], [x8] +; CHECK-NEXT: mov d1, v4[1] +; CHECK-NEXT: mov d3, v16[1] +; CHECK-NEXT: mov d2, v0[1] +; CHECK-NEXT: mov d6, v5[1] +; CHECK-NEXT: stnp d4, d1, [x0, #16] ; CHECK-NEXT: ldr s1, [sp, #32] -; CHECK-NEXT: stnp d17, d2, [x0, #32] -; CHECK-NEXT: stnp d4, d3, [x0, #16] -; CHECK-NEXT: stnp d0, d5, [x0] +; CHECK-NEXT: stnp d16, d3, [x0, #48] +; CHECK-NEXT: stnp d0, d2, [x0] +; CHECK-NEXT: stnp d5, d6, [x0, #32] ; CHECK-NEXT: str s1, [x0, #64] ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/nzcv-save.ll =================================================================== --- llvm/test/CodeGen/AArch64/nzcv-save.ll +++ llvm/test/CodeGen/AArch64/nzcv-save.ll @@ -12,14 +12,14 @@ ; CHECK-NEXT: ldp x14, x15, [x3, #16] ; CHECK-NEXT: adds x9, x9, x11 ; CHECK-NEXT: adcs x8, x8, x10 +; CHECK-NEXT: stp x9, x8, [x0] ; CHECK-NEXT: adcs x10, x13, x14 ; CHECK-NEXT: adc x11, x12, x15 -; CHECK-NEXT: orr x12, x12, #0x100 -; CHECK-NEXT: adc x12, x12, x15 -; CHECK-NEXT: stp x9, x8, [x0] ; CHECK-NEXT: stp x10, x11, [x0, #16] -; CHECK-NEXT: stp x10, x12, [x1, #16] +; CHECK-NEXT: orr x11, x12, #0x100 +; CHECK-NEXT: adc x11, x11, x15 ; CHECK-NEXT: stp x9, x8, [x1] +; CHECK-NEXT: stp x10, x11, [x1, #16] ; CHECK-NEXT: ret entry: %c = load i256, i256* %cc Index: llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll =================================================================== --- llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll +++ llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll @@ -5,18 +5,18 @@ ; CHECK-LABEL: jsimd_idct_ifast_neon_intrinsic: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr q0, [x1, #32] -; CHECK-NEXT: mov w8, w3 +; CHECK-NEXT: mov w9, w3 ; CHECK-NEXT: ldr q1, [x1, #96] ; CHECK-NEXT: ldr q2, [x0, #32] ; CHECK-NEXT: ldr q3, [x0, #96] -; CHECK-NEXT: ldr x9, [x2, #48] +; CHECK-NEXT: ldr x8, [x2, #48] ; CHECK-NEXT: mul v0.8h, v2.8h, v0.8h ; CHECK-NEXT: mul v1.8h, v3.8h, v1.8h ; CHECK-NEXT: add v2.8h, v0.8h, v1.8h -; CHECK-NEXT: str q2, [x9, x8] -; CHECK-NEXT: ldr x9, [x2, #56] +; CHECK-NEXT: str q2, [x8, x9] +; CHECK-NEXT: ldr x8, [x2, #56] ; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h -; CHECK-NEXT: str q0, [x9, x8] +; CHECK-NEXT: str q0, [x8, x9] ; CHECK-NEXT: ret entry: %add.ptr5 = getelementptr inbounds i16, i16* %coef_block, i64 16 Index: llvm/test/CodeGen/AArch64/peephole-and-tst.ll =================================================================== --- llvm/test/CodeGen/AArch64/peephole-and-tst.ll +++ llvm/test/CodeGen/AArch64/peephole-and-tst.ll @@ -8,11 +8,10 @@ define i32 @test_func_i32_two_uses(i32 %in, i32 %bit, i32 %mask) { ; CHECK-LABEL: test_func_i32_two_uses: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, :got:ptr_wrapper -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ldr x9, [x9, :got_lo12:ptr_wrapper] -; CHECK-NEXT: ldr x9, [x9] +; CHECK-NEXT: adrp x8, :got:ptr_wrapper +; CHECK-NEXT: ldr x8, [x8, :got_lo12:ptr_wrapper] +; CHECK-NEXT: ldr x9, [x8] +; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .LBB0_1: // in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: str xzr, [x9, #8] @@ -21,9 +20,9 @@ ; CHECK-NEXT: cbz w1, .LBB0_6 ; CHECK-NEXT: .LBB0_3: // %do.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ands w10, w1, w8 -; CHECK-NEXT: and w11, w2, w8 -; CHECK-NEXT: cinc w0, w0, ne +; CHECK-NEXT: ands w10, w1, w0 +; CHECK-NEXT: and w11, w2, w0 +; CHECK-NEXT: cinc w8, w8, ne ; CHECK-NEXT: cmp w10, w11 ; CHECK-NEXT: b.eq .LBB0_1 ; CHECK-NEXT: // %bb.4: // %do.body @@ -34,6 +33,7 @@ ; CHECK-NEXT: cbz w10, .LBB0_2 ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_6: // %do.end +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret entry: %0 = load %struct.anon*, %struct.anon** @ptr_wrapper, align 8 @@ -72,25 +72,25 @@ define i32 @test_func_i64_one_use(i64 %in, i64 %bit, i64 %mask) { ; CHECK-LABEL: test_func_i64_one_use: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, :got:ptr_wrapper -; CHECK-NEXT: mov x8, x0 -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ldr x9, [x9, :got_lo12:ptr_wrapper] -; CHECK-NEXT: ldr x9, [x9] +; CHECK-NEXT: adrp x8, :got:ptr_wrapper +; CHECK-NEXT: ldr x8, [x8, :got_lo12:ptr_wrapper] +; CHECK-NEXT: ldr x9, [x8] +; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: b .LBB1_2 ; CHECK-NEXT: .LBB1_1: // in Loop: Header=BB1_2 Depth=1 ; CHECK-NEXT: lsl x1, x1, #1 ; CHECK-NEXT: cbz x1, .LBB1_4 ; CHECK-NEXT: .LBB1_2: // %do.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ands x10, x1, x8 +; CHECK-NEXT: ands x10, x1, x0 ; CHECK-NEXT: orr x10, x2, x10 -; CHECK-NEXT: cinc w0, w0, ne +; CHECK-NEXT: cinc w8, w8, ne ; CHECK-NEXT: cbz x10, .LBB1_1 ; CHECK-NEXT: // %bb.3: // in Loop: Header=BB1_2 Depth=1 ; CHECK-NEXT: str xzr, [x9, #8] ; CHECK-NEXT: b .LBB1_1 ; CHECK-NEXT: .LBB1_4: // %do.end +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret entry: %0 = load %struct.anon*, %struct.anon** @ptr_wrapper, align 8 Index: llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll =================================================================== --- llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -60,80 +60,80 @@ ; CHECK-NEXT: // implicit-def: $q13 ; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: mov x12, xzr ; CHECK-NEXT: stp q15, q14, [sp] // 32-byte Folded Spill ; CHECK-NEXT: ldr q14, [x8] -; CHECK-NEXT: add x15, x11, x8 -; CHECK-NEXT: ldr q15, [x10], #64 -; CHECK-NEXT: ldr q0, [x12] +; CHECK-NEXT: mov x12, xzr +; CHECK-NEXT: add x4, x11, x8 +; CHECK-NEXT: ldr q15, [x12] ; CHECK-NEXT: add x9, x9, #1 ; CHECK-NEXT: ldr x12, [x12] -; CHECK-NEXT: fmov x13, d14 -; CHECK-NEXT: mov x14, v14.d[1] -; CHECK-NEXT: fmov x0, d15 -; CHECK-NEXT: fmov x16, d0 -; CHECK-NEXT: ldr x15, [x15, #128] -; CHECK-NEXT: mul x17, x13, x12 -; CHECK-NEXT: mov x18, v0.d[1] -; CHECK-NEXT: mul x4, x0, x12 -; CHECK-NEXT: mul x1, x16, x12 -; CHECK-NEXT: mul x3, x14, x12 -; CHECK-NEXT: fmov d0, x17 -; CHECK-NEXT: mul x5, x13, x15 -; CHECK-NEXT: mov x17, v15.d[1] -; CHECK-NEXT: fmov d15, x4 +; CHECK-NEXT: fmov x14, d14 +; CHECK-NEXT: ldr q0, [x10], #64 +; CHECK-NEXT: mov x13, v14.d[1] +; CHECK-NEXT: fmov x17, d15 +; CHECK-NEXT: mul x18, x14, x12 +; CHECK-NEXT: ldr x4, [x4, #128] +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ldr x2, [x8] +; CHECK-NEXT: mov x15, v0.d[1] +; CHECK-NEXT: mul x1, x17, x12 +; CHECK-NEXT: mov x16, v15.d[1] +; CHECK-NEXT: add x8, x8, #8 +; CHECK-NEXT: mul x3, x0, x12 +; CHECK-NEXT: fmov d0, x18 +; CHECK-NEXT: mul x18, x13, x12 +; CHECK-NEXT: cmp x8, #64 ; CHECK-NEXT: fmov d14, x1 -; CHECK-NEXT: mul x1, x18, x12 -; CHECK-NEXT: mov v0.d[1], x3 -; CHECK-NEXT: mul x3, x16, x15 -; CHECK-NEXT: ldr x2, [x8], #8 -; CHECK-NEXT: mul x12, x17, x12 -; CHECK-NEXT: fmov d1, x5 -; CHECK-NEXT: mov v14.d[1], x1 -; CHECK-NEXT: mul x1, x14, x15 -; CHECK-NEXT: add v12.2d, v12.2d, v0.2d -; CHECK-NEXT: mul x13, x13, x2 -; CHECK-NEXT: fmov d0, x3 -; CHECK-NEXT: mul x3, x0, x15 +; CHECK-NEXT: mul x1, x17, x2 +; CHECK-NEXT: mul x5, x16, x12 +; CHECK-NEXT: mul x12, x15, x12 +; CHECK-NEXT: fmov d15, x3 +; CHECK-NEXT: mov v0.d[1], x18 +; CHECK-NEXT: mul x18, x14, x4 +; CHECK-NEXT: mul x17, x17, x4 +; CHECK-NEXT: fmov d1, x1 +; CHECK-NEXT: mov v14.d[1], x5 +; CHECK-NEXT: mul x5, x16, x2 ; CHECK-NEXT: mov v15.d[1], x12 -; CHECK-NEXT: mul x12, x18, x2 -; CHECK-NEXT: mov v1.d[1], x1 -; CHECK-NEXT: mul x18, x18, x15 -; CHECK-NEXT: mul x16, x16, x2 -; CHECK-NEXT: cmp x8, #64 -; CHECK-NEXT: mul x15, x17, x15 +; CHECK-NEXT: mul x12, x13, x4 +; CHECK-NEXT: add v12.2d, v12.2d, v0.2d +; CHECK-NEXT: mul x16, x16, x4 +; CHECK-NEXT: fmov d0, x18 +; CHECK-NEXT: mul x18, x15, x4 ; CHECK-NEXT: add v13.2d, v13.2d, v14.2d ; CHECK-NEXT: mul x14, x14, x2 ; CHECK-NEXT: add v11.2d, v11.2d, v14.2d -; CHECK-NEXT: fmov d14, x3 -; CHECK-NEXT: add v10.2d, v10.2d, v15.2d -; CHECK-NEXT: fmov d15, x13 -; CHECK-NEXT: mov v0.d[1], x18 -; CHECK-NEXT: mul x13, x0, x2 -; CHECK-NEXT: add v29.2d, v29.2d, v1.2d -; CHECK-NEXT: fmov d1, x16 -; CHECK-NEXT: mov v14.d[1], x15 -; CHECK-NEXT: mov v15.d[1], x14 -; CHECK-NEXT: mov v1.d[1], x12 -; CHECK-NEXT: mul x12, x17, x2 -; CHECK-NEXT: add v28.2d, v28.2d, v0.2d -; CHECK-NEXT: fmov d0, x13 -; CHECK-NEXT: add v27.2d, v27.2d, v14.2d -; CHECK-NEXT: ldr q14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add v8.2d, v8.2d, v15.2d +; CHECK-NEXT: mul x13, x13, x2 ; CHECK-NEXT: mov v0.d[1], x12 -; CHECK-NEXT: add v25.2d, v25.2d, v15.2d -; CHECK-NEXT: add v22.2d, v22.2d, v15.2d -; CHECK-NEXT: add v18.2d, v18.2d, v15.2d -; CHECK-NEXT: add v6.2d, v6.2d, v15.2d -; CHECK-NEXT: add v14.2d, v14.2d, v15.2d -; CHECK-NEXT: ldr q15, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mul x12, x0, x4 +; CHECK-NEXT: add v10.2d, v10.2d, v15.2d +; CHECK-NEXT: fmov d14, x17 +; CHECK-NEXT: mov v1.d[1], x5 +; CHECK-NEXT: fmov d15, x12 +; CHECK-NEXT: mul x12, x15, x2 +; CHECK-NEXT: mov v14.d[1], x16 +; CHECK-NEXT: add v29.2d, v29.2d, v0.2d +; CHECK-NEXT: mov v15.d[1], x18 +; CHECK-NEXT: fmov d0, x14 +; CHECK-NEXT: add v28.2d, v28.2d, v14.2d ; CHECK-NEXT: add v9.2d, v9.2d, v1.2d +; CHECK-NEXT: add v27.2d, v27.2d, v15.2d +; CHECK-NEXT: ldp q15, q14, [sp] // 32-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x13 +; CHECK-NEXT: mul x13, x0, x2 ; CHECK-NEXT: add v31.2d, v31.2d, v1.2d ; CHECK-NEXT: add v26.2d, v26.2d, v1.2d ; CHECK-NEXT: add v23.2d, v23.2d, v1.2d +; CHECK-NEXT: add v8.2d, v8.2d, v0.2d +; CHECK-NEXT: add v25.2d, v25.2d, v0.2d +; CHECK-NEXT: add v22.2d, v22.2d, v0.2d +; CHECK-NEXT: add v18.2d, v18.2d, v0.2d +; CHECK-NEXT: add v6.2d, v6.2d, v0.2d +; CHECK-NEXT: add v14.2d, v14.2d, v0.2d +; CHECK-NEXT: fmov d0, x13 ; CHECK-NEXT: add v21.2d, v21.2d, v1.2d ; CHECK-NEXT: add v19.2d, v19.2d, v1.2d +; CHECK-NEXT: mov v0.d[1], x12 ; CHECK-NEXT: add v17.2d, v17.2d, v1.2d ; CHECK-NEXT: add v7.2d, v7.2d, v1.2d ; CHECK-NEXT: add v5.2d, v5.2d, v1.2d Index: llvm/test/CodeGen/AArch64/rand.ll =================================================================== --- llvm/test/CodeGen/AArch64/rand.ll +++ llvm/test/CodeGen/AArch64/rand.ll @@ -7,9 +7,9 @@ ; CHECK-NEXT: mrs x10, RNDR ; CHECK-NEXT: mov x9, x0 ; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: str x10, [x9] ; CHECK-NEXT: and w8, w8, #0x1 ; CHECK-NEXT: mov w0, w8 -; CHECK-NEXT: str x10, [x9] ; CHECK-NEXT: ret %1 = tail call { i64, i1 } @llvm.aarch64.rndr() %2 = extractvalue { i64, i1 } %1, 0 @@ -26,9 +26,9 @@ ; CHECK-NEXT: mrs x10, RNDRRS ; CHECK-NEXT: mov x9, x0 ; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: str x10, [x9] ; CHECK-NEXT: and w8, w8, #0x1 ; CHECK-NEXT: mov w0, w8 -; CHECK-NEXT: str x10, [x9] ; CHECK-NEXT: ret %1 = tail call { i64, i1 } @llvm.aarch64.rndrrs() %2 = extractvalue { i64, i1 } %1, 0 Index: llvm/test/CodeGen/AArch64/reduce-and.ll =================================================================== --- llvm/test/CodeGen/AArch64/reduce-and.ll +++ llvm/test/CodeGen/AArch64/reduce-and.ll @@ -109,16 +109,16 @@ ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: fmov w13, s7 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 -; GISEL-NEXT: and w8, w8, w10 -; GISEL-NEXT: and w9, w9, w11 -; GISEL-NEXT: and w9, w12, w9 +; GISEL-NEXT: and w11, w12, w13 +; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret @@ -302,16 +302,16 @@ ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: fmov w13, s7 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 -; GISEL-NEXT: and w8, w8, w10 -; GISEL-NEXT: and w9, w9, w11 -; GISEL-NEXT: and w9, w12, w9 +; GISEL-NEXT: and w11, w12, w13 +; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: and w0, w8, w9 ; GISEL-NEXT: ret %and_result = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a) Index: llvm/test/CodeGen/AArch64/reduce-or.ll =================================================================== --- llvm/test/CodeGen/AArch64/reduce-or.ll +++ llvm/test/CodeGen/AArch64/reduce-or.ll @@ -109,16 +109,16 @@ ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w13, s7 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 -; GISEL-NEXT: orr w8, w8, w10 -; GISEL-NEXT: orr w9, w9, w11 -; GISEL-NEXT: orr w9, w12, w9 +; GISEL-NEXT: orr w11, w12, w13 +; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret @@ -301,16 +301,16 @@ ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w13, s7 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 -; GISEL-NEXT: orr w8, w8, w10 -; GISEL-NEXT: orr w9, w9, w11 -; GISEL-NEXT: orr w9, w12, w9 +; GISEL-NEXT: orr w11, w12, w13 +; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: orr w0, w8, w9 ; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a) Index: llvm/test/CodeGen/AArch64/reduce-shuffle.ll =================================================================== --- llvm/test/CodeGen/AArch64/reduce-shuffle.ll +++ llvm/test/CodeGen/AArch64/reduce-shuffle.ll @@ -4,137 +4,137 @@ define i32 @v1(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocapture noundef readonly %p2, i32 noundef %i2) { ; CHECK-LABEL: v1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $w3 killed $w3 def $x3 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x8, w1 -; CHECK-NEXT: sxtw x10, w3 +; CHECK-NEXT: // kill: def $w3 killed $w3 def $x3 +; CHECK-NEXT: sxtw x11, w3 ; CHECK-NEXT: add x9, x0, x8 -; CHECK-NEXT: add x12, x2, x10 -; CHECK-NEXT: add x11, x9, x8 -; CHECK-NEXT: add x13, x12, x10 -; CHECK-NEXT: add x8, x11, x8 -; CHECK-NEXT: add x10, x13, x10 -; CHECK-NEXT: ldp s1, s0, [x9] +; CHECK-NEXT: add x12, x2, x11 +; CHECK-NEXT: add x10, x9, x8 +; CHECK-NEXT: add x8, x10, x8 +; CHECK-NEXT: ldp s1, s0, [x8] +; CHECK-NEXT: add x8, x12, x11 +; CHECK-NEXT: add x11, x8, x11 +; CHECK-NEXT: ldp s3, s2, [x9] +; CHECK-NEXT: ldp s5, s4, [x11] ; CHECK-NEXT: ldp s7, s6, [x12] -; CHECK-NEXT: ldp s3, s2, [x8] -; CHECK-NEXT: ldp s5, s4, [x10] -; CHECK-NEXT: ld1 { v5.s }[1], [x13], #4 -; CHECK-NEXT: ld1 { v3.s }[1], [x11], #4 +; CHECK-NEXT: ld1 { v5.s }[1], [x8], #4 +; CHECK-NEXT: ld1 { v1.s }[1], [x10], #4 ; CHECK-NEXT: ld1 { v7.s }[1], [x2], #4 -; CHECK-NEXT: ld1 { v1.s }[1], [x0], #4 -; CHECK-NEXT: ld1 { v4.s }[1], [x13] -; CHECK-NEXT: ld1 { v2.s }[1], [x11] +; CHECK-NEXT: ld1 { v3.s }[1], [x0], #4 +; CHECK-NEXT: ld1 { v4.s }[1], [x8] +; CHECK-NEXT: ld1 { v0.s }[1], [x10] ; CHECK-NEXT: ld1 { v6.s }[1], [x2] -; CHECK-NEXT: ld1 { v0.s }[1], [x0] -; CHECK-NEXT: usubl v3.8h, v3.8b, v5.8b -; CHECK-NEXT: usubl v2.8h, v2.8b, v4.8b -; CHECK-NEXT: usubl v1.8h, v1.8b, v7.8b -; CHECK-NEXT: usubl v0.8h, v0.8b, v6.8b -; CHECK-NEXT: shll v4.4s, v2.4h, #16 -; CHECK-NEXT: shll2 v2.4s, v2.8h, #16 -; CHECK-NEXT: shll v5.4s, v0.4h, #16 +; CHECK-NEXT: ld1 { v2.s }[1], [x0] +; CHECK-NEXT: usubl v1.8h, v1.8b, v5.8b +; CHECK-NEXT: usubl v0.8h, v0.8b, v4.8b +; CHECK-NEXT: usubl v3.8h, v3.8b, v7.8b +; CHECK-NEXT: usubl v2.8h, v2.8b, v6.8b +; CHECK-NEXT: shll v4.4s, v0.4h, #16 ; CHECK-NEXT: shll2 v0.4s, v0.8h, #16 -; CHECK-NEXT: saddw2 v2.4s, v2.4s, v3.8h -; CHECK-NEXT: saddw v3.4s, v4.4s, v3.4h +; CHECK-NEXT: shll v5.4s, v2.4h, #16 +; CHECK-NEXT: shll2 v2.4s, v2.8h, #16 ; CHECK-NEXT: saddw2 v0.4s, v0.4s, v1.8h -; CHECK-NEXT: saddw v1.4s, v5.4s, v1.4h -; CHECK-NEXT: uzp2 v5.4s, v3.4s, v2.4s -; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #12 -; CHECK-NEXT: zip1 v17.4s, v1.4s, v0.4s -; CHECK-NEXT: zip2 v6.4s, v1.4s, v0.4s -; CHECK-NEXT: zip2 v18.4s, v3.4s, v2.4s -; CHECK-NEXT: uzp2 v5.4s, v5.4s, v3.4s -; CHECK-NEXT: ext v19.16b, v1.16b, v17.16b, #8 -; CHECK-NEXT: mov v1.s[3], v0.s[2] -; CHECK-NEXT: zip2 v4.4s, v2.4s, v3.4s -; CHECK-NEXT: mov v7.16b, v3.16b -; CHECK-NEXT: ext v16.16b, v2.16b, v16.16b, #12 -; CHECK-NEXT: mov v7.s[0], v2.s[1] -; CHECK-NEXT: mov v2.s[1], v3.s[0] +; CHECK-NEXT: saddw v1.4s, v4.4s, v1.4h +; CHECK-NEXT: saddw2 v2.4s, v2.4s, v3.8h +; CHECK-NEXT: saddw v3.4s, v5.4s, v3.4h +; CHECK-NEXT: uzp2 v5.4s, v1.4s, v0.4s +; CHECK-NEXT: ext v17.16b, v1.16b, v1.16b, #12 +; CHECK-NEXT: zip1 v7.4s, v3.4s, v2.4s +; CHECK-NEXT: zip2 v6.4s, v3.4s, v2.4s +; CHECK-NEXT: zip2 v18.4s, v1.4s, v0.4s +; CHECK-NEXT: uzp2 v5.4s, v5.4s, v1.4s +; CHECK-NEXT: ext v19.16b, v3.16b, v7.16b, #8 +; CHECK-NEXT: mov v3.s[3], v2.s[2] +; CHECK-NEXT: zip2 v4.4s, v0.4s, v1.4s +; CHECK-NEXT: mov v16.16b, v1.16b +; CHECK-NEXT: ext v17.16b, v0.16b, v17.16b, #12 +; CHECK-NEXT: mov v16.s[0], v0.s[1] +; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: mov v5.d[1], v6.d[1] -; CHECK-NEXT: mov v18.d[1], v1.d[1] -; CHECK-NEXT: mov v16.d[1], v6.d[1] -; CHECK-NEXT: mov v4.d[1], v1.d[1] -; CHECK-NEXT: mov v7.d[1], v17.d[1] -; CHECK-NEXT: mov v2.d[1], v19.d[1] -; CHECK-NEXT: add v1.4s, v5.4s, v18.4s -; CHECK-NEXT: sub v3.4s, v4.4s, v16.4s -; CHECK-NEXT: rev64 v4.4s, v1.4s -; CHECK-NEXT: add v0.4s, v7.4s, v2.4s -; CHECK-NEXT: sub v2.4s, v2.4s, v7.4s -; CHECK-NEXT: rev64 v5.4s, v0.4s -; CHECK-NEXT: mov v4.d[1], v1.d[1] -; CHECK-NEXT: add v6.4s, v3.4s, v2.4s -; CHECK-NEXT: sub v2.4s, v2.4s, v3.4s -; CHECK-NEXT: mov v5.d[1], v0.d[1] -; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s -; CHECK-NEXT: rev64 v7.4s, v2.4s +; CHECK-NEXT: mov v18.d[1], v3.d[1] +; CHECK-NEXT: mov v17.d[1], v6.d[1] +; CHECK-NEXT: mov v4.d[1], v3.d[1] +; CHECK-NEXT: mov v16.d[1], v7.d[1] +; CHECK-NEXT: mov v0.d[1], v19.d[1] +; CHECK-NEXT: add v2.4s, v5.4s, v18.4s +; CHECK-NEXT: sub v3.4s, v4.4s, v17.4s +; CHECK-NEXT: rev64 v4.4s, v2.4s +; CHECK-NEXT: add v1.4s, v16.4s, v0.4s +; CHECK-NEXT: sub v0.4s, v0.4s, v16.4s +; CHECK-NEXT: rev64 v5.4s, v1.4s +; CHECK-NEXT: mov v4.d[1], v2.d[1] +; CHECK-NEXT: add v6.4s, v3.4s, v0.4s +; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s +; CHECK-NEXT: mov v5.d[1], v1.d[1] +; CHECK-NEXT: sub v1.4s, v1.4s, v4.4s +; CHECK-NEXT: rev64 v7.4s, v0.4s ; CHECK-NEXT: rev64 v3.4s, v6.4s -; CHECK-NEXT: rev64 v4.4s, v0.4s -; CHECK-NEXT: add v1.4s, v1.4s, v5.4s -; CHECK-NEXT: sub v7.4s, v2.4s, v7.4s -; CHECK-NEXT: addp v5.4s, v1.4s, v6.4s -; CHECK-NEXT: addp v2.4s, v0.4s, v2.4s +; CHECK-NEXT: rev64 v4.4s, v1.4s +; CHECK-NEXT: add v2.4s, v2.4s, v5.4s +; CHECK-NEXT: sub v7.4s, v0.4s, v7.4s +; CHECK-NEXT: addp v5.4s, v2.4s, v6.4s +; CHECK-NEXT: addp v0.4s, v1.4s, v0.4s ; CHECK-NEXT: sub v3.4s, v6.4s, v3.4s -; CHECK-NEXT: rev64 v6.4s, v1.4s -; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s +; CHECK-NEXT: rev64 v6.4s, v2.4s +; CHECK-NEXT: sub v1.4s, v1.4s, v4.4s ; CHECK-NEXT: zip1 v16.4s, v5.4s, v5.4s -; CHECK-NEXT: ext v17.16b, v2.16b, v7.16b, #4 +; CHECK-NEXT: ext v17.16b, v0.16b, v7.16b, #4 ; CHECK-NEXT: ext v18.16b, v5.16b, v3.16b, #4 -; CHECK-NEXT: ext v4.16b, v0.16b, v2.16b, #8 -; CHECK-NEXT: sub v1.4s, v1.4s, v6.4s -; CHECK-NEXT: ext v6.16b, v1.16b, v5.16b, #4 -; CHECK-NEXT: trn2 v1.4s, v16.4s, v1.4s -; CHECK-NEXT: zip2 v16.4s, v17.4s, v2.4s -; CHECK-NEXT: zip2 v17.4s, v18.4s, v5.4s -; CHECK-NEXT: ext v18.16b, v4.16b, v0.16b, #4 +; CHECK-NEXT: ext v4.16b, v1.16b, v0.16b, #8 +; CHECK-NEXT: sub v2.4s, v2.4s, v6.4s +; CHECK-NEXT: zip2 v17.4s, v17.4s, v0.4s +; CHECK-NEXT: ext v6.16b, v2.16b, v5.16b, #4 +; CHECK-NEXT: trn2 v2.4s, v16.4s, v2.4s +; CHECK-NEXT: zip2 v16.4s, v18.4s, v5.4s +; CHECK-NEXT: ext v18.16b, v4.16b, v1.16b, #4 +; CHECK-NEXT: ext v17.16b, v7.16b, v17.16b, #12 +; CHECK-NEXT: mov v7.s[2], v0.s[3] ; CHECK-NEXT: ext v6.16b, v6.16b, v6.16b, #4 -; CHECK-NEXT: ext v16.16b, v7.16b, v16.16b, #12 -; CHECK-NEXT: ext v17.16b, v3.16b, v17.16b, #12 -; CHECK-NEXT: mov v0.s[2], v2.s[1] +; CHECK-NEXT: mov v1.s[2], v0.s[1] ; CHECK-NEXT: uzp2 v4.4s, v4.4s, v18.4s -; CHECK-NEXT: mov v7.s[2], v2.s[3] +; CHECK-NEXT: ext v16.16b, v3.16b, v16.16b, #12 ; CHECK-NEXT: mov v3.s[2], v5.s[3] -; CHECK-NEXT: sub v18.4s, v1.4s, v6.4s +; CHECK-NEXT: sub v18.4s, v7.4s, v17.4s +; CHECK-NEXT: mov v7.s[1], v0.s[2] +; CHECK-NEXT: sub v19.4s, v1.4s, v4.4s +; CHECK-NEXT: sub v20.4s, v2.4s, v6.4s ; CHECK-NEXT: mov v6.s[0], v5.s[1] -; CHECK-NEXT: sub v19.4s, v0.4s, v4.4s -; CHECK-NEXT: sub v20.4s, v7.4s, v16.4s -; CHECK-NEXT: sub v21.4s, v3.4s, v17.4s -; CHECK-NEXT: mov v0.s[1], v2.s[0] -; CHECK-NEXT: mov v7.s[1], v2.s[2] +; CHECK-NEXT: mov v1.s[1], v0.s[0] +; CHECK-NEXT: sub v0.4s, v3.4s, v16.4s ; CHECK-NEXT: mov v3.s[1], v5.s[2] -; CHECK-NEXT: add v1.4s, v1.4s, v6.4s -; CHECK-NEXT: add v0.4s, v0.4s, v4.4s -; CHECK-NEXT: add v2.4s, v7.4s, v16.4s -; CHECK-NEXT: add v3.4s, v3.4s, v17.4s +; CHECK-NEXT: add v2.4s, v2.4s, v6.4s +; CHECK-NEXT: add v1.4s, v1.4s, v4.4s +; CHECK-NEXT: add v4.4s, v7.4s, v17.4s +; CHECK-NEXT: add v3.4s, v3.4s, v16.4s +; CHECK-NEXT: mov v4.d[1], v18.d[1] +; CHECK-NEXT: mov v3.d[1], v0.d[1] ; CHECK-NEXT: mov v2.d[1], v20.d[1] -; CHECK-NEXT: mov v3.d[1], v21.d[1] -; CHECK-NEXT: mov v1.d[1], v18.d[1] -; CHECK-NEXT: mov v0.d[1], v19.d[1] -; CHECK-NEXT: movi v4.8h, #1 +; CHECK-NEXT: mov v1.d[1], v19.d[1] +; CHECK-NEXT: movi v0.8h, #1 ; CHECK-NEXT: movi v17.2d, #0x00ffff0000ffff -; CHECK-NEXT: ushr v5.4s, v1.4s, #15 -; CHECK-NEXT: ushr v6.4s, v2.4s, #15 -; CHECK-NEXT: ushr v7.4s, v0.4s, #15 +; CHECK-NEXT: ushr v5.4s, v2.4s, #15 +; CHECK-NEXT: ushr v6.4s, v4.4s, #15 +; CHECK-NEXT: ushr v7.4s, v1.4s, #15 ; CHECK-NEXT: ushr v16.4s, v3.4s, #15 -; CHECK-NEXT: and v6.16b, v6.16b, v4.16b -; CHECK-NEXT: and v16.16b, v16.16b, v4.16b -; CHECK-NEXT: and v7.16b, v7.16b, v4.16b -; CHECK-NEXT: and v4.16b, v5.16b, v4.16b +; CHECK-NEXT: and v6.16b, v6.16b, v0.16b +; CHECK-NEXT: and v16.16b, v16.16b, v0.16b +; CHECK-NEXT: and v7.16b, v7.16b, v0.16b +; CHECK-NEXT: and v0.16b, v5.16b, v0.16b ; CHECK-NEXT: mul v5.4s, v6.4s, v17.4s ; CHECK-NEXT: mul v6.4s, v16.4s, v17.4s -; CHECK-NEXT: mul v4.4s, v4.4s, v17.4s +; CHECK-NEXT: mul v0.4s, v0.4s, v17.4s ; CHECK-NEXT: mul v7.4s, v7.4s, v17.4s -; CHECK-NEXT: add v2.4s, v5.4s, v2.4s +; CHECK-NEXT: add v4.4s, v5.4s, v4.4s ; CHECK-NEXT: add v3.4s, v6.4s, v3.4s -; CHECK-NEXT: add v1.4s, v4.4s, v1.4s -; CHECK-NEXT: add v0.4s, v7.4s, v0.4s -; CHECK-NEXT: eor v1.16b, v1.16b, v4.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v7.16b -; CHECK-NEXT: eor v3.16b, v3.16b, v6.16b -; CHECK-NEXT: eor v2.16b, v2.16b, v5.16b -; CHECK-NEXT: add v2.4s, v3.4s, v2.4s -; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: add v2.4s, v0.4s, v2.4s +; CHECK-NEXT: add v1.4s, v7.4s, v1.4s +; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b +; CHECK-NEXT: eor v1.16b, v1.16b, v7.16b +; CHECK-NEXT: eor v2.16b, v3.16b, v6.16b +; CHECK-NEXT: eor v3.16b, v4.16b, v5.16b +; CHECK-NEXT: add v2.4s, v2.4s, v3.4s +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 @@ -236,131 +236,130 @@ define i32 @v2(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocapture noundef readonly %p2, i32 noundef %i2) { ; CHECK-LABEL: v2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $w3 killed $w3 def $x3 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x8, w1 -; CHECK-NEXT: sxtw x10, w3 +; CHECK-NEXT: // kill: def $w3 killed $w3 def $x3 +; CHECK-NEXT: sxtw x11, w3 ; CHECK-NEXT: add x9, x0, x8 -; CHECK-NEXT: add x12, x2, x10 -; CHECK-NEXT: add x11, x9, x8 -; CHECK-NEXT: add x13, x12, x10 -; CHECK-NEXT: add x8, x11, x8 -; CHECK-NEXT: add x10, x13, x10 -; CHECK-NEXT: ldp s1, s0, [x9] +; CHECK-NEXT: add x12, x2, x11 +; CHECK-NEXT: add x10, x9, x8 +; CHECK-NEXT: add x8, x10, x8 +; CHECK-NEXT: ldp s1, s0, [x8] +; CHECK-NEXT: add x8, x12, x11 +; CHECK-NEXT: add x11, x8, x11 +; CHECK-NEXT: ldp s3, s2, [x9] +; CHECK-NEXT: ldp s5, s4, [x11] ; CHECK-NEXT: ldp s7, s6, [x12] -; CHECK-NEXT: ldp s3, s2, [x8] -; CHECK-NEXT: ldp s5, s4, [x10] -; CHECK-NEXT: ld1 { v5.s }[1], [x13], #4 -; CHECK-NEXT: ld1 { v3.s }[1], [x11], #4 +; CHECK-NEXT: ld1 { v5.s }[1], [x8], #4 +; CHECK-NEXT: ld1 { v1.s }[1], [x10], #4 ; CHECK-NEXT: ld1 { v7.s }[1], [x2], #4 -; CHECK-NEXT: ld1 { v1.s }[1], [x0], #4 -; CHECK-NEXT: ld1 { v4.s }[1], [x13] -; CHECK-NEXT: ld1 { v2.s }[1], [x11] +; CHECK-NEXT: ld1 { v3.s }[1], [x0], #4 +; CHECK-NEXT: ld1 { v4.s }[1], [x8] +; CHECK-NEXT: ld1 { v0.s }[1], [x10] ; CHECK-NEXT: ld1 { v6.s }[1], [x2] -; CHECK-NEXT: ld1 { v0.s }[1], [x0] -; CHECK-NEXT: usubl v3.8h, v3.8b, v5.8b -; CHECK-NEXT: usubl v2.8h, v2.8b, v4.8b -; CHECK-NEXT: usubl v1.8h, v1.8b, v7.8b -; CHECK-NEXT: usubl v0.8h, v0.8b, v6.8b -; CHECK-NEXT: shll v4.4s, v2.4h, #16 -; CHECK-NEXT: shll2 v2.4s, v2.8h, #16 -; CHECK-NEXT: shll v5.4s, v0.4h, #16 +; CHECK-NEXT: ld1 { v2.s }[1], [x0] +; CHECK-NEXT: usubl v1.8h, v1.8b, v5.8b +; CHECK-NEXT: usubl v0.8h, v0.8b, v4.8b +; CHECK-NEXT: usubl v3.8h, v3.8b, v7.8b +; CHECK-NEXT: usubl v2.8h, v2.8b, v6.8b +; CHECK-NEXT: shll v4.4s, v0.4h, #16 ; CHECK-NEXT: shll2 v0.4s, v0.8h, #16 -; CHECK-NEXT: saddw2 v2.4s, v2.4s, v3.8h -; CHECK-NEXT: saddw v3.4s, v4.4s, v3.4h +; CHECK-NEXT: shll v5.4s, v2.4h, #16 +; CHECK-NEXT: shll2 v2.4s, v2.8h, #16 ; CHECK-NEXT: saddw2 v0.4s, v0.4s, v1.8h -; CHECK-NEXT: saddw v1.4s, v5.4s, v1.4h -; CHECK-NEXT: uzp2 v5.4s, v3.4s, v2.4s -; CHECK-NEXT: ext v17.16b, v3.16b, v3.16b, #12 -; CHECK-NEXT: zip1 v7.4s, v1.4s, v0.4s -; CHECK-NEXT: mov v16.16b, v3.16b -; CHECK-NEXT: zip2 v4.4s, v2.4s, v3.4s -; CHECK-NEXT: zip2 v6.4s, v1.4s, v0.4s -; CHECK-NEXT: zip2 v18.4s, v3.4s, v2.4s -; CHECK-NEXT: mov v16.s[0], v2.s[1] -; CHECK-NEXT: ext v19.16b, v1.16b, v7.16b, #8 -; CHECK-NEXT: ext v17.16b, v2.16b, v17.16b, #12 -; CHECK-NEXT: uzp2 v5.4s, v5.4s, v3.4s -; CHECK-NEXT: mov v1.s[3], v0.s[2] -; CHECK-NEXT: mov v2.s[1], v3.s[0] -; CHECK-NEXT: mov v16.d[1], v7.d[1] -; CHECK-NEXT: mov v5.d[1], v6.d[1] -; CHECK-NEXT: mov v18.d[1], v1.d[1] -; CHECK-NEXT: mov v2.d[1], v19.d[1] -; CHECK-NEXT: mov v4.d[1], v1.d[1] -; CHECK-NEXT: mov v17.d[1], v6.d[1] -; CHECK-NEXT: add v0.4s, v5.4s, v18.4s -; CHECK-NEXT: add v1.4s, v16.4s, v2.4s -; CHECK-NEXT: rev64 v3.4s, v0.4s -; CHECK-NEXT: rev64 v5.4s, v1.4s -; CHECK-NEXT: sub v2.4s, v2.4s, v16.4s +; CHECK-NEXT: saddw v1.4s, v4.4s, v1.4h +; CHECK-NEXT: saddw2 v2.4s, v2.4s, v3.8h +; CHECK-NEXT: saddw v3.4s, v5.4s, v3.4h +; CHECK-NEXT: uzp2 v6.4s, v1.4s, v0.4s +; CHECK-NEXT: ext v17.16b, v1.16b, v1.16b, #12 +; CHECK-NEXT: zip1 v5.4s, v3.4s, v2.4s +; CHECK-NEXT: mov v16.16b, v1.16b +; CHECK-NEXT: zip2 v4.4s, v0.4s, v1.4s +; CHECK-NEXT: zip2 v7.4s, v3.4s, v2.4s +; CHECK-NEXT: ext v18.16b, v3.16b, v5.16b, #8 +; CHECK-NEXT: mov v3.s[3], v2.s[2] +; CHECK-NEXT: zip2 v2.4s, v1.4s, v0.4s +; CHECK-NEXT: mov v16.s[0], v0.s[1] +; CHECK-NEXT: uzp2 v6.4s, v6.4s, v1.4s +; CHECK-NEXT: ext v17.16b, v0.16b, v17.16b, #12 +; CHECK-NEXT: mov v0.s[1], v1.s[0] +; CHECK-NEXT: mov v16.d[1], v5.d[1] +; CHECK-NEXT: mov v6.d[1], v7.d[1] +; CHECK-NEXT: mov v2.d[1], v3.d[1] +; CHECK-NEXT: mov v0.d[1], v18.d[1] +; CHECK-NEXT: mov v4.d[1], v3.d[1] +; CHECK-NEXT: mov v17.d[1], v7.d[1] +; CHECK-NEXT: add v1.4s, v6.4s, v2.4s +; CHECK-NEXT: add v2.4s, v16.4s, v0.4s +; CHECK-NEXT: rev64 v3.4s, v1.4s +; CHECK-NEXT: rev64 v5.4s, v2.4s +; CHECK-NEXT: sub v0.4s, v0.4s, v16.4s ; CHECK-NEXT: sub v4.4s, v4.4s, v17.4s -; CHECK-NEXT: mov v3.d[1], v0.d[1] -; CHECK-NEXT: mov v5.d[1], v1.d[1] -; CHECK-NEXT: add v6.4s, v4.4s, v2.4s -; CHECK-NEXT: sub v2.4s, v2.4s, v4.4s -; CHECK-NEXT: sub v1.4s, v1.4s, v3.4s -; CHECK-NEXT: add v0.4s, v0.4s, v5.4s -; CHECK-NEXT: zip1 v3.4s, v1.4s, v2.4s -; CHECK-NEXT: zip1 v4.4s, v0.4s, v6.4s -; CHECK-NEXT: uzp2 v5.4s, v0.4s, v6.4s -; CHECK-NEXT: mov v17.16b, v1.16b -; CHECK-NEXT: zip2 v7.4s, v0.4s, v6.4s -; CHECK-NEXT: ext v16.16b, v1.16b, v3.16b, #8 -; CHECK-NEXT: trn2 v4.4s, v0.4s, v4.4s -; CHECK-NEXT: uzp2 v5.4s, v5.4s, v0.4s -; CHECK-NEXT: zip2 v1.4s, v1.4s, v2.4s -; CHECK-NEXT: mov v17.s[3], v2.s[2] -; CHECK-NEXT: mov v0.s[1], v6.s[1] -; CHECK-NEXT: mov v4.d[1], v16.d[1] -; CHECK-NEXT: mov v5.d[1], v1.d[1] -; CHECK-NEXT: mov v7.d[1], v17.d[1] -; CHECK-NEXT: mov v0.d[1], v3.d[1] -; CHECK-NEXT: movi v1.8h, #1 -; CHECK-NEXT: add v2.4s, v7.4s, v5.4s -; CHECK-NEXT: add v3.4s, v0.4s, v4.4s -; CHECK-NEXT: sub v5.4s, v5.4s, v7.4s -; CHECK-NEXT: sub v0.4s, v4.4s, v0.4s -; CHECK-NEXT: ext v4.16b, v3.16b, v3.16b, #4 -; CHECK-NEXT: zip2 v6.4s, v0.4s, v3.4s -; CHECK-NEXT: zip2 v7.4s, v5.4s, v2.4s -; CHECK-NEXT: zip1 v16.4s, v2.4s, v5.4s -; CHECK-NEXT: zip2 v17.4s, v2.4s, v5.4s -; CHECK-NEXT: ext v2.16b, v2.16b, v2.16b, #4 -; CHECK-NEXT: add v6.4s, v6.4s, v7.4s -; CHECK-NEXT: zip2 v7.4s, v3.4s, v0.4s -; CHECK-NEXT: zip1 v3.4s, v3.4s, v0.4s -; CHECK-NEXT: ext v0.16b, v4.16b, v0.16b, #8 -; CHECK-NEXT: ext v5.16b, v2.16b, v5.16b, #8 -; CHECK-NEXT: sub v7.4s, v17.4s, v7.4s -; CHECK-NEXT: sub v3.4s, v16.4s, v3.4s -; CHECK-NEXT: ext v0.16b, v0.16b, v4.16b, #4 -; CHECK-NEXT: ext v2.16b, v5.16b, v2.16b, #4 +; CHECK-NEXT: mov v3.d[1], v1.d[1] +; CHECK-NEXT: mov v5.d[1], v2.d[1] +; CHECK-NEXT: add v6.4s, v4.4s, v0.4s +; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s +; CHECK-NEXT: sub v2.4s, v2.4s, v3.4s +; CHECK-NEXT: add v1.4s, v1.4s, v5.4s +; CHECK-NEXT: zip1 v3.4s, v2.4s, v0.4s +; CHECK-NEXT: zip1 v4.4s, v1.4s, v6.4s +; CHECK-NEXT: uzp2 v7.4s, v1.4s, v6.4s +; CHECK-NEXT: zip2 v5.4s, v1.4s, v6.4s +; CHECK-NEXT: zip2 v16.4s, v2.4s, v0.4s +; CHECK-NEXT: trn2 v4.4s, v1.4s, v4.4s +; CHECK-NEXT: ext v17.16b, v2.16b, v3.16b, #8 +; CHECK-NEXT: uzp2 v7.4s, v7.4s, v1.4s +; CHECK-NEXT: mov v2.s[3], v0.s[2] +; CHECK-NEXT: mov v1.s[1], v6.s[1] +; CHECK-NEXT: mov v4.d[1], v17.d[1] +; CHECK-NEXT: mov v7.d[1], v16.d[1] +; CHECK-NEXT: mov v5.d[1], v2.d[1] +; CHECK-NEXT: mov v1.d[1], v3.d[1] +; CHECK-NEXT: add v0.4s, v5.4s, v7.4s +; CHECK-NEXT: add v2.4s, v1.4s, v4.4s +; CHECK-NEXT: sub v1.4s, v4.4s, v1.4s +; CHECK-NEXT: ext v4.16b, v2.16b, v2.16b, #4 +; CHECK-NEXT: ext v16.16b, v0.16b, v0.16b, #4 +; CHECK-NEXT: sub v3.4s, v7.4s, v5.4s +; CHECK-NEXT: zip2 v5.4s, v1.4s, v2.4s +; CHECK-NEXT: zip1 v6.4s, v0.4s, v3.4s +; CHECK-NEXT: zip2 v7.4s, v0.4s, v3.4s +; CHECK-NEXT: zip2 v0.4s, v3.4s, v0.4s +; CHECK-NEXT: zip1 v17.4s, v2.4s, v1.4s +; CHECK-NEXT: zip2 v2.4s, v2.4s, v1.4s +; CHECK-NEXT: ext v1.16b, v4.16b, v1.16b, #8 +; CHECK-NEXT: ext v3.16b, v16.16b, v3.16b, #8 +; CHECK-NEXT: add v0.4s, v5.4s, v0.4s +; CHECK-NEXT: movi v5.8h, #1 +; CHECK-NEXT: ext v1.16b, v1.16b, v4.16b, #4 +; CHECK-NEXT: ext v3.16b, v3.16b, v16.16b, #4 +; CHECK-NEXT: sub v2.4s, v7.4s, v2.4s +; CHECK-NEXT: sub v4.4s, v6.4s, v17.4s +; CHECK-NEXT: ushr v6.4s, v0.4s, #15 +; CHECK-NEXT: add v1.4s, v1.4s, v3.4s +; CHECK-NEXT: ushr v7.4s, v2.4s, #15 ; CHECK-NEXT: movi v17.2d, #0x00ffff0000ffff -; CHECK-NEXT: ushr v5.4s, v3.4s, #15 -; CHECK-NEXT: ushr v4.4s, v6.4s, #15 -; CHECK-NEXT: ushr v16.4s, v7.4s, #15 +; CHECK-NEXT: and v3.16b, v6.16b, v5.16b +; CHECK-NEXT: ushr v6.4s, v4.4s, #15 +; CHECK-NEXT: ushr v16.4s, v1.4s, #15 +; CHECK-NEXT: and v7.16b, v7.16b, v5.16b +; CHECK-NEXT: and v16.16b, v16.16b, v5.16b +; CHECK-NEXT: and v5.16b, v6.16b, v5.16b +; CHECK-NEXT: mul v3.4s, v3.4s, v17.4s +; CHECK-NEXT: mul v6.4s, v7.4s, v17.4s +; CHECK-NEXT: mul v5.4s, v5.4s, v17.4s +; CHECK-NEXT: mul v7.4s, v16.4s, v17.4s +; CHECK-NEXT: add v0.4s, v3.4s, v0.4s +; CHECK-NEXT: add v2.4s, v6.4s, v2.4s +; CHECK-NEXT: add v4.4s, v5.4s, v4.4s +; CHECK-NEXT: add v1.4s, v7.4s, v1.4s +; CHECK-NEXT: eor v4.16b, v4.16b, v5.16b +; CHECK-NEXT: eor v1.16b, v1.16b, v7.16b +; CHECK-NEXT: eor v2.16b, v2.16b, v6.16b +; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s -; CHECK-NEXT: and v5.16b, v5.16b, v1.16b -; CHECK-NEXT: mul v2.4s, v5.4s, v17.4s -; CHECK-NEXT: ushr v5.4s, v0.4s, #15 -; CHECK-NEXT: and v4.16b, v4.16b, v1.16b -; CHECK-NEXT: and v16.16b, v16.16b, v1.16b -; CHECK-NEXT: and v1.16b, v5.16b, v1.16b -; CHECK-NEXT: mul v4.4s, v4.4s, v17.4s -; CHECK-NEXT: mul v16.4s, v16.4s, v17.4s -; CHECK-NEXT: mul v1.4s, v1.4s, v17.4s -; CHECK-NEXT: add v3.4s, v2.4s, v3.4s -; CHECK-NEXT: add v5.4s, v4.4s, v6.4s -; CHECK-NEXT: add v6.4s, v16.4s, v7.4s +; CHECK-NEXT: add v1.4s, v1.4s, v4.4s ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s -; CHECK-NEXT: eor v2.16b, v3.16b, v2.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b -; CHECK-NEXT: eor v1.16b, v6.16b, v16.16b -; CHECK-NEXT: eor v3.16b, v5.16b, v4.16b -; CHECK-NEXT: add v1.4s, v3.4s, v1.4s -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: lsr w9, w8, #16 @@ -468,24 +467,24 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $w3 killed $w3 def $x3 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x8, w3 -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: add x10, x2, x8 -; CHECK-NEXT: add x11, x0, x9 +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: sxtw x9, w3 +; CHECK-NEXT: add x10, x0, x8 +; CHECK-NEXT: add x11, x2, x9 ; CHECK-NEXT: add x12, x10, x8 ; CHECK-NEXT: add x13, x11, x9 -; CHECK-NEXT: add x8, x12, x8 ; CHECK-NEXT: add x9, x13, x9 -; CHECK-NEXT: ldp s0, s6, [x11] -; CHECK-NEXT: ldp s3, s7, [x10] -; CHECK-NEXT: ldp s1, s5, [x8] -; CHECK-NEXT: ldp s2, s4, [x9] -; CHECK-NEXT: ld1 { v1.s }[1], [x12], #4 -; CHECK-NEXT: ld1 { v2.s }[1], [x13], #4 +; CHECK-NEXT: add x8, x12, x8 +; CHECK-NEXT: ldp s0, s6, [x10] +; CHECK-NEXT: ldp s1, s5, [x9] +; CHECK-NEXT: ldp s2, s4, [x8] +; CHECK-NEXT: ldp s3, s7, [x11] +; CHECK-NEXT: ld1 { v1.s }[1], [x13], #4 +; CHECK-NEXT: ld1 { v2.s }[1], [x12], #4 ; CHECK-NEXT: ld1 { v3.s }[1], [x2], #4 ; CHECK-NEXT: ld1 { v0.s }[1], [x0], #4 -; CHECK-NEXT: ld1 { v5.s }[1], [x12] -; CHECK-NEXT: ld1 { v4.s }[1], [x13] +; CHECK-NEXT: ld1 { v5.s }[1], [x13] +; CHECK-NEXT: ld1 { v4.s }[1], [x12] ; CHECK-NEXT: ld1 { v7.s }[1], [x2] ; CHECK-NEXT: ld1 { v6.s }[1], [x0] ; CHECK-NEXT: usubl v0.8h, v0.8b, v3.8b @@ -544,48 +543,48 @@ ; CHECK-NEXT: mov v0.d[1], v4.d[1] ; CHECK-NEXT: add v1.4s, v6.4s, v16.4s ; CHECK-NEXT: sub v2.4s, v16.4s, v6.4s -; CHECK-NEXT: add v7.4s, v3.4s, v0.4s -; CHECK-NEXT: ext v6.16b, v1.16b, v1.16b, #4 +; CHECK-NEXT: add v4.4s, v3.4s, v0.4s ; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s -; CHECK-NEXT: ext v3.16b, v7.16b, v7.16b, #4 -; CHECK-NEXT: zip1 v4.4s, v1.4s, v2.4s -; CHECK-NEXT: zip2 v5.4s, v1.4s, v2.4s +; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #4 +; CHECK-NEXT: ext v6.16b, v4.16b, v4.16b, #4 +; CHECK-NEXT: zip1 v5.4s, v1.4s, v2.4s +; CHECK-NEXT: zip2 v7.4s, v1.4s, v2.4s +; CHECK-NEXT: ext v16.16b, v3.16b, v2.16b, #8 ; CHECK-NEXT: zip2 v1.4s, v2.4s, v1.4s -; CHECK-NEXT: zip2 v16.4s, v0.4s, v7.4s -; CHECK-NEXT: zip1 v17.4s, v7.4s, v0.4s -; CHECK-NEXT: zip2 v7.4s, v7.4s, v0.4s -; CHECK-NEXT: ext v2.16b, v6.16b, v2.16b, #8 -; CHECK-NEXT: ext v0.16b, v3.16b, v0.16b, #8 -; CHECK-NEXT: add v1.4s, v16.4s, v1.4s -; CHECK-NEXT: movi v16.8h, #1 +; CHECK-NEXT: ext v2.16b, v6.16b, v0.16b, #8 +; CHECK-NEXT: zip2 v17.4s, v0.4s, v4.4s +; CHECK-NEXT: ext v3.16b, v16.16b, v3.16b, #4 +; CHECK-NEXT: zip2 v16.4s, v4.4s, v0.4s ; CHECK-NEXT: ext v2.16b, v2.16b, v6.16b, #4 -; CHECK-NEXT: ext v0.16b, v0.16b, v3.16b, #4 -; CHECK-NEXT: sub v3.4s, v5.4s, v7.4s -; CHECK-NEXT: sub v4.4s, v4.4s, v17.4s -; CHECK-NEXT: ushr v5.4s, v1.4s, #15 -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ushr v6.4s, v3.4s, #15 +; CHECK-NEXT: zip1 v0.4s, v4.4s, v0.4s +; CHECK-NEXT: add v1.4s, v17.4s, v1.4s +; CHECK-NEXT: sub v4.4s, v7.4s, v16.4s +; CHECK-NEXT: add v2.4s, v2.4s, v3.4s +; CHECK-NEXT: sub v0.4s, v5.4s, v0.4s +; CHECK-NEXT: movi v3.8h, #1 ; CHECK-NEXT: movi v17.2d, #0x00ffff0000ffff -; CHECK-NEXT: and v2.16b, v5.16b, v16.16b -; CHECK-NEXT: ushr v5.4s, v4.4s, #15 -; CHECK-NEXT: ushr v7.4s, v0.4s, #15 -; CHECK-NEXT: and v6.16b, v6.16b, v16.16b -; CHECK-NEXT: and v7.16b, v7.16b, v16.16b -; CHECK-NEXT: and v5.16b, v5.16b, v16.16b -; CHECK-NEXT: mul v2.4s, v2.4s, v17.4s -; CHECK-NEXT: mul v6.4s, v6.4s, v17.4s -; CHECK-NEXT: mul v5.4s, v5.4s, v17.4s +; CHECK-NEXT: ushr v5.4s, v0.4s, #15 +; CHECK-NEXT: ushr v6.4s, v1.4s, #15 +; CHECK-NEXT: ushr v7.4s, v2.4s, #15 +; CHECK-NEXT: ushr v16.4s, v4.4s, #15 +; CHECK-NEXT: and v6.16b, v6.16b, v3.16b +; CHECK-NEXT: and v16.16b, v16.16b, v3.16b +; CHECK-NEXT: and v7.16b, v7.16b, v3.16b +; CHECK-NEXT: and v3.16b, v5.16b, v3.16b +; CHECK-NEXT: mul v5.4s, v6.4s, v17.4s +; CHECK-NEXT: mul v6.4s, v16.4s, v17.4s +; CHECK-NEXT: mul v3.4s, v3.4s, v17.4s ; CHECK-NEXT: mul v7.4s, v7.4s, v17.4s -; CHECK-NEXT: add v1.4s, v2.4s, v1.4s -; CHECK-NEXT: add v3.4s, v6.4s, v3.4s -; CHECK-NEXT: add v4.4s, v5.4s, v4.4s -; CHECK-NEXT: add v0.4s, v7.4s, v0.4s -; CHECK-NEXT: eor v4.16b, v4.16b, v5.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v7.16b -; CHECK-NEXT: eor v3.16b, v3.16b, v6.16b -; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b +; CHECK-NEXT: add v1.4s, v5.4s, v1.4s +; CHECK-NEXT: add v4.4s, v6.4s, v4.4s +; CHECK-NEXT: add v0.4s, v3.4s, v0.4s +; CHECK-NEXT: add v2.4s, v7.4s, v2.4s +; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b +; CHECK-NEXT: eor v2.16b, v2.16b, v7.16b +; CHECK-NEXT: eor v3.16b, v4.16b, v6.16b +; CHECK-NEXT: eor v1.16b, v1.16b, v5.16b ; CHECK-NEXT: add v1.4s, v1.4s, v3.4s -; CHECK-NEXT: add v0.4s, v0.4s, v4.4s +; CHECK-NEXT: add v0.4s, v2.4s, v0.4s ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 Index: llvm/test/CodeGen/AArch64/reduce-xor.ll =================================================================== --- llvm/test/CodeGen/AArch64/reduce-xor.ll +++ llvm/test/CodeGen/AArch64/reduce-xor.ll @@ -86,16 +86,16 @@ ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: eor w8, w8, w9 +; GISEL-NEXT: fmov w13, s7 +; GISEL-NEXT: eor w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: eor w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: eor w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: eor w12, w12, w13 -; GISEL-NEXT: eor w8, w8, w10 -; GISEL-NEXT: eor w9, w9, w11 -; GISEL-NEXT: eor w9, w12, w9 +; GISEL-NEXT: eor w11, w12, w13 +; GISEL-NEXT: eor w9, w10, w11 ; GISEL-NEXT: eor w8, w8, w9 ; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret @@ -263,16 +263,16 @@ ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: eor w8, w8, w9 +; GISEL-NEXT: fmov w13, s7 +; GISEL-NEXT: eor w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: eor w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: eor w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: eor w12, w12, w13 -; GISEL-NEXT: eor w8, w8, w10 -; GISEL-NEXT: eor w9, w9, w11 -; GISEL-NEXT: eor w9, w12, w9 +; GISEL-NEXT: eor w11, w12, w13 +; GISEL-NEXT: eor w9, w10, w11 ; GISEL-NEXT: eor w0, w8, w9 ; GISEL-NEXT: ret %xor_result = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a) Index: llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll =================================================================== --- llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll +++ llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll @@ -26,8 +26,8 @@ ; CHECK-NEXT: bl _bar ; CHECK-NEXT: ldurb w8, [x29, #-1] ; CHECK-NEXT: add x8, x8, #1 -; CHECK-NEXT: and x0, x8, #0xff ; CHECK-NEXT: sturb w8, [x29, #-1] +; CHECK-NEXT: and x0, x8, #0xff ; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/rotate-extract.ll =================================================================== --- llvm/test/CodeGen/AArch64/rotate-extract.ll +++ llvm/test/CodeGen/AArch64/rotate-extract.ll @@ -127,15 +127,15 @@ define i32 @no_extract_udiv(i32 %i) nounwind { ; CHECK-LABEL: no_extract_udiv: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #33437 -; CHECK-NEXT: mov w9, #43691 -; CHECK-NEXT: movk w8, #21399, lsl #16 -; CHECK-NEXT: movk w9, #43690, lsl #16 +; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w9, #33437 +; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: movk w9, #21399, lsl #16 ; CHECK-NEXT: umull x8, w0, w8 ; CHECK-NEXT: umull x9, w0, w9 -; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: lsr x9, x9, #33 -; CHECK-NEXT: extr w0, w9, w8, #4 +; CHECK-NEXT: lsr x8, x8, #33 +; CHECK-NEXT: lsr x9, x9, #32 +; CHECK-NEXT: extr w0, w8, w9, #4 ; CHECK-NEXT: ret %lhs_div = udiv i32 %i, 3 %rhs_div = udiv i32 %i, 49 Index: llvm/test/CodeGen/AArch64/sadd_sat_plus.ll =================================================================== --- llvm/test/CodeGen/AArch64/sadd_sat_plus.ll +++ llvm/test/CodeGen/AArch64/sadd_sat_plus.ll @@ -37,12 +37,12 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind { ; CHECK-LABEL: func16: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: sxth w10, w0 -; CHECK-NEXT: mov w8, #32767 -; CHECK-NEXT: add w9, w10, w9, sxth -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w8, w9, w8, lt +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: add w8, w9, w8, sxth +; CHECK-NEXT: mov w9, #32767 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w8, w8, w9, lt ; CHECK-NEXT: mov w9, #-32768 ; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w0, w8, w9, gt @@ -55,12 +55,12 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { ; CHECK-LABEL: func8: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: sxtb w10, w0 -; CHECK-NEXT: mov w8, #127 -; CHECK-NEXT: add w9, w10, w9, sxtb -; CHECK-NEXT: cmp w9, #127 -; CHECK-NEXT: csel w8, w9, w8, lt +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: add w8, w9, w8, sxtb +; CHECK-NEXT: mov w9, #127 +; CHECK-NEXT: cmp w8, #127 +; CHECK-NEXT: csel w8, w8, w9, lt ; CHECK-NEXT: mov w9, #-128 ; CHECK-NEXT: cmn w8, #128 ; CHECK-NEXT: csel w0, w8, w9, gt @@ -73,13 +73,13 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { ; CHECK-LABEL: func4: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: sbfx w10, w0, #0, #4 -; CHECK-NEXT: mov w8, #7 -; CHECK-NEXT: lsl w9, w9, #28 -; CHECK-NEXT: add w9, w10, w9, asr #28 -; CHECK-NEXT: cmp w9, #7 -; CHECK-NEXT: csel w8, w9, w8, lt +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: sbfx w9, w0, #0, #4 +; CHECK-NEXT: lsl w8, w8, #28 +; CHECK-NEXT: add w8, w9, w8, asr #28 +; CHECK-NEXT: mov w9, #7 +; CHECK-NEXT: cmp w8, #7 +; CHECK-NEXT: csel w8, w8, w9, lt ; CHECK-NEXT: mov w9, #-8 ; CHECK-NEXT: cmn w8, #8 ; CHECK-NEXT: csel w0, w8, w9, gt Index: llvm/test/CodeGen/AArch64/sadd_sat_vec.ll =================================================================== --- llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -54,9 +54,9 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.16b, v2.16b, v6.16b ; CHECK-NEXT: sqadd v0.16b, v0.16b, v4.16b ; CHECK-NEXT: sqadd v1.16b, v1.16b, v5.16b +; CHECK-NEXT: sqadd v2.16b, v2.16b, v6.16b ; CHECK-NEXT: sqadd v3.16b, v3.16b, v7.16b ; CHECK-NEXT: ret %z = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y) @@ -85,9 +85,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { ; CHECK-LABEL: v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.8h, v2.8h, v6.8h ; CHECK-NEXT: sqadd v0.8h, v0.8h, v4.8h ; CHECK-NEXT: sqadd v1.8h, v1.8h, v5.8h +; CHECK-NEXT: sqadd v2.8h, v2.8h, v6.8h ; CHECK-NEXT: sqadd v3.8h, v3.8h, v7.8h ; CHECK-NEXT: ret %z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y) @@ -97,9 +97,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: sqadd v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: sqadd v0.8b, v0.8b, v1.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -133,15 +133,15 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind { ; CHECK-LABEL: v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.b }[0], [x1] -; CHECK-NEXT: add x8, x1, #1 -; CHECK-NEXT: ld1 { v1.b }[0], [x0] -; CHECK-NEXT: add x9, x0, #1 +; CHECK-NEXT: ld1 { v0.b }[0], [x0] +; CHECK-NEXT: add x8, x0, #1 +; CHECK-NEXT: ld1 { v1.b }[0], [x1] +; CHECK-NEXT: add x9, x1, #1 ; CHECK-NEXT: ld1 { v0.b }[4], [x8] ; CHECK-NEXT: ld1 { v1.b }[4], [x9] ; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: sqadd v0.2s, v1.2s, v0.2s +; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #24 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 @@ -158,9 +158,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: sqadd v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -173,15 +173,15 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind { ; CHECK-LABEL: v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.h }[0], [x1] -; CHECK-NEXT: add x8, x1, #2 -; CHECK-NEXT: ld1 { v1.h }[0], [x0] -; CHECK-NEXT: add x9, x0, #2 +; CHECK-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-NEXT: add x8, x0, #2 +; CHECK-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-NEXT: add x9, x1, #2 ; CHECK-NEXT: ld1 { v0.h }[2], [x8] ; CHECK-NEXT: ld1 { v1.h }[2], [x9] ; CHECK-NEXT: shl v0.2s, v0.2s, #16 ; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: sqadd v0.2s, v1.2s, v0.2s +; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #16 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 @@ -224,9 +224,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x1] -; CHECK-NEXT: ldr b1, [x0] -; CHECK-NEXT: sqadd v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: ldr b1, [x1] +; CHECK-NEXT: sqadd v0.8b, v0.8b, v1.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -239,9 +239,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x1] -; CHECK-NEXT: ldr h1, [x0] -; CHECK-NEXT: sqadd v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: ldr h1, [x1] +; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -307,9 +307,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { ; CHECK-LABEL: v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.4s, v2.4s, v6.4s ; CHECK-NEXT: sqadd v0.4s, v0.4s, v4.4s ; CHECK-NEXT: sqadd v1.4s, v1.4s, v5.4s +; CHECK-NEXT: sqadd v2.4s, v2.4s, v6.4s ; CHECK-NEXT: sqadd v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %z = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y) @@ -338,9 +338,9 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.2d, v2.2d, v6.2d ; CHECK-NEXT: sqadd v0.2d, v0.2d, v4.2d ; CHECK-NEXT: sqadd v1.2d, v1.2d, v5.2d +; CHECK-NEXT: sqadd v2.2d, v2.2d, v6.2d ; CHECK-NEXT: sqadd v3.2d, v3.2d, v7.2d ; CHECK-NEXT: ret %z = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y) @@ -352,21 +352,21 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adds x8, x2, x6 ; CHECK-NEXT: adcs x9, x3, x7 -; CHECK-NEXT: cset w10, vs -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x2, x11, x8, ne -; CHECK-NEXT: eor x8, x11, #0x8000000000000000 +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: cset w11, vs +; CHECK-NEXT: cmp w11, #0 +; CHECK-NEXT: csel x2, x10, x8, ne +; CHECK-NEXT: eor x8, x10, #0x8000000000000000 ; CHECK-NEXT: csel x3, x8, x9, ne ; CHECK-NEXT: adds x8, x0, x4 ; CHECK-NEXT: adcs x9, x1, x5 -; CHECK-NEXT: cset w10, vs -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: eor x10, x11, #0x8000000000000000 -; CHECK-NEXT: csel x8, x11, x8, ne -; CHECK-NEXT: csel x1, x10, x9, ne +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: cset w11, vs +; CHECK-NEXT: cmp w11, #0 +; CHECK-NEXT: csel x8, x10, x8, ne +; CHECK-NEXT: eor x11, x10, #0x8000000000000000 ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: csel x1, x11, x9, ne ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/sat-add.ll =================================================================== --- llvm/test/CodeGen/AArch64/sat-add.ll +++ llvm/test/CodeGen/AArch64/sat-add.ll @@ -217,9 +217,9 @@ define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) { ; CHECK-LABEL: unsigned_sat_variable_i16_using_min: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w1 -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: cmp w9, w8, uxth +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: mvn w9, w1 +; CHECK-NEXT: cmp w8, w9, uxth ; CHECK-NEXT: csinv w8, w0, w1, lo ; CHECK-NEXT: add w0, w8, w1 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/select-constant-xor.ll =================================================================== --- llvm/test/CodeGen/AArch64/select-constant-xor.ll +++ llvm/test/CodeGen/AArch64/select-constant-xor.ll @@ -200,8 +200,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: asr w8, w0, #31 ; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: eor w8, w8, #0x7f ; CHECK-NEXT: csel w9, w2, w1, lt +; CHECK-NEXT: eor w8, w8, #0x7f ; CHECK-NEXT: add w0, w8, w9 ; CHECK-NEXT: ret %c = icmp sle i32 %a, -1 Index: llvm/test/CodeGen/AArch64/select-with-and-or.ll =================================================================== --- llvm/test/CodeGen/AArch64/select-with-and-or.ll +++ llvm/test/CodeGen/AArch64/select-with-and-or.ll @@ -56,9 +56,9 @@ define <4 x i1> @and_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: and_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -70,9 +70,9 @@ define <4 x i1> @or_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: or_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -84,9 +84,9 @@ define <4 x i1> @and_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: and_not_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: bic v0.16b, v2.16b, v0.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -98,9 +98,9 @@ define <4 x i1> @or_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: or_not_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: orn v0.16b, v2.16b, v0.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -112,9 +112,9 @@ define <4 x i1> @and_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: and_vec_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -126,9 +126,9 @@ define <4 x i1> @or_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: or_vec_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -140,9 +140,9 @@ define <4 x i1> @and_not_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: and_not_vec_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: bic v0.16b, v2.16b, v0.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -154,9 +154,9 @@ define <4 x i1> @or_not_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: or_not_vec_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: orn v0.16b, v2.16b, v0.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y Index: llvm/test/CodeGen/AArch64/select_const.ll =================================================================== --- llvm/test/CodeGen/AArch64/select_const.ll +++ llvm/test/CodeGen/AArch64/select_const.ll @@ -498,13 +498,13 @@ define double @sel_constants_fadd_constant(i1 %cond) { ; CHECK-LABEL: sel_constants_fadd_constant: ; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI42_0 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI42_0] ; CHECK-NEXT: mov x8, #7378697629483820646 -; CHECK-NEXT: adrp x9, .LCPI42_0 ; CHECK-NEXT: movk x8, #16444, lsl #48 -; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: ldr d1, [x9, :lo12:.LCPI42_0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcsel d0, d1, d0, ne +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fcsel d0, d0, d1, ne ; CHECK-NEXT: ret %sel = select i1 %cond, double -4.0, double 23.3 %bo = fadd double %sel, 5.1 @@ -514,12 +514,12 @@ define double @sel_constants_fsub_constant(i1 %cond) { ; CHECK-LABEL: sel_constants_fsub_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x9, #3689348814741910323 ; CHECK-NEXT: adrp x8, .LCPI43_0 -; CHECK-NEXT: movk x9, #49186, lsl #48 ; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI43_0] -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: mov x8, #3689348814741910323 +; CHECK-NEXT: movk x8, #49186, lsl #48 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fcsel d0, d1, d0, ne ; CHECK-NEXT: ret %sel = select i1 %cond, double -4.0, double 23.3 @@ -530,12 +530,12 @@ define double @fsub_constant_sel_constants(i1 %cond) { ; CHECK-LABEL: fsub_constant_sel_constants: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x9, #3689348814741910323 ; CHECK-NEXT: adrp x8, .LCPI44_0 -; CHECK-NEXT: movk x9, #16418, lsl #48 ; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI44_0] -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: mov x8, #3689348814741910323 +; CHECK-NEXT: movk x8, #16418, lsl #48 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fcsel d0, d1, d0, ne ; CHECK-NEXT: ret %sel = select i1 %cond, double -4.0, double 23.3 @@ -546,12 +546,12 @@ define double @sel_constants_fmul_constant(i1 %cond) { ; CHECK-LABEL: sel_constants_fmul_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x9, #7378697629483820646 ; CHECK-NEXT: adrp x8, .LCPI45_0 -; CHECK-NEXT: movk x9, #49204, lsl #48 ; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI45_0] -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: mov x8, #7378697629483820646 +; CHECK-NEXT: movk x8, #49204, lsl #48 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fcsel d0, d1, d0, ne ; CHECK-NEXT: ret %sel = select i1 %cond, double -4.0, double 23.3 @@ -564,8 +564,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI46_0 ; CHECK-NEXT: adrp x9, .LCPI46_1 -; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI46_0] +; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: ldr d1, [x9, :lo12:.LCPI46_1] ; CHECK-NEXT: fcsel d0, d1, d0, ne ; CHECK-NEXT: ret @@ -577,12 +577,12 @@ define double @fdiv_constant_sel_constants(i1 %cond) { ; CHECK-LABEL: fdiv_constant_sel_constants: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x9, #7378697629483820646 ; CHECK-NEXT: adrp x8, .LCPI47_0 -; CHECK-NEXT: movk x9, #49140, lsl #48 ; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI47_0] -; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: mov x8, #7378697629483820646 +; CHECK-NEXT: movk x8, #49140, lsl #48 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fcsel d0, d1, d0, ne ; CHECK-NEXT: ret %sel = select i1 %cond, double -4.0, double 23.3 @@ -595,8 +595,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI48_0 ; CHECK-NEXT: fmov d1, #-4.00000000 -; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI48_0] +; CHECK-NEXT: tst w0, #0x1 ; CHECK-NEXT: fcsel d0, d1, d0, ne ; CHECK-NEXT: ret %sel = select i1 %cond, double -4.0, double 23.3 @@ -607,13 +607,13 @@ define double @frem_constant_sel_constants(i1 %cond) { ; CHECK-LABEL: frem_constant_sel_constants: ; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI49_0 +; CHECK-NEXT: tst w0, #0x1 +; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI49_0] ; CHECK-NEXT: mov x8, #7378697629483820646 -; CHECK-NEXT: adrp x9, .LCPI49_0 ; CHECK-NEXT: movk x8, #16404, lsl #48 -; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: ldr d1, [x9, :lo12:.LCPI49_0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcsel d0, d1, d0, ne +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fcsel d0, d0, d1, ne ; CHECK-NEXT: ret %sel = select i1 %cond, double -4.0, double 23.3 %bo = frem double 5.1, %sel Index: llvm/test/CodeGen/AArch64/select_fmf.ll =================================================================== --- llvm/test/CodeGen/AArch64/select_fmf.ll +++ llvm/test/CodeGen/AArch64/select_fmf.ll @@ -7,11 +7,11 @@ define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) { ; CHECK-LABEL: select_select_fold_select_and: ; CHECK: // %bb.0: -; CHECK-NEXT: fminnm s5, s1, s2 +; CHECK-NEXT: fminnm s4, s1, s2 ; CHECK-NEXT: fcmp s1, s2 ; CHECK-NEXT: fmaxnm s1, s0, s3 +; CHECK-NEXT: fccmp s4, s0, #4, lt ; CHECK-NEXT: fmov s4, #0.50000000 -; CHECK-NEXT: fccmp s5, s0, #4, lt ; CHECK-NEXT: fcsel s2, s1, s0, gt ; CHECK-NEXT: fadd s1, s0, s4 ; CHECK-NEXT: fadd s4, s1, s2 @@ -26,9 +26,9 @@ ; CHECK-NEXT: mov w9, #13107 ; CHECK-NEXT: movk w8, #48844, lsl #16 ; CHECK-NEXT: movk w9, #48819, lsl #16 -; CHECK-NEXT: fcmp s1, #0.0 ; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: fmov s4, w9 +; CHECK-NEXT: fcmp s1, #0.0 ; CHECK-NEXT: fadd s0, s0, s2 ; CHECK-NEXT: fadd s2, s3, s4 ; CHECK-NEXT: fcsel s0, s0, s2, gt @@ -65,11 +65,11 @@ define float @select_select_fold_select_or(float %w, float %x, float %y, float %z) { ; CHECK-LABEL: select_select_fold_select_or: ; CHECK: // %bb.0: -; CHECK-NEXT: fminnm s5, s1, s2 +; CHECK-NEXT: fminnm s4, s1, s2 ; CHECK-NEXT: fcmp s1, s2 ; CHECK-NEXT: fmaxnm s1, s0, s3 +; CHECK-NEXT: fccmp s4, s0, #0, ge ; CHECK-NEXT: fmov s4, #0.50000000 -; CHECK-NEXT: fccmp s5, s0, #0, ge ; CHECK-NEXT: fcsel s2, s0, s1, gt ; CHECK-NEXT: fadd s1, s0, s4 ; CHECK-NEXT: fadd s4, s1, s2 @@ -84,9 +84,9 @@ ; CHECK-NEXT: mov w9, #13107 ; CHECK-NEXT: movk w8, #48844, lsl #16 ; CHECK-NEXT: movk w9, #48819, lsl #16 -; CHECK-NEXT: fcmp s1, #0.0 ; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: fmov s4, w9 +; CHECK-NEXT: fcmp s1, #0.0 ; CHECK-NEXT: fadd s0, s0, s2 ; CHECK-NEXT: fadd s2, s3, s4 ; CHECK-NEXT: fcsel s0, s0, s2, gt Index: llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll =================================================================== --- llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll +++ llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll @@ -165,8 +165,8 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) { ; CHECK-LABEL: sel_shift_bool_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #128 ; CHECK-NEXT: shl v0.16b, v0.16b, #7 +; CHECK-NEXT: movi v1.16b, #128 ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -205,8 +205,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: mov w8, #65536 -; CHECK-NEXT: shl v0.2d, v0.2d, #63 ; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: shl v0.2d, v0.2d, #63 ; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/settag-merge-order.ll =================================================================== --- llvm/test/CodeGen/AArch64/settag-merge-order.ll +++ llvm/test/CodeGen/AArch64/settag-merge-order.ll @@ -40,8 +40,8 @@ if.then: ; CHECK: mov x8, #320 -; CHECK: sub x8, x8, #32 ; CHECK: st2g x9, [x9], #32 +; CHECK: sub x8, x8, #32 ; CHECK: cbnz x8, call void @llvm.aarch64.settag(i8* %a, i64 160) call void @llvm.aarch64.settag(i8* %a2, i64 160) @@ -49,8 +49,8 @@ if.else: ; CHECK: mov x8, #256 -; CHECK: sub x8, x8, #32 ; CHECK: st2g x9, [x9], #32 +; CHECK: sub x8, x8, #32 ; CHECK: cbnz x8, call void @llvm.aarch64.settag(i8* %c, i64 128) call void @llvm.aarch64.settag(i8* %c2, i64 128) Index: llvm/test/CodeGen/AArch64/settag-merge.ll =================================================================== --- llvm/test/CodeGen/AArch64/settag-merge.ll +++ llvm/test/CodeGen/AArch64/settag-merge.ll @@ -136,8 +136,8 @@ ; CHECK: tbz w0, #0, [[LABEL:.LBB.*]] ; CHECK: add x9, sp, # ; CHECK: mov x8, #256 -; CHECK: sub x8, x8, #32 ; CHECK: st2g x9, [x9], #32 +; CHECK: sub x8, x8, #32 ; CHECK: cbnz x8, ; CHECK: [[LABEL]]: ; CHECK: stg sp, [sp, # @@ -164,8 +164,8 @@ ; CHECK: tbz w0, #0, [[LABEL:.LBB.*]] ; CHECK: add x9, sp, # ; CHECK: mov x8, #1024 -; CHECK: sub x8, x8, #32 ; CHECK: st2g x9, [x9], #32 +; CHECK: sub x8, x8, #32 ; CHECK: cbnz x8, ; CHECK: [[LABEL]]: ; CHECK: stg sp, [sp, # @@ -192,8 +192,8 @@ ; CHECK-LABEL: stg128_128_gap_128_128: ; CHECK: mov x9, sp ; CHECK: mov x8, #256 -; CHECK: sub x8, x8, #32 ; CHECK: st2g x9, [x9], #32 +; CHECK: sub x8, x8, #32 ; CHECK: cbnz x8, ; CHECK: mov x8, #256 ; CHECK: st2g sp, [sp], #32 Index: llvm/test/CodeGen/AArch64/settag.ll =================================================================== --- llvm/test/CodeGen/AArch64/settag.ll +++ llvm/test/CodeGen/AArch64/settag.ll @@ -61,8 +61,8 @@ ; CHECK-NEXT: mov x8, #256 ; CHECK-NEXT: .LBB5_1: // %entry ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub x8, x8, #32 ; CHECK-NEXT: st2g x0, [x0], #32 +; CHECK-NEXT: sub x8, x8, #32 ; CHECK-NEXT: cbnz x8, .LBB5_1 ; CHECK-NEXT: // %bb.2: // %entry ; CHECK-NEXT: ret @@ -74,12 +74,12 @@ define void @stg17(i8* %p) { ; CHECK-LABEL: stg17: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, #256 ; CHECK-NEXT: stg x0, [x0], #16 +; CHECK-NEXT: mov x8, #256 ; CHECK-NEXT: .LBB6_1: // %entry ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub x8, x8, #32 ; CHECK-NEXT: st2g x0, [x0], #32 +; CHECK-NEXT: sub x8, x8, #32 ; CHECK-NEXT: cbnz x8, .LBB6_1 ; CHECK-NEXT: // %bb.2: // %entry ; CHECK-NEXT: ret @@ -102,12 +102,12 @@ define void @stzg17(i8* %p) { ; CHECK-LABEL: stzg17: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, #256 ; CHECK-NEXT: stzg x0, [x0], #16 +; CHECK-NEXT: mov x8, #256 ; CHECK-NEXT: .LBB8_1: // %entry ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub x8, x8, #32 ; CHECK-NEXT: stz2g x0, [x0], #32 +; CHECK-NEXT: sub x8, x8, #32 ; CHECK-NEXT: cbnz x8, .LBB8_1 ; CHECK-NEXT: // %bb.2: // %entry ; CHECK-NEXT: ret @@ -179,8 +179,8 @@ ; CHECK-NEXT: stg x9, [x9], #16 ; CHECK-NEXT: .LBB12_1: // %entry ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub x8, x8, #32 ; CHECK-NEXT: st2g x9, [x9], #32 +; CHECK-NEXT: sub x8, x8, #32 ; CHECK-NEXT: cbnz x8, .LBB12_1 ; CHECK-NEXT: // %bb.2: // %entry ; CHECK-NEXT: add sp, sp, #272 Index: llvm/test/CodeGen/AArch64/shift-amount-mod.ll =================================================================== --- llvm/test/CodeGen/AArch64/shift-amount-mod.ll +++ llvm/test/CodeGen/AArch64/shift-amount-mod.ll @@ -21,9 +21,9 @@ define i32 @load32_shl_by_negated(i32* %valptr, i32 %shamt) nounwind { ; CHECK-LABEL: load32_shl_by_negated: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: lsl w0, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: lsl w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, i32* %valptr %negshamt = sub i32 32, %shamt @@ -45,9 +45,9 @@ define void @modify32_shl_by_negated(i32* %valptr, i32 %shamt) nounwind { ; CHECK-LABEL: modify32_shl_by_negated: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: lsl w8, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: lsl w8, w8, w9 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret %val = load i32, i32* %valptr @@ -59,11 +59,11 @@ define void @modify32_shl_by_negated_multi_use(i32* %valptr, i32 %shamt, i32* %shamtptr) nounwind { ; CHECK-LABEL: modify32_shl_by_negated_multi_use: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w10, #32 -; CHECK-NEXT: lsl w8, w9, w8 -; CHECK-NEXT: sub w9, w10, w1 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: mov w9, #32 +; CHECK-NEXT: sub w9, w9, w1 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: str w9, [x2] ; CHECK-NEXT: ret @@ -88,9 +88,9 @@ define i64 @load64_shl_by_negated(i64* %valptr, i64 %shamt) nounwind { ; CHECK-LABEL: load64_shl_by_negated: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: lsl x0, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: lsl x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, i64* %valptr %negshamt = sub i64 64, %shamt @@ -112,9 +112,9 @@ define void @modify64_shl_by_negated(i64* %valptr, i64 %shamt) nounwind { ; CHECK-LABEL: modify64_shl_by_negated: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: lsl x8, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: lsl x8, x8, x9 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: ret %val = load i64, i64* %valptr @@ -126,11 +126,11 @@ define void @modify64_shl_by_negated_multi_use(i64* %valptr, i64 %shamt, i64* %shamtptr) nounwind { ; CHECK-LABEL: modify64_shl_by_negated_multi_use: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w10, #64 -; CHECK-NEXT: lsl x8, x9, x8 -; CHECK-NEXT: sub x9, x10, x1 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: mov w9, #64 +; CHECK-NEXT: sub x9, x9, x1 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: str x9, [x2] ; CHECK-NEXT: ret @@ -158,9 +158,9 @@ define i32 @load32_lshr_by_negated(i32* %valptr, i32 %shamt) nounwind { ; CHECK-LABEL: load32_lshr_by_negated: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: lsr w0, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, i32* %valptr %negshamt = sub i32 32, %shamt @@ -182,9 +182,9 @@ define void @modify32_lshr_by_negated(i32* %valptr, i32 %shamt) nounwind { ; CHECK-LABEL: modify32_lshr_by_negated: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: lsr w8, w8, w9 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret %val = load i32, i32* %valptr @@ -196,11 +196,11 @@ define void @modify32_lshr_by_negated_multi_use(i32* %valptr, i32 %shamt, i32* %shamtptr) nounwind { ; CHECK-LABEL: modify32_lshr_by_negated_multi_use: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w10, #32 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: sub w9, w10, w1 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: lsr w8, w8, w9 +; CHECK-NEXT: mov w9, #32 +; CHECK-NEXT: sub w9, w9, w1 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: str w9, [x2] ; CHECK-NEXT: ret @@ -225,9 +225,9 @@ define i64 @load64_lshr_by_negated(i64* %valptr, i64 %shamt) nounwind { ; CHECK-LABEL: load64_lshr_by_negated: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: lsr x0, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, i64* %valptr %negshamt = sub i64 64, %shamt @@ -249,9 +249,9 @@ define void @modify64_lshr_by_negated(i64* %valptr, i64 %shamt) nounwind { ; CHECK-LABEL: modify64_lshr_by_negated: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: lsr x8, x8, x9 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: ret %val = load i64, i64* %valptr @@ -263,11 +263,11 @@ define void @modify64_lshr_by_negated_multi_use(i64* %valptr, i64 %shamt, i64* %shamtptr) nounwind { ; CHECK-LABEL: modify64_lshr_by_negated_multi_use: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w10, #64 -; CHECK-NEXT: lsr x8, x9, x8 -; CHECK-NEXT: sub x9, x10, x1 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: lsr x8, x8, x9 +; CHECK-NEXT: mov w9, #64 +; CHECK-NEXT: sub x9, x9, x1 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: str x9, [x2] ; CHECK-NEXT: ret @@ -295,9 +295,9 @@ define i32 @load32_ashr_by_negated(i32* %valptr, i32 %shamt) nounwind { ; CHECK-LABEL: load32_ashr_by_negated: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: asr w0, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: asr w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, i32* %valptr %negshamt = sub i32 32, %shamt @@ -319,9 +319,9 @@ define void @modify32_ashr_by_negated(i32* %valptr, i32 %shamt) nounwind { ; CHECK-LABEL: modify32_ashr_by_negated: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: asr w8, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: asr w8, w8, w9 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret %val = load i32, i32* %valptr @@ -333,11 +333,11 @@ define void @modify32_ashr_by_negated_multi_use(i32* %valptr, i32 %shamt, i32* %shamtptr) nounwind { ; CHECK-LABEL: modify32_ashr_by_negated_multi_use: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w10, #32 -; CHECK-NEXT: asr w8, w9, w8 -; CHECK-NEXT: sub w9, w10, w1 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: asr w8, w8, w9 +; CHECK-NEXT: mov w9, #32 +; CHECK-NEXT: sub w9, w9, w1 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: str w9, [x2] ; CHECK-NEXT: ret @@ -362,9 +362,9 @@ define i64 @load64_ashr_by_negated(i64* %valptr, i64 %shamt) nounwind { ; CHECK-LABEL: load64_ashr_by_negated: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: asr x0, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: asr x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, i64* %valptr %negshamt = sub i64 64, %shamt @@ -386,9 +386,9 @@ define void @modify64_ashr_by_negated(i64* %valptr, i64 %shamt) nounwind { ; CHECK-LABEL: modify64_ashr_by_negated: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: asr x8, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: asr x8, x8, x9 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: ret %val = load i64, i64* %valptr @@ -400,11 +400,11 @@ define void @modify64_ashr_by_negated_multi_use(i64* %valptr, i64 %shamt, i64* %shamtptr) nounwind { ; CHECK-LABEL: modify64_ashr_by_negated_multi_use: ; CHECK: // %bb.0: -; CHECK-NEXT: neg x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w10, #64 -; CHECK-NEXT: asr x8, x9, x8 -; CHECK-NEXT: sub x9, x10, x1 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: asr x8, x8, x9 +; CHECK-NEXT: mov w9, #64 +; CHECK-NEXT: sub x9, x9, x1 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: str x9, [x2] ; CHECK-NEXT: ret @@ -436,9 +436,9 @@ define i32 @load32_shl_by_complemented(i32* %valptr, i32 %shamt) nounwind { ; CHECK-LABEL: load32_shl_by_complemented: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: lsl w0, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mvn w9, w1 +; CHECK-NEXT: lsl w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, i32* %valptr %negshamt = sub i32 31, %shamt @@ -460,9 +460,9 @@ define void @modify32_shl_by_complemented(i32* %valptr, i32 %shamt) nounwind { ; CHECK-LABEL: modify32_shl_by_complemented: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: lsl w8, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mvn w9, w1 +; CHECK-NEXT: lsl w8, w8, w9 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret %val = load i32, i32* %valptr @@ -474,11 +474,11 @@ define void @modify32_shl_by_complemented_multi_use(i32* %valptr, i32 %shamt, i32* %shamtptr) nounwind { ; CHECK-LABEL: modify32_shl_by_complemented_multi_use: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w10, #31 -; CHECK-NEXT: lsl w8, w9, w8 -; CHECK-NEXT: sub w9, w10, w1 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mvn w9, w1 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: mov w9, #31 +; CHECK-NEXT: sub w9, w9, w1 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: str w9, [x2] ; CHECK-NEXT: ret @@ -503,9 +503,9 @@ define i64 @load64_shl_by_complemented(i64* %valptr, i64 %shamt) nounwind { ; CHECK-LABEL: load64_shl_by_complemented: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: lsl x0, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mvn x9, x1 +; CHECK-NEXT: lsl x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, i64* %valptr %negshamt = sub i64 63, %shamt @@ -527,9 +527,9 @@ define void @modify64_shl_by_complemented(i64* %valptr, i64 %shamt) nounwind { ; CHECK-LABEL: modify64_shl_by_complemented: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: lsl x8, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mvn x9, x1 +; CHECK-NEXT: lsl x8, x8, x9 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: ret %val = load i64, i64* %valptr @@ -541,11 +541,11 @@ define void @modify64_shl_by_complemented_multi_use(i64* %valptr, i64 %shamt, i64* %shamtptr) nounwind { ; CHECK-LABEL: modify64_shl_by_complemented_multi_use: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w10, #63 -; CHECK-NEXT: lsl x8, x9, x8 -; CHECK-NEXT: sub x9, x10, x1 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mvn x9, x1 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: mov w9, #63 +; CHECK-NEXT: sub x9, x9, x1 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: str x9, [x2] ; CHECK-NEXT: ret @@ -573,9 +573,9 @@ define i32 @load32_lshr_by_complemented(i32* %valptr, i32 %shamt) nounwind { ; CHECK-LABEL: load32_lshr_by_complemented: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: lsr w0, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mvn w9, w1 +; CHECK-NEXT: lsr w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, i32* %valptr %negshamt = sub i32 31, %shamt @@ -597,9 +597,9 @@ define void @modify32_lshr_by_complemented(i32* %valptr, i32 %shamt) nounwind { ; CHECK-LABEL: modify32_lshr_by_complemented: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mvn w9, w1 +; CHECK-NEXT: lsr w8, w8, w9 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret %val = load i32, i32* %valptr @@ -611,11 +611,11 @@ define void @modify32_lshr_by_complemented_multi_use(i32* %valptr, i32 %shamt, i32* %shamtptr) nounwind { ; CHECK-LABEL: modify32_lshr_by_complemented_multi_use: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w10, #31 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: sub w9, w10, w1 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mvn w9, w1 +; CHECK-NEXT: lsr w8, w8, w9 +; CHECK-NEXT: mov w9, #31 +; CHECK-NEXT: sub w9, w9, w1 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: str w9, [x2] ; CHECK-NEXT: ret @@ -640,9 +640,9 @@ define i64 @load64_lshr_by_complemented(i64* %valptr, i64 %shamt) nounwind { ; CHECK-LABEL: load64_lshr_by_complemented: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: lsr x0, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mvn x9, x1 +; CHECK-NEXT: lsr x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, i64* %valptr %negshamt = sub i64 63, %shamt @@ -664,9 +664,9 @@ define void @modify64_lshr_by_complemented(i64* %valptr, i64 %shamt) nounwind { ; CHECK-LABEL: modify64_lshr_by_complemented: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mvn x9, x1 +; CHECK-NEXT: lsr x8, x8, x9 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: ret %val = load i64, i64* %valptr @@ -678,11 +678,11 @@ define void @modify64_lshr_by_complemented_multi_use(i64* %valptr, i64 %shamt, i64* %shamtptr) nounwind { ; CHECK-LABEL: modify64_lshr_by_complemented_multi_use: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w10, #63 -; CHECK-NEXT: lsr x8, x9, x8 -; CHECK-NEXT: sub x9, x10, x1 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mvn x9, x1 +; CHECK-NEXT: lsr x8, x8, x9 +; CHECK-NEXT: mov w9, #63 +; CHECK-NEXT: sub x9, x9, x1 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: str x9, [x2] ; CHECK-NEXT: ret @@ -710,9 +710,9 @@ define i32 @load32_ashr_by_complemented(i32* %valptr, i32 %shamt) nounwind { ; CHECK-LABEL: load32_ashr_by_complemented: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: asr w0, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mvn w9, w1 +; CHECK-NEXT: asr w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, i32* %valptr %negshamt = sub i32 31, %shamt @@ -734,9 +734,9 @@ define void @modify32_ashr_by_complemented(i32* %valptr, i32 %shamt) nounwind { ; CHECK-LABEL: modify32_ashr_by_complemented: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: asr w8, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mvn w9, w1 +; CHECK-NEXT: asr w8, w8, w9 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: ret %val = load i32, i32* %valptr @@ -748,11 +748,11 @@ define void @modify32_ashr_by_complemented_multi_use(i32* %valptr, i32 %shamt, i32* %shamtptr) nounwind { ; CHECK-LABEL: modify32_ashr_by_complemented_multi_use: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w1 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w10, #31 -; CHECK-NEXT: asr w8, w9, w8 -; CHECK-NEXT: sub w9, w10, w1 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mvn w9, w1 +; CHECK-NEXT: asr w8, w8, w9 +; CHECK-NEXT: mov w9, #31 +; CHECK-NEXT: sub w9, w9, w1 ; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: str w9, [x2] ; CHECK-NEXT: ret @@ -777,9 +777,9 @@ define i64 @load64_ashr_by_complemented(i64* %valptr, i64 %shamt) nounwind { ; CHECK-LABEL: load64_ashr_by_complemented: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: asr x0, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mvn x9, x1 +; CHECK-NEXT: asr x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, i64* %valptr %negshamt = sub i64 63, %shamt @@ -801,9 +801,9 @@ define void @modify64_ashr_by_complemented(i64* %valptr, i64 %shamt) nounwind { ; CHECK-LABEL: modify64_ashr_by_complemented: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: asr x8, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mvn x9, x1 +; CHECK-NEXT: asr x8, x8, x9 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: ret %val = load i64, i64* %valptr @@ -815,11 +815,11 @@ define void @modify64_ashr_by_complemented_multi_use(i64* %valptr, i64 %shamt, i64* %shamtptr) nounwind { ; CHECK-LABEL: modify64_ashr_by_complemented_multi_use: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn x8, x1 -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w10, #63 -; CHECK-NEXT: asr x8, x9, x8 -; CHECK-NEXT: sub x9, x10, x1 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mvn x9, x1 +; CHECK-NEXT: asr x8, x8, x9 +; CHECK-NEXT: mov w9, #63 +; CHECK-NEXT: sub x9, x9, x1 ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: str x9, [x2] ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/shift-by-signext.ll =================================================================== --- llvm/test/CodeGen/AArch64/shift-by-signext.ll +++ llvm/test/CodeGen/AArch64/shift-by-signext.ll @@ -80,11 +80,11 @@ define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind { ; CHECK-LABEL: n6_fshl: ; CHECK: // %bb.0: +; CHECK-NEXT: lsr w8, w1, #1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsr w9, w1, #1 +; CHECK-NEXT: mvn w9, w2 ; CHECK-NEXT: lsl w10, w0, w2 -; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: lsr w8, w8, w9 ; CHECK-NEXT: orr w0, w10, w8 ; CHECK-NEXT: ret %shamt_wide = sext i8 %shamt to i32 @@ -94,11 +94,11 @@ define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind { ; CHECK-LABEL: n7_fshr: ; CHECK: // %bb.0: +; CHECK-NEXT: lsl w8, w0, #1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsl w9, w0, #1 +; CHECK-NEXT: mvn w9, w2 ; CHECK-NEXT: lsr w10, w1, w2 -; CHECK-NEXT: lsl w8, w9, w8 +; CHECK-NEXT: lsl w8, w8, w9 ; CHECK-NEXT: orr w0, w8, w10 ; CHECK-NEXT: ret %shamt_wide = sext i8 %shamt to i32 Index: llvm/test/CodeGen/AArch64/shift_minsize.ll =================================================================== --- llvm/test/CodeGen/AArch64/shift_minsize.ll +++ llvm/test/CodeGen/AArch64/shift_minsize.ll @@ -88,16 +88,16 @@ ; ; CHECK-DARWIN-LABEL: shl128: ; CHECK-DARWIN: ; %bb.0: ; %entry -; CHECK-DARWIN-NEXT: mvn w8, w2 -; CHECK-DARWIN-NEXT: mov w9, w2 -; CHECK-DARWIN-NEXT: lsr x10, x0, #1 -; CHECK-DARWIN-NEXT: tst x9, #0x40 -; CHECK-DARWIN-NEXT: lsr x8, x10, x8 -; CHECK-DARWIN-NEXT: lsl x10, x1, x9 -; CHECK-DARWIN-NEXT: orr x8, x10, x8 -; CHECK-DARWIN-NEXT: lsl x10, x0, x9 -; CHECK-DARWIN-NEXT: csel x1, x10, x8, ne -; CHECK-DARWIN-NEXT: csel x0, xzr, x10, ne +; CHECK-DARWIN-NEXT: lsr x8, x0, #1 +; CHECK-DARWIN-NEXT: mvn w9, w2 +; CHECK-DARWIN-NEXT: mov w10, w2 +; CHECK-DARWIN-NEXT: lsr x8, x8, x9 +; CHECK-DARWIN-NEXT: lsl x9, x1, x10 +; CHECK-DARWIN-NEXT: lsl x11, x0, x10 +; CHECK-DARWIN-NEXT: tst x10, #0x40 +; CHECK-DARWIN-NEXT: orr x8, x9, x8 +; CHECK-DARWIN-NEXT: csel x1, x11, x8, ne +; CHECK-DARWIN-NEXT: csel x0, xzr, x11, ne ; CHECK-DARWIN-NEXT: ret entry: @@ -128,17 +128,17 @@ ; ; CHECK-DARWIN-LABEL: ashr128: ; CHECK-DARWIN: ; %bb.0: ; %entry -; CHECK-DARWIN-NEXT: mov w8, w2 -; CHECK-DARWIN-NEXT: mvn w9, w2 -; CHECK-DARWIN-NEXT: lsl x10, x1, #1 -; CHECK-DARWIN-NEXT: tst x8, #0x40 -; CHECK-DARWIN-NEXT: lsr x11, x0, x8 -; CHECK-DARWIN-NEXT: lsl x9, x10, x9 -; CHECK-DARWIN-NEXT: asr x10, x1, x8 -; CHECK-DARWIN-NEXT: orr x9, x9, x11 -; CHECK-DARWIN-NEXT: asr x8, x1, #63 -; CHECK-DARWIN-NEXT: csel x0, x10, x9, ne -; CHECK-DARWIN-NEXT: csel x1, x8, x10, ne +; CHECK-DARWIN-NEXT: lsl x8, x1, #1 +; CHECK-DARWIN-NEXT: mov w9, w2 +; CHECK-DARWIN-NEXT: mvn w10, w2 +; CHECK-DARWIN-NEXT: lsr x11, x0, x9 +; CHECK-DARWIN-NEXT: lsl x8, x8, x10 +; CHECK-DARWIN-NEXT: asr x10, x1, x9 +; CHECK-DARWIN-NEXT: asr x12, x1, #63 +; CHECK-DARWIN-NEXT: tst x9, #0x40 +; CHECK-DARWIN-NEXT: orr x8, x8, x11 +; CHECK-DARWIN-NEXT: csel x0, x10, x8, ne +; CHECK-DARWIN-NEXT: csel x1, x12, x10, ne ; CHECK-DARWIN-NEXT: ret entry: %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128 @@ -168,16 +168,16 @@ ; ; CHECK-DARWIN-LABEL: lshr128: ; CHECK-DARWIN: ; %bb.0: ; %entry -; CHECK-DARWIN-NEXT: mov w8, w2 -; CHECK-DARWIN-NEXT: mvn w9, w2 -; CHECK-DARWIN-NEXT: lsl x10, x1, #1 -; CHECK-DARWIN-NEXT: tst x8, #0x40 -; CHECK-DARWIN-NEXT: lsr x11, x0, x8 -; CHECK-DARWIN-NEXT: lsl x9, x10, x9 -; CHECK-DARWIN-NEXT: orr x9, x9, x11 -; CHECK-DARWIN-NEXT: lsr x10, x1, x8 -; CHECK-DARWIN-NEXT: csel x0, x10, x9, ne +; CHECK-DARWIN-NEXT: lsl x8, x1, #1 +; CHECK-DARWIN-NEXT: mov w9, w2 +; CHECK-DARWIN-NEXT: mvn w10, w2 +; CHECK-DARWIN-NEXT: lsr x11, x0, x9 +; CHECK-DARWIN-NEXT: lsl x8, x8, x10 +; CHECK-DARWIN-NEXT: lsr x10, x1, x9 +; CHECK-DARWIN-NEXT: tst x9, #0x40 +; CHECK-DARWIN-NEXT: orr x8, x8, x11 ; CHECK-DARWIN-NEXT: csel x1, xzr, x10, ne +; CHECK-DARWIN-NEXT: csel x0, x10, x8, ne ; CHECK-DARWIN-NEXT: ret entry: %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128 Index: llvm/test/CodeGen/AArch64/shuffle-tbl34.ll =================================================================== --- llvm/test/CodeGen/AArch64/shuffle-tbl34.ll +++ llvm/test/CodeGen/AArch64/shuffle-tbl34.ll @@ -21,11 +21,11 @@ define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) { ; CHECK-LABEL: shuffle4_v4i8_16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret @@ -47,11 +47,11 @@ define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) { ; CHECK-LABEL: shuffle4_v4i8_8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI1_0] ; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI1_0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b ; CHECK-NEXT: ret @@ -101,17 +101,17 @@ define <16 x i8> @shuffle4_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; CHECK-LABEL: shuffle4_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: adrp x9, .LCPI2_1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 -; CHECK-NEXT: mov v2.d[1], v3.d[0] +; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI2_0] +; CHECK-NEXT: mov v2.d[1], v3.d[0] +; CHECK-NEXT: adrp x8, .LCPI2_1 +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI2_1] ; CHECK-NEXT: adrp x8, .LCPI2_2 -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI2_1] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b ; CHECK-NEXT: tbl v1.8b, { v2.16b }, v3.8b ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI2_2] @@ -178,10 +178,10 @@ ; CHECK-LABEL: shuffle4_v16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: adrp x9, .LCPI3_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: adrp x8, .LCPI3_1 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI3_1] ; CHECK-NEXT: adrp x8, .LCPI3_2 -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI3_1] ; CHECK-NEXT: tbl v1.16b, { v0.16b }, v1.16b ; CHECK-NEXT: tbl v0.16b, { v2.16b }, v3.16b ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_2] @@ -214,10 +214,10 @@ ; CHECK-LABEL: shuffle4_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d5, d2 -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: fmov d4, d0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: fmov d4, d0 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: mov v4.d[1], v1.d[0] ; CHECK-NEXT: mov v5.d[1], v3.d[0] @@ -232,12 +232,11 @@ define <4 x i32> @shuffle4_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) { ; CHECK-LABEL: shuffle4_v4i32: ; CHECK: // %bb.0: +; CHECK-NEXT: zip1 v1.4s, v1.4s, v1.4s ; CHECK-NEXT: rev64 v3.4s, v3.4s -; CHECK-NEXT: zip1 v4.4s, v1.4s, v1.4s -; CHECK-NEXT: zip2 v1.4s, v3.4s, v2.4s -; CHECK-NEXT: ext v0.16b, v4.16b, v0.16b, #4 -; CHECK-NEXT: mov v1.d[1], v0.d[1] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ext v1.16b, v1.16b, v0.16b, #4 +; CHECK-NEXT: zip2 v0.4s, v3.4s, v2.4s +; CHECK-NEXT: mov v0.d[1], v1.d[1] ; CHECK-NEXT: ret %x = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> %y = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> @@ -276,12 +275,12 @@ define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; CHECK-LABEL: shuffle4_v8i8_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI6_0 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: adrp x8, .LCPI6_0 ; CHECK-NEXT: mov v2.d[1], v2.d[0] -; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI6_0] +; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: adrp x8, .LCPI6_1 ; CHECK-NEXT: tbl v3.8b, { v2.16b }, v1.8b ; CHECK-NEXT: tbl v2.8b, { v0.16b }, v1.8b @@ -315,12 +314,12 @@ define <8 x i8> @shuffle4_v8i8_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; CHECK-LABEL: shuffle4_v8i8_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: mov v2.d[1], v2.d[0] -; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI7_0] +; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: adrp x8, .LCPI7_1 ; CHECK-NEXT: tbl v2.8b, { v2.16b }, v1.8b ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b @@ -355,12 +354,12 @@ ; CHECK-LABEL: shuffle4_v4i8_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: uzp1 v0.8b, v0.8b, v1.8b +; CHECK-NEXT: uzp1 v1.8b, v2.8b, v3.8b ; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: uzp1 v2.8b, v2.8b, v3.8b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: ushll v3.8h, v0.8b, #0 -; CHECK-NEXT: ushll v4.8h, v2.8b, #0 -; CHECK-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v1.16b +; CHECK-NEXT: ushll v2.8h, v0.8b, #0 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] +; CHECK-NEXT: ushll v3.8h, v1.8b, #0 +; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b ; CHECK-NEXT: ret %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> %y = shufflevector <4 x i8> %c, <4 x i8> %d, <8 x i32> @@ -390,11 +389,11 @@ define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> %ce, <4 x i16> %de) { ; CHECK-LABEL: shuffle4_v4i16_trunc: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI9_0 ; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: adrp x8, .LCPI9_0 +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0] ; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret @@ -429,11 +428,11 @@ define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> %ce, <4 x i32> %de) { ; CHECK-LABEL: shuffle4_v4i32_trunc: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI10_0 ; CHECK-NEXT: xtn v4.4h, v0.4s +; CHECK-NEXT: adrp x8, .LCPI10_0 ; CHECK-NEXT: xtn v5.4h, v1.4s -; CHECK-NEXT: xtn v6.4h, v2.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] +; CHECK-NEXT: xtn v6.4h, v2.4s ; CHECK-NEXT: xtn v7.4h, v3.4s ; CHECK-NEXT: tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b ; CHECK-NEXT: ret @@ -469,9 +468,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI11_0] ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI11_0] ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v3.16b ; CHECK-NEXT: ret %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> @@ -501,9 +500,9 @@ ; CHECK-LABEL: shuffle3_v4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d3, d2 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: fmov d2, d0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_0] ; CHECK-NEXT: mov v2.d[1], v1.d[0] ; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b @@ -559,15 +558,15 @@ define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) { ; CHECK-LABEL: insert4_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: adrp x9, .LCPI14_1 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov v4.16b, v3.16b +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v0.d[1], v2.d[0] ; CHECK-NEXT: mov v3.16b, v1.16b ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_1] +; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: adrp x8, .LCPI14_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_1] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b ; CHECK-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v2.16b ; CHECK-NEXT: trn1 v0.4h, v1.4h, v0.4h @@ -629,15 +628,15 @@ define <16 x i8> @insert4_v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) { ; CHECK-LABEL: insert4_v16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: mov v4.16b, v3.16b ; CHECK-NEXT: adrp x8, .LCPI15_0 +; CHECK-NEXT: mov v3.16b, v1.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q31_q0 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v4.16b, v3.16b -; CHECK-NEXT: mov v3.16b, v1.16b -; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: mov v0.d[1], v2.d[0] -; CHECK-NEXT: tbl v31.16b, { v3.16b, v4.16b }, v5.16b +; CHECK-NEXT: tbl v31.16b, { v3.16b, v4.16b }, v1.16b ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] ; CHECK-NEXT: tbl v0.16b, { v31.16b, v0.16b }, v1.16b ; CHECK-NEXT: ret @@ -699,34 +698,34 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: frintm v0.2d, v0.2d ; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: frintm v4.2d, v4.2d ; CHECK-NEXT: frintm v1.2d, v1.2d -; CHECK-NEXT: frintm v5.2d, v5.2d ; CHECK-NEXT: frintm v2.2d, v2.2d -; CHECK-NEXT: frintm v6.2d, v6.2d -; CHECK-NEXT: frintm v3.2d, v3.2d -; CHECK-NEXT: frintm v7.2d, v7.2d +; CHECK-NEXT: frintm v4.2d, v4.2d ; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: fcvtzs v4.2d, v4.2d ; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-NEXT: frintm v5.2d, v5.2d ; CHECK-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-NEXT: fcvtzs v6.2d, v6.2d -; CHECK-NEXT: fcvtzs v3.2d, v3.2d +; CHECK-NEXT: frintm v6.2d, v6.2d +; CHECK-NEXT: frintm v3.2d, v3.2d +; CHECK-NEXT: frintm v7.2d, v7.2d ; CHECK-NEXT: xtn v16.2s, v0.2d -; CHECK-NEXT: fcvtzs v0.2d, v7.2d -; CHECK-NEXT: xtn v20.2s, v4.2d +; CHECK-NEXT: fcvtzs v0.2d, v4.2d ; CHECK-NEXT: xtn v17.2s, v1.2d -; CHECK-NEXT: xtn v21.2s, v5.2d +; CHECK-NEXT: fcvtzs v1.2d, v5.2d +; CHECK-NEXT: fcvtzs v4.2d, v6.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-NEXT: xtn v18.2s, v2.2d -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: xtn v22.2s, v6.2d +; CHECK-NEXT: fcvtzs v2.2d, v7.2d +; CHECK-NEXT: xtn v20.2s, v0.2d +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: xtn v21.2s, v1.2d +; CHECK-NEXT: xtn v22.2s, v4.2d ; CHECK-NEXT: xtn v19.2s, v3.2d -; CHECK-NEXT: xtn v23.2s, v0.2d -; CHECK-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b -; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b -; CHECK-NEXT: uzp1 v0.8h, v2.8h, v1.8h -; CHECK-NEXT: uzp2 v1.8h, v2.8h, v1.8h +; CHECK-NEXT: xtn v23.2s, v2.2d +; CHECK-NEXT: tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b +; CHECK-NEXT: tbl v2.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b +; CHECK-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-NEXT: ret %l214 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %l213) %l215 = fptosi <2 x double> %l214 to <2 x i16> Index: llvm/test/CodeGen/AArch64/shuffles.ll =================================================================== --- llvm/test/CodeGen/AArch64/shuffles.ll +++ llvm/test/CodeGen/AArch64/shuffles.ll @@ -4,20 +4,20 @@ define <16 x i32> @test_shuf1(<16 x i32> %x, <16 x i32> %y) { ; CHECK-LABEL: test_shuf1: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v16.16b, v6.16b, v1.16b, #4 -; CHECK-NEXT: dup v5.4s, v4.s[0] -; CHECK-NEXT: uzp1 v17.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp2 v18.4s, v2.4s, v4.4s +; CHECK-NEXT: ext v3.16b, v6.16b, v1.16b, #4 +; CHECK-NEXT: uzp1 v16.4s, v1.4s, v0.4s +; CHECK-NEXT: uzp2 v17.4s, v2.4s, v4.4s +; CHECK-NEXT: dup v4.4s, v4.s[0] +; CHECK-NEXT: trn2 v5.4s, v1.4s, v3.4s +; CHECK-NEXT: trn2 v1.4s, v16.4s, v1.4s +; CHECK-NEXT: trn1 v2.4s, v17.4s, v2.4s ; CHECK-NEXT: rev64 v3.4s, v7.4s -; CHECK-NEXT: trn2 v4.4s, v1.4s, v16.4s -; CHECK-NEXT: mov v5.s[0], v6.s[3] -; CHECK-NEXT: trn2 v1.4s, v17.4s, v1.4s -; CHECK-NEXT: trn1 v2.4s, v18.4s, v2.4s -; CHECK-NEXT: mov v4.s[0], v7.s[1] -; CHECK-NEXT: mov v3.d[0], v5.d[0] +; CHECK-NEXT: mov v4.s[0], v6.s[3] +; CHECK-NEXT: mov v5.s[0], v7.s[1] ; CHECK-NEXT: ext v1.16b, v0.16b, v1.16b, #12 ; CHECK-NEXT: mov v2.s[3], v7.s[0] -; CHECK-NEXT: mov v0.16b, v4.16b +; CHECK-NEXT: mov v3.d[0], v4.d[0] +; CHECK-NEXT: mov v0.16b, v5.16b ; CHECK-NEXT: ret %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <16 x i32> ret <16 x i32> %s3 @@ -26,10 +26,10 @@ define <4 x i32> @test_shuf2(<16 x i32> %x, <16 x i32> %y) { ; CHECK-LABEL: test_shuf2: ; CHECK: // %bb.0: -; CHECK-NEXT: zip2 v2.4s, v7.4s, v6.4s +; CHECK-NEXT: zip2 v0.4s, v7.4s, v6.4s +; CHECK-NEXT: trn2 v2.4s, v7.4s, v0.4s ; CHECK-NEXT: ext v0.16b, v1.16b, v1.16b, #4 -; CHECK-NEXT: trn2 v1.4s, v7.4s, v2.4s -; CHECK-NEXT: mov v0.d[0], v1.d[0] +; CHECK-NEXT: mov v0.d[0], v2.d[0] ; CHECK-NEXT: ret %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> ret <4 x i32> %s3 @@ -60,8 +60,8 @@ define <4 x i32> @test_shuf5(<16 x i32> %x, <16 x i32> %y) { ; CHECK-LABEL: test_shuf5: ; CHECK: // %bb.0: -; CHECK-NEXT: rev64 v0.4s, v7.4s ; CHECK-NEXT: ext v1.16b, v6.16b, v4.16b, #12 +; CHECK-NEXT: rev64 v0.4s, v7.4s ; CHECK-NEXT: mov v0.d[0], v1.d[0] ; CHECK-NEXT: ret %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> Index: llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll =================================================================== --- llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll +++ llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll @@ -159,8 +159,8 @@ ; CHECK-LABEL: vec_sink_add_of_const_to_add0: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI12_0 -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_0] +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS @@ -171,8 +171,8 @@ ; CHECK-LABEL: vec_sink_add_of_const_to_add1: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI13_0 -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS @@ -187,8 +187,8 @@ ; CHECK-LABEL: vec_sink_sub_of_const_to_add0: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, @@ -199,8 +199,8 @@ ; CHECK-LABEL: vec_sink_sub_of_const_to_add1: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI15_0 -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, @@ -215,8 +215,8 @@ ; CHECK-LABEL: vec_sink_sub_from_const_to_add0: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a @@ -227,8 +227,8 @@ ; CHECK-LABEL: vec_sink_sub_from_const_to_add1: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a @@ -243,8 +243,8 @@ ; CHECK-LABEL: vec_sink_add_of_const_to_sub: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS @@ -255,8 +255,8 @@ ; CHECK-LABEL: vec_sink_add_of_const_to_sub2: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI19_0] +; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS @@ -271,8 +271,8 @@ ; CHECK-LABEL: vec_sink_sub_of_const_to_sub: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI20_0 -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, @@ -283,8 +283,8 @@ ; CHECK-LABEL: vec_sink_sub_of_const_to_sub2: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI21_0 -; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_0] +; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, @@ -299,8 +299,8 @@ ; CHECK-LABEL: vec_sink_sub_from_const_to_sub: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI22_0 -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_0] +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a @@ -311,8 +311,8 @@ ; CHECK-LABEL: vec_sink_sub_from_const_to_sub2: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI23_0 -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_0] +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a Index: llvm/test/CodeGen/AArch64/sinksplat.ll =================================================================== --- llvm/test/CodeGen/AArch64/sinksplat.ll +++ llvm/test/CodeGen/AArch64/sinksplat.ll @@ -200,17 +200,17 @@ define <4 x i32> @mlal(<4 x i32> %x, <4 x i32> *%y) { ; CHECK-LABEL: mlal: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: dup v1.4s, v1.s[3] +; CHECK-NEXT: dup v0.4s, v0.s[3] ; CHECK-NEXT: .LBB6_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: subs w8, w8, #1 -; CHECK-NEXT: mla v0.4s, v2.4s, v1.4s +; CHECK-NEXT: mla v1.4s, v2.4s, v0.4s ; CHECK-NEXT: b.eq .LBB6_1 ; CHECK-NEXT: // %bb.2: // %l2 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> @@ -266,17 +266,17 @@ define <4 x float> @fmuladd(<4 x float> %x, <4 x float> *%y) { ; CHECK-LABEL: fmuladd: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: dup v1.4s, v1.s[3] +; CHECK-NEXT: dup v0.4s, v0.s[3] ; CHECK-NEXT: .LBB8_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: subs w8, w8, #1 -; CHECK-NEXT: fmla v0.4s, v1.4s, v2.4s +; CHECK-NEXT: fmla v1.4s, v0.4s, v2.4s ; CHECK-NEXT: b.eq .LBB8_1 ; CHECK-NEXT: // %bb.2: // %l2 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> @@ -299,19 +299,19 @@ define <4 x float> @fma(<4 x float> %x, <4 x float> *%y) { ; CHECK-LABEL: fma: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: dup v1.4s, v1.s[3] +; CHECK-NEXT: dup v0.4s, v0.s[3] ; CHECK-NEXT: .LBB9_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr q3, [x0] +; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: subs w8, w8, #1 -; CHECK-NEXT: mov v2.16b, v0.16b -; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: fmla v0.4s, v2.4s, v3.4s +; CHECK-NEXT: mov v3.16b, v1.16b +; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: fmla v1.4s, v3.4s, v2.4s ; CHECK-NEXT: b.eq .LBB9_1 ; CHECK-NEXT: // %bb.2: // %l2 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> @@ -333,11 +333,10 @@ define <4 x i32> @smull_nonsplat(<4 x i16> %x, <4 x i16> *%y) { ; CHECK-LABEL: smull_nonsplat: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d1, d0 +; CHECK-NEXT: trn2 v1.4h, v0.4h, v0.4h ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: zip2 v1.4h, v1.4h, v0.4h ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: trn2 v2.4h, v1.4h, v1.4h -; CHECK-NEXT: zip2 v1.4h, v2.4h, v1.4h ; CHECK-NEXT: .LBB10_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr d2, [x0] Index: llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll =================================================================== --- llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll +++ llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll @@ -4,19 +4,19 @@ define <16 x double> @test_sitofp_fixed(<16 x i32> %in) { ; CHECK-LABEL: test_sitofp_fixed: ; CHECK: ; %bb.0: -; CHECK-NEXT: sshll2.2d v4, v2, #0 -; CHECK-NEXT: sshll2.2d v5, v0, #0 -; CHECK-NEXT: sshll2.2d v6, v1, #0 +; CHECK-NEXT: sshll2.2d v4, v0, #0 +; CHECK-NEXT: sshll2.2d v5, v1, #0 +; CHECK-NEXT: sshll2.2d v6, v2, #0 ; CHECK-NEXT: sshll2.2d v7, v3, #0 ; CHECK-NEXT: sshll.2d v0, v0, #0 ; CHECK-NEXT: sshll.2d v16, v1, #0 ; CHECK-NEXT: sshll.2d v17, v2, #0 ; CHECK-NEXT: sshll.2d v18, v3, #0 -; CHECK-NEXT: scvtf.2d v1, v5, #6 +; CHECK-NEXT: scvtf.2d v1, v4, #6 ; CHECK-NEXT: scvtf.2d v0, v0, #6 -; CHECK-NEXT: scvtf.2d v3, v6, #6 +; CHECK-NEXT: scvtf.2d v3, v5, #6 ; CHECK-NEXT: scvtf.2d v2, v16, #6 -; CHECK-NEXT: scvtf.2d v5, v4, #6 +; CHECK-NEXT: scvtf.2d v5, v6, #6 ; CHECK-NEXT: scvtf.2d v4, v17, #6 ; CHECK-NEXT: scvtf.2d v7, v7, #6 ; CHECK-NEXT: scvtf.2d v6, v18, #6 Index: llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll =================================================================== --- llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll +++ llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll @@ -18,10 +18,10 @@ define void @ld1b_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: ld1b_with_addr_offset: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, wzr -; CHECK-NEXT: mov w13, w2 -; CHECK-NEXT: ld1b {za0h.b[w12, 0]}, p0/z, [x0, x1] -; CHECK-NEXT: ld1b {za0v.b[w13, 15]}, p0/z, [x0, x1] +; CHECK-NEXT: mov w13, wzr +; CHECK-NEXT: mov w12, w2 +; CHECK-NEXT: ld1b {za0h.b[w13, 0]}, p0/z, [x0, x1] +; CHECK-NEXT: ld1b {za0v.b[w12, 15]}, p0/z, [x0, x1] ; CHECK-NEXT: ret %base = getelementptr i8, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 15 @@ -66,16 +66,16 @@ define void @ld1w( %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: ld1w: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, wzr -; CHECK-NEXT: mov w13, w1 -; CHECK-NEXT: ld1w {za0h.s[w12, 0]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za1h.s[w12, 0]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za2h.s[w12, 0]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za3h.s[w13, 3]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za0v.s[w12, 0]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za1v.s[w12, 0]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za2v.s[w13, 3]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za3v.s[w12, 0]}, p0/z, [x0] +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: mov w13, wzr +; CHECK-NEXT: ld1w {za0h.s[w13, 0]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za1h.s[w13, 0]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za2h.s[w13, 0]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za3h.s[w12, 3]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za0v.s[w13, 0]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za1v.s[w13, 0]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za2v.s[w12, 3]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za3v.s[w13, 0]}, p0/z, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 3 call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i64 0, i32 0) @@ -107,8 +107,8 @@ define void @ld1d( %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: ld1d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w13, wzr ; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: mov w13, wzr ; CHECK-NEXT: ld1d {za0h.d[w13, 0]}, p0/z, [x0] ; CHECK-NEXT: ld1d {za1h.d[w13, 0]}, p0/z, [x0] ; CHECK-NEXT: ld1d {za2h.d[w13, 0]}, p0/z, [x0] Index: llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll =================================================================== --- llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll +++ llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll @@ -6,19 +6,19 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: mov z4.d, z0.d ; CHECK-NEXT: mov z1.b, p0/m, za0h.b[w12, 0] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.b, p0/m, za0h.b[w12, 2] -; CHECK-NEXT: mov z3.b, p0/m, za0h.b[w12, 4] ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z4.b, p0/m, za0h.b[w12, 6] -; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0h.b[w12, 4] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0h.b[w12, 6] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.b, p0/m, za0h.b[w12, 8] -; CHECK-NEXT: mov z4.d, z0.d -; CHECK-NEXT: mov z3.b, p0/m, za0h.b[w12, 10] -; CHECK-NEXT: mov z4.b, p0/m, za0h.b[w12, 12] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0h.b[w12, 10] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0h.b[w12, 12] ; CHECK-NEXT: mov z0.b, p0/m, za0h.b[w12, 14] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -45,19 +45,19 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: mov z4.d, z0.d ; CHECK-NEXT: mov z1.b, p0/m, za0v.b[w12, 1] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.b, p0/m, za0v.b[w12, 3] -; CHECK-NEXT: mov z3.b, p0/m, za0v.b[w12, 5] ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z4.b, p0/m, za0v.b[w12, 7] -; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0v.b[w12, 5] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0v.b[w12, 7] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.b, p0/m, za0v.b[w12, 9] -; CHECK-NEXT: mov z4.d, z0.d -; CHECK-NEXT: mov z3.b, p0/m, za0v.b[w12, 11] -; CHECK-NEXT: mov z4.b, p0/m, za0v.b[w12, 13] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0v.b[w12, 11] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0v.b[w12, 13] ; CHECK-NEXT: mov z0.b, p0/m, za0v.b[w12, 15] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -85,11 +85,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d ; CHECK-NEXT: mov z1.h, p0/m, za0h.h[w12, 0] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 2] -; CHECK-NEXT: mov z3.h, p0/m, za0h.h[w12, 4] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 4] ; CHECK-NEXT: mov z0.h, p0/m, za0h.h[w12, 6] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -108,11 +108,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d ; CHECK-NEXT: mov z1.h, p0/m, za1v.h[w12, 1] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.h, p0/m, za1v.h[w12, 3] -; CHECK-NEXT: mov z3.h, p0/m, za1v.h[w12, 5] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za1v.h[w12, 5] ; CHECK-NEXT: mov z0.h, p0/m, za1v.h[w12, 7] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -132,19 +132,19 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: mov z4.d, z0.d ; CHECK-NEXT: mov z1.h, p0/m, za0h.h[w12, 0] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 1] -; CHECK-NEXT: mov z3.h, p0/m, za0v.h[w12, 2] ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z4.h, p0/m, za0v.h[w12, 3] -; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0v.h[w12, 2] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0v.h[w12, 3] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 4] -; CHECK-NEXT: mov z4.d, z0.d -; CHECK-NEXT: mov z3.h, p0/m, za0h.h[w12, 5] -; CHECK-NEXT: mov z4.h, p0/m, za0v.h[w12, 6] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 5] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0v.h[w12, 6] ; CHECK-NEXT: mov z0.h, p0/m, za0v.h[w12, 7] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -171,19 +171,19 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: mov z4.d, z0.d ; CHECK-NEXT: mov z1.h, p0/m, za0h.h[w12, 0] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 1] -; CHECK-NEXT: mov z3.h, p0/m, za0v.h[w12, 2] ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z4.h, p0/m, za0v.h[w12, 3] -; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0v.h[w12, 2] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0v.h[w12, 3] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 4] -; CHECK-NEXT: mov z4.d, z0.d -; CHECK-NEXT: mov z3.h, p0/m, za0h.h[w12, 5] -; CHECK-NEXT: mov z4.h, p0/m, za0v.h[w12, 6] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 5] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0v.h[w12, 6] ; CHECK-NEXT: mov z0.h, p0/m, za0v.h[w12, 7] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -241,11 +241,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d ; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 0] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.s, p0/m, za0h.s[w12, 1] -; CHECK-NEXT: mov z3.s, p0/m, za0v.s[w12, 2] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.s, p0/m, za0v.s[w12, 2] ; CHECK-NEXT: mov z0.s, p0/m, za0v.s[w12, 3] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll =================================================================== --- llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll +++ llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll @@ -445,8 +445,8 @@ ; CHECK-NEXT: mov z0.s, #0 // =0x0 ; CHECK-NEXT: .LBB28_1: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: subs w1, w1, #3 ; CHECK-NEXT: mov za0h.s[w12, 0], p0/m, z0.s +; CHECK-NEXT: subs w1, w1, #3 ; CHECK-NEXT: mov za0h.s[w12, 1], p0/m, z0.s ; CHECK-NEXT: mov za0h.s[w12, 2], p0/m, z0.s ; CHECK-NEXT: b.ne .LBB28_1 Index: llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll =================================================================== --- llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll +++ llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll @@ -18,10 +18,10 @@ define void @st1b_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: st1b_with_addr_offset: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, wzr -; CHECK-NEXT: mov w13, w2 -; CHECK-NEXT: st1b {za0h.b[w12, 0]}, p0, [x0, x1] -; CHECK-NEXT: st1b {za0v.b[w13, 15]}, p0, [x0, x1] +; CHECK-NEXT: mov w13, wzr +; CHECK-NEXT: mov w12, w2 +; CHECK-NEXT: st1b {za0h.b[w13, 0]}, p0, [x0, x1] +; CHECK-NEXT: st1b {za0v.b[w12, 15]}, p0, [x0, x1] ; CHECK-NEXT: ret %base = getelementptr i8, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 15 @@ -92,10 +92,10 @@ define void @st1w_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: st1w_with_addr_offset: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, wzr -; CHECK-NEXT: mov w13, w2 -; CHECK-NEXT: st1w {za0h.s[w12, 0]}, p0, [x0, x1, lsl #2] -; CHECK-NEXT: st1w {za3v.s[w13, 3]}, p0, [x0, x1, lsl #2] +; CHECK-NEXT: mov w13, wzr +; CHECK-NEXT: mov w12, w2 +; CHECK-NEXT: st1w {za0h.s[w13, 0]}, p0, [x0, x1, lsl #2] +; CHECK-NEXT: st1w {za3v.s[w12, 3]}, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %base = getelementptr i32, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 3 Index: llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll =================================================================== --- llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll +++ llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll @@ -128,8 +128,8 @@ ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: and x1, x1, x16 ; CHECK-NEXT: csdb -; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp ; CHECK-NEXT: ld1 { v0.d }[0], [x1] +; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp ; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 ; CHECK-NEXT: mov sp, [[TMPREG]] ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/split-vector-insert.ll =================================================================== --- llvm/test/CodeGen/AArch64/split-vector-insert.ll +++ llvm/test/CodeGen/AArch64/split-vector-insert.ll @@ -43,8 +43,8 @@ ; CHECK-NEXT: cmp x8, #6 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: str q3, [x10, x9] -; CHECK-NEXT: mov w9, #6 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: mov w9, #6 ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: addvl x9, sp, #2 ; CHECK-NEXT: lsl x8, x8, #3 @@ -54,8 +54,8 @@ ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret @@ -100,8 +100,8 @@ ; CHECK-NEXT: cmp x8, #6 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: str q3, [x10, x9] -; CHECK-NEXT: mov w9, #6 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: mov w9, #6 ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: addvl x9, sp, #2 ; CHECK-NEXT: lsl x8, x8, #3 @@ -109,10 +109,10 @@ ; CHECK-NEXT: str q4, [x9, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl] ; CHECK-NEXT: addvl sp, sp, #3 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/sqrt-fastmath.ll =================================================================== --- llvm/test/CodeGen/AArch64/sqrt-fastmath.ll +++ llvm/test/CodeGen/AArch64/sqrt-fastmath.ll @@ -543,16 +543,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: frsqrte d1, d0 ; CHECK-NEXT: mov x8, #4631107791820423168 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fmul d3, d1, d1 -; CHECK-NEXT: frsqrts d3, d0, d3 -; CHECK-NEXT: fmul d1, d1, d3 -; CHECK-NEXT: fmul d3, d1, d1 -; CHECK-NEXT: frsqrts d3, d0, d3 -; CHECK-NEXT: fmul d1, d1, d3 -; CHECK-NEXT: fmul d3, d1, d1 -; CHECK-NEXT: frsqrts d3, d0, d3 -; CHECK-NEXT: fmul d1, d1, d3 ; CHECK-NEXT: fmul d0, d0, d1 ; CHECK-NEXT: fmul d2, d1, d2 ; CHECK-NEXT: str d1, [x0] @@ -585,10 +585,7 @@ ; CHECK-LABEL: sqrt_simplify_before_recip_3_uses_order: ; CHECK: // %bb.0: ; CHECK-NEXT: frsqrte d1, d0 -; CHECK-NEXT: mov x9, #140737488355328 ; CHECK-NEXT: mov x8, #4631107791820423168 -; CHECK-NEXT: movk x9, #16453, lsl #48 -; CHECK-NEXT: fmov d3, x9 ; CHECK-NEXT: fmul d2, d1, d1 ; CHECK-NEXT: frsqrts d2, d0, d2 ; CHECK-NEXT: fmul d1, d1, d2 @@ -599,6 +596,9 @@ ; CHECK-NEXT: frsqrts d2, d0, d2 ; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: mov x8, #140737488355328 +; CHECK-NEXT: movk x8, #16453, lsl #48 +; CHECK-NEXT: fmov d3, x8 ; CHECK-NEXT: fmul d0, d0, d1 ; CHECK-NEXT: fmul d2, d1, d2 ; CHECK-NEXT: fmul d1, d1, d3 @@ -620,11 +620,11 @@ ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt d0, d0 ; FAULT-NEXT: fmov d1, #1.00000000 -; FAULT-NEXT: mov x9, #140737488355328 ; FAULT-NEXT: mov x8, #4631107791820423168 -; FAULT-NEXT: movk x9, #16453, lsl #48 ; FAULT-NEXT: fmov d2, x8 -; FAULT-NEXT: fmov d3, x9 +; FAULT-NEXT: mov x8, #140737488355328 +; FAULT-NEXT: movk x8, #16453, lsl #48 +; FAULT-NEXT: fmov d3, x8 ; FAULT-NEXT: fdiv d1, d1, d0 ; FAULT-NEXT: fmul d2, d1, d2 ; FAULT-NEXT: fmul d3, d1, d3 @@ -637,10 +637,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: frsqrte d1, d0 ; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: mov x9, #140737488355328 ; CHECK-NEXT: mov x8, #4631107791820423168 -; CHECK-NEXT: movk x9, #16453, lsl #48 -; CHECK-NEXT: fmov d3, x9 ; CHECK-NEXT: fmul d2, d1, d1 ; CHECK-NEXT: frsqrts d2, d0, d2 ; CHECK-NEXT: fmul d1, d1, d2 @@ -651,12 +648,15 @@ ; CHECK-NEXT: frsqrts d2, d0, d2 ; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fmul d2, d0, d1 -; CHECK-NEXT: fmul d3, d1, d3 ; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: fcsel d2, d0, d2, eq ; CHECK-NEXT: fdiv d0, d0, d2 ; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: mov x8, #140737488355328 +; CHECK-NEXT: movk x8, #16453, lsl #48 +; CHECK-NEXT: fmov d3, x8 ; CHECK-NEXT: fmul d2, d1, d2 +; CHECK-NEXT: fmul d3, d1, d3 ; CHECK-NEXT: str d2, [x1] ; CHECK-NEXT: str d3, [x2] ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll =================================================================== --- llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll +++ llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll @@ -23,12 +23,12 @@ define i1 @test_srem_even(i4 %X) nounwind { ; CHECK-LABEL: test_srem_even: ; CHECK: // %bb.0: -; CHECK-NEXT: sbfx w9, w0, #0, #4 -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: add w9, w9, w9, lsl #1 -; CHECK-NEXT: ubfx w10, w9, #7, #1 -; CHECK-NEXT: add w9, w10, w9, lsr #4 -; CHECK-NEXT: msub w8, w9, w8, w0 +; CHECK-NEXT: sbfx w8, w0, #0, #4 +; CHECK-NEXT: add w8, w8, w8, lsl #1 +; CHECK-NEXT: ubfx w9, w8, #7, #1 +; CHECK-NEXT: add w8, w9, w8, lsr #4 +; CHECK-NEXT: mov w9, #6 +; CHECK-NEXT: msub w8, w8, w9, w0 ; CHECK-NEXT: and w8, w8, #0xf ; CHECK-NEXT: cmp w8, #1 ; CHECK-NEXT: cset w0, eq @@ -57,50 +57,50 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { ; CHECK-LABEL: test_srem_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #7282 +; CHECK-NEXT: mov x12, #7282 ; CHECK-NEXT: sbfx x9, x0, #0, #33 -; CHECK-NEXT: movk x8, #29127, lsl #16 +; CHECK-NEXT: movk x12, #29127, lsl #16 +; CHECK-NEXT: sbfx x10, x1, #0, #33 +; CHECK-NEXT: movk x12, #50972, lsl #32 ; CHECK-NEXT: mov x11, #7281 -; CHECK-NEXT: movk x8, #50972, lsl #32 +; CHECK-NEXT: movk x12, #7281, lsl #48 ; CHECK-NEXT: movk x11, #29127, lsl #16 -; CHECK-NEXT: movk x8, #7281, lsl #48 +; CHECK-NEXT: smulh x13, x10, x12 +; CHECK-NEXT: sbfx x8, x2, #0, #33 +; CHECK-NEXT: smulh x12, x9, x12 ; CHECK-NEXT: movk x11, #50972, lsl #32 -; CHECK-NEXT: sbfx x12, x1, #0, #33 -; CHECK-NEXT: sbfx x10, x2, #0, #33 -; CHECK-NEXT: smulh x13, x9, x8 ; CHECK-NEXT: movk x11, #7281, lsl #48 -; CHECK-NEXT: smulh x8, x12, x8 -; CHECK-NEXT: smulh x11, x10, x11 +; CHECK-NEXT: smulh x11, x8, x11 ; CHECK-NEXT: add x13, x13, x13, lsr #63 -; CHECK-NEXT: sub x11, x11, x10 -; CHECK-NEXT: add x8, x8, x8, lsr #63 +; CHECK-NEXT: add x12, x12, x12, lsr #63 ; CHECK-NEXT: add x13, x13, x13, lsl #3 +; CHECK-NEXT: add x12, x12, x12, lsl #3 +; CHECK-NEXT: sub x11, x11, x8 ; CHECK-NEXT: asr x14, x11, #3 -; CHECK-NEXT: sub x9, x9, x13 -; CHECK-NEXT: add x11, x14, x11, lsr #63 -; CHECK-NEXT: add x8, x8, x8, lsl #3 -; CHECK-NEXT: sub x8, x12, x8 -; CHECK-NEXT: add x11, x11, x11, lsl #3 +; CHECK-NEXT: sub x10, x10, x13 +; CHECK-NEXT: sub x9, x9, x12 +; CHECK-NEXT: mov x12, #8589934591 ; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: add x10, x10, x11 -; CHECK-NEXT: mov x9, #8589934591 -; CHECK-NEXT: adrp x11, .LCPI3_0 -; CHECK-NEXT: adrp x12, .LCPI3_1 -; CHECK-NEXT: mov v0.d[1], x8 -; CHECK-NEXT: fmov d1, x10 -; CHECK-NEXT: dup v2.2d, x9 -; CHECK-NEXT: ldr q3, [x11, :lo12:.LCPI3_0] -; CHECK-NEXT: ldr q4, [x12, :lo12:.LCPI3_1] -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: cmeq v0.2d, v0.2d, v3.2d -; CHECK-NEXT: cmeq v1.2d, v1.2d, v4.2d +; CHECK-NEXT: add x11, x14, x11, lsr #63 +; CHECK-NEXT: adrp x9, .LCPI3_0 +; CHECK-NEXT: dup v1.2d, x12 +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_0] +; CHECK-NEXT: adrp x9, .LCPI3_1 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: add x10, x11, x11, lsl #3 +; CHECK-NEXT: add x8, x8, x10 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmeq v0.2d, v0.2d, v2.2d +; CHECK-NEXT: fmov d2, x8 ; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: and v1.16b, v2.16b, v1.16b +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_1] ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: cmeq v1.2d, v1.2d, v2.2d +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: xtn v1.2s, v1.2d ; CHECK-NEXT: fmov w2, s1 ; CHECK-NEXT: ret %srem = srem <3 x i33> %X, Index: llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll =================================================================== --- llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll +++ llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll @@ -6,15 +6,15 @@ ; CHECK-LABEL: test_srem_odd_even: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: adrp x9, .LCPI0_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: adrp x8, .LCPI0_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_1] ; CHECK-NEXT: adrp x8, .LCPI0_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] -; CHECK-NEXT: adrp x9, .LCPI0_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI0_3] +; CHECK-NEXT: adrp x8, .LCPI0_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_3] ; CHECK-NEXT: adrp x8, .LCPI0_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -43,8 +43,8 @@ ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -64,8 +64,8 @@ ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhi v0.4s, v2.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -83,11 +83,11 @@ ; CHECK-NEXT: mov w9, #9362 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] @@ -107,11 +107,11 @@ ; CHECK-NEXT: mov w9, #9362 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] @@ -130,15 +130,15 @@ ; CHECK-LABEL: test_srem_odd_even_allones_eq: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: adrp x9, .LCPI5_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: adrp x8, .LCPI5_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_1] ; CHECK-NEXT: adrp x8, .LCPI5_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI5_1] -; CHECK-NEXT: adrp x9, .LCPI5_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI5_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI5_3] +; CHECK-NEXT: adrp x8, .LCPI5_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_3] ; CHECK-NEXT: adrp x8, .LCPI5_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -156,15 +156,15 @@ ; CHECK-LABEL: test_srem_odd_even_allones_ne: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI6_0 -; CHECK-NEXT: adrp x9, .LCPI6_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0] +; CHECK-NEXT: adrp x8, .LCPI6_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_1] ; CHECK-NEXT: adrp x8, .LCPI6_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI6_1] -; CHECK-NEXT: adrp x9, .LCPI6_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI6_3] +; CHECK-NEXT: adrp x8, .LCPI6_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_3] ; CHECK-NEXT: adrp x8, .LCPI6_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -186,15 +186,15 @@ ; CHECK-LABEL: test_srem_odd_poweroftwo: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI7_0 -; CHECK-NEXT: adrp x9, .LCPI7_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0] +; CHECK-NEXT: adrp x8, .LCPI7_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_1] ; CHECK-NEXT: adrp x8, .LCPI7_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI7_1] -; CHECK-NEXT: adrp x9, .LCPI7_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI7_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI7_3] +; CHECK-NEXT: adrp x8, .LCPI7_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_3] ; CHECK-NEXT: adrp x8, .LCPI7_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -214,15 +214,15 @@ ; CHECK-LABEL: test_srem_even_poweroftwo: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: adrp x9, .LCPI8_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0] +; CHECK-NEXT: adrp x8, .LCPI8_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_1] ; CHECK-NEXT: adrp x8, .LCPI8_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI8_1] -; CHECK-NEXT: adrp x9, .LCPI8_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI8_3] +; CHECK-NEXT: adrp x8, .LCPI8_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_3] ; CHECK-NEXT: adrp x8, .LCPI8_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -242,15 +242,15 @@ ; CHECK-LABEL: test_srem_odd_even_poweroftwo: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI9_0 -; CHECK-NEXT: adrp x9, .LCPI9_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0] +; CHECK-NEXT: adrp x8, .LCPI9_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_1] ; CHECK-NEXT: adrp x8, .LCPI9_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI9_1] -; CHECK-NEXT: adrp x9, .LCPI9_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI9_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI9_3] +; CHECK-NEXT: adrp x8, .LCPI9_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_3] ; CHECK-NEXT: adrp x8, .LCPI9_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -279,8 +279,8 @@ ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI10_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -298,11 +298,11 @@ ; CHECK-NEXT: mov w9, #9362 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] @@ -321,15 +321,15 @@ ; CHECK-LABEL: test_srem_odd_even_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI12_0 -; CHECK-NEXT: adrp x9, .LCPI12_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] +; CHECK-NEXT: adrp x8, .LCPI12_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_1] ; CHECK-NEXT: adrp x8, .LCPI12_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI12_1] -; CHECK-NEXT: adrp x9, .LCPI12_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI12_3] +; CHECK-NEXT: adrp x8, .LCPI12_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_3] ; CHECK-NEXT: adrp x8, .LCPI12_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -440,15 +440,15 @@ ; CHECK-LABEL: test_srem_odd_allones_and_poweroftwo: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: adrp x9, .LCPI16_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: adrp x8, .LCPI16_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_1] ; CHECK-NEXT: adrp x8, .LCPI16_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI16_1] -; CHECK-NEXT: adrp x9, .LCPI16_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI16_3] +; CHECK-NEXT: adrp x8, .LCPI16_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_3] ; CHECK-NEXT: adrp x8, .LCPI16_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -468,15 +468,15 @@ ; CHECK-LABEL: test_srem_even_allones_and_poweroftwo: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: adrp x9, .LCPI17_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: adrp x8, .LCPI17_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_1] ; CHECK-NEXT: adrp x8, .LCPI17_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI17_1] -; CHECK-NEXT: adrp x9, .LCPI17_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI17_3] +; CHECK-NEXT: adrp x8, .LCPI17_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_3] ; CHECK-NEXT: adrp x8, .LCPI17_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -496,15 +496,15 @@ ; CHECK-LABEL: test_srem_odd_even_allones_and_poweroftwo: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: adrp x9, .LCPI18_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: adrp x8, .LCPI18_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_1] ; CHECK-NEXT: adrp x8, .LCPI18_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_1] -; CHECK-NEXT: adrp x9, .LCPI18_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI18_3] +; CHECK-NEXT: adrp x8, .LCPI18_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_3] ; CHECK-NEXT: adrp x8, .LCPI18_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -533,8 +533,8 @@ ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI19_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -552,11 +552,11 @@ ; CHECK-NEXT: mov w9, #9362 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI20_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0] @@ -575,15 +575,15 @@ ; CHECK-LABEL: test_srem_odd_even_allones_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI21_0 -; CHECK-NEXT: adrp x9, .LCPI21_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0] +; CHECK-NEXT: adrp x8, .LCPI21_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_1] ; CHECK-NEXT: adrp x8, .LCPI21_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI21_1] -; CHECK-NEXT: adrp x9, .LCPI21_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI21_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI21_3] +; CHECK-NEXT: adrp x8, .LCPI21_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_3] ; CHECK-NEXT: adrp x8, .LCPI21_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -605,15 +605,15 @@ ; CHECK-LABEL: test_srem_odd_poweroftwo_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI22_0 -; CHECK-NEXT: adrp x9, .LCPI22_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0] +; CHECK-NEXT: adrp x8, .LCPI22_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_1] ; CHECK-NEXT: adrp x8, .LCPI22_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI22_1] -; CHECK-NEXT: adrp x9, .LCPI22_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI22_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI22_3] +; CHECK-NEXT: adrp x8, .LCPI22_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_3] ; CHECK-NEXT: adrp x8, .LCPI22_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -633,15 +633,15 @@ ; CHECK-LABEL: test_srem_even_poweroftwo_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI23_0 -; CHECK-NEXT: adrp x9, .LCPI23_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0] +; CHECK-NEXT: adrp x8, .LCPI23_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_1] ; CHECK-NEXT: adrp x8, .LCPI23_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI23_1] -; CHECK-NEXT: adrp x9, .LCPI23_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI23_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI23_3] +; CHECK-NEXT: adrp x8, .LCPI23_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_3] ; CHECK-NEXT: adrp x8, .LCPI23_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -661,15 +661,15 @@ ; CHECK-LABEL: test_srem_odd_even_poweroftwo_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI24_0 -; CHECK-NEXT: adrp x9, .LCPI24_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0] +; CHECK-NEXT: adrp x8, .LCPI24_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_1] ; CHECK-NEXT: adrp x8, .LCPI24_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI24_1] -; CHECK-NEXT: adrp x9, .LCPI24_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI24_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI24_3] +; CHECK-NEXT: adrp x8, .LCPI24_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_3] ; CHECK-NEXT: adrp x8, .LCPI24_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -690,15 +690,15 @@ ; CHECK-LABEL: test_srem_odd_allones_and_poweroftwo_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI25_0 -; CHECK-NEXT: adrp x9, .LCPI25_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_0] +; CHECK-NEXT: adrp x8, .LCPI25_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_1] ; CHECK-NEXT: adrp x8, .LCPI25_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI25_1] -; CHECK-NEXT: adrp x9, .LCPI25_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI25_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI25_3] +; CHECK-NEXT: adrp x8, .LCPI25_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_3] ; CHECK-NEXT: adrp x8, .LCPI25_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s @@ -717,15 +717,15 @@ ; CHECK-LABEL: test_srem_even_allones_and_poweroftwo_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI26_0 -; CHECK-NEXT: adrp x9, .LCPI26_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_0] +; CHECK-NEXT: adrp x8, .LCPI26_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_1] ; CHECK-NEXT: adrp x8, .LCPI26_2 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI26_1] -; CHECK-NEXT: adrp x9, .LCPI26_3 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI26_2] -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI26_3] +; CHECK-NEXT: adrp x8, .LCPI26_3 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_3] ; CHECK-NEXT: adrp x8, .LCPI26_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s Index: llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll =================================================================== --- llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll +++ llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll @@ -12,10 +12,10 @@ ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: mov w8, #28834 -; CHECK-NEXT: movk w8, #2621, lsl #16 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: movk w8, #2621, lsl #16 ; CHECK-NEXT: dup v0.4s, w8 +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -33,12 +33,12 @@ ; CHECK-NEXT: mov w9, #47184 ; CHECK-NEXT: movk w8, #49807, lsl #16 ; CHECK-NEXT: movk w9, #1310, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: mov w8, #23592 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: movk w8, #655, lsl #16 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: shl v0.4s, v2.4s, #30 ; CHECK-NEXT: ushr v1.4s, v2.4s, #2 ; CHECK-NEXT: dup v2.4s, w8 @@ -65,10 +65,10 @@ ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: mov w8, #28834 -; CHECK-NEXT: movk w8, #2621, lsl #16 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: movk w8, #2621, lsl #16 ; CHECK-NEXT: dup v0.4s, w8 +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -86,12 +86,12 @@ ; CHECK-NEXT: mov w9, #47184 ; CHECK-NEXT: movk w8, #49807, lsl #16 ; CHECK-NEXT: movk w9, #1310, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: mov w8, #23592 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: movk w8, #655, lsl #16 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: shl v0.4s, v2.4s, #30 ; CHECK-NEXT: ushr v1.4s, v2.4s, #2 ; CHECK-NEXT: dup v2.4s, w8 @@ -114,8 +114,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v3.4s, #25 ; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: movi v3.4s, #25 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s @@ -137,8 +137,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v3.4s, #100 ; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: movi v3.4s, #100 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s @@ -184,9 +184,9 @@ define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_pow2: ; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v3.4s, v0.4s, #0 +; CHECK-NEXT: cmlt v1.4s, v0.4s, #0 ; CHECK-NEXT: mov v2.16b, v0.16b -; CHECK-NEXT: usra v2.4s, v3.4s, #28 +; CHECK-NEXT: usra v2.4s, v1.4s, #28 ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: bic v2.4s, #15 ; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s @@ -203,11 +203,11 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_int_min: ; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v2.4s, v0.4s, #0 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: movi v3.4s, #128, lsl #24 -; CHECK-NEXT: usra v1.4s, v2.4s, #1 -; CHECK-NEXT: and v1.16b, v1.16b, v3.16b +; CHECK-NEXT: cmlt v1.4s, v0.4s, #0 +; CHECK-NEXT: movi v2.4s, #128, lsl #24 +; CHECK-NEXT: mov v3.16b, v0.16b +; CHECK-NEXT: usra v3.4s, v1.4s, #1 +; CHECK-NEXT: and v1.16b, v3.16b, v2.16b ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 Index: llvm/test/CodeGen/AArch64/srem-vector-lkk.ll =================================================================== --- llvm/test/CodeGen/AArch64/srem-vector-lkk.ll +++ llvm/test/CodeGen/AArch64/srem-vector-lkk.ll @@ -5,47 +5,47 @@ ; CHECK-LABEL: fold_srem_vec_1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[1] ; CHECK-NEXT: smov w9, v0.h[0] -; CHECK-NEXT: mov w10, #63421 -; CHECK-NEXT: mov w11, #37253 -; CHECK-NEXT: movk w10, #31710, lsl #16 -; CHECK-NEXT: movk w11, #44150, lsl #16 -; CHECK-NEXT: smov w13, v0.h[2] -; CHECK-NEXT: mov w12, #33437 -; CHECK-NEXT: smull x10, w8, w10 -; CHECK-NEXT: movk w12, #21399, lsl #16 -; CHECK-NEXT: smull x11, w9, w11 -; CHECK-NEXT: lsr x10, x10, #32 +; CHECK-NEXT: mov w8, #37253 +; CHECK-NEXT: smov w10, v0.h[1] +; CHECK-NEXT: movk w8, #44150, lsl #16 +; CHECK-NEXT: mov w11, #63421 +; CHECK-NEXT: smov w12, v0.h[2] +; CHECK-NEXT: movk w11, #31710, lsl #16 +; CHECK-NEXT: mov w15, #33437 +; CHECK-NEXT: smull x8, w9, w8 +; CHECK-NEXT: movk w15, #21399, lsl #16 +; CHECK-NEXT: smull x11, w10, w11 +; CHECK-NEXT: mov w14, #95 +; CHECK-NEXT: lsr x8, x8, #32 ; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: sub w10, w10, w8 -; CHECK-NEXT: add w11, w11, w9 -; CHECK-NEXT: asr w14, w10, #6 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: asr w13, w8, #6 +; CHECK-NEXT: sub w11, w11, w10 +; CHECK-NEXT: add w8, w13, w8, lsr #31 +; CHECK-NEXT: smull x13, w12, w15 ; CHECK-NEXT: asr w15, w11, #6 -; CHECK-NEXT: add w10, w14, w10, lsr #31 +; CHECK-NEXT: msub w8, w8, w14, w9 +; CHECK-NEXT: mov w9, #-124 ; CHECK-NEXT: add w11, w15, w11, lsr #31 -; CHECK-NEXT: mov w14, #95 -; CHECK-NEXT: mov w15, #-124 -; CHECK-NEXT: smull x12, w13, w12 -; CHECK-NEXT: msub w9, w11, w14, w9 -; CHECK-NEXT: msub w8, w10, w15, w8 -; CHECK-NEXT: lsr x10, x12, #63 -; CHECK-NEXT: asr x11, x12, #37 -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: add w10, w11, w10 -; CHECK-NEXT: mov w11, #98 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov w9, #63249 -; CHECK-NEXT: movk w9, #48808, lsl #16 -; CHECK-NEXT: msub w10, w10, w11, w13 -; CHECK-NEXT: smull x9, w12, w9 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: lsr x8, x9, #63 -; CHECK-NEXT: asr x9, x9, #40 -; CHECK-NEXT: add w8, w9, w8 +; CHECK-NEXT: lsr x14, x13, #63 +; CHECK-NEXT: asr x13, x13, #37 +; CHECK-NEXT: msub w9, w11, w9, w10 +; CHECK-NEXT: smov w10, v0.h[3] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov w8, #63249 +; CHECK-NEXT: movk w8, #48808, lsl #16 +; CHECK-NEXT: add w11, w13, w14 +; CHECK-NEXT: smull x8, w10, w8 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: mov w9, #98 +; CHECK-NEXT: msub w9, w11, w9, w12 +; CHECK-NEXT: lsr x11, x8, #63 +; CHECK-NEXT: asr x8, x8, #40 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: add w8, w8, w11 ; CHECK-NEXT: mov w9, #-1003 -; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: msub w8, w8, w9, w12 +; CHECK-NEXT: msub w8, w8, w9, w10 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -57,40 +57,40 @@ ; CHECK-LABEL: fold_srem_vec_2: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w9, v0.h[0] +; CHECK-NEXT: smov w10, v0.h[0] +; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: mov w8, #37253 +; CHECK-NEXT: smov w12, v0.h[2] ; CHECK-NEXT: movk w8, #44150, lsl #16 -; CHECK-NEXT: smov w10, v0.h[1] -; CHECK-NEXT: smov w14, v0.h[2] -; CHECK-NEXT: mov w12, #95 -; CHECK-NEXT: smull x11, w9, w8 +; CHECK-NEXT: smov w14, v0.h[3] ; CHECK-NEXT: smull x13, w10, w8 -; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: add w11, w11, w9 +; CHECK-NEXT: smull x11, w9, w8 +; CHECK-NEXT: smull x15, w12, w8 ; CHECK-NEXT: lsr x13, x13, #32 -; CHECK-NEXT: asr w15, w11, #6 -; CHECK-NEXT: add w13, w13, w10 -; CHECK-NEXT: add w11, w15, w11, lsr #31 -; CHECK-NEXT: smov w15, v0.h[3] -; CHECK-NEXT: asr w16, w13, #6 -; CHECK-NEXT: msub w9, w11, w12, w9 -; CHECK-NEXT: add w13, w16, w13, lsr #31 -; CHECK-NEXT: smull x11, w14, w8 -; CHECK-NEXT: msub w10, w13, w12, w10 +; CHECK-NEXT: smull x8, w14, w8 ; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: smull x8, w15, w8 -; CHECK-NEXT: add w11, w11, w14 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: asr w9, w11, #6 +; CHECK-NEXT: add w13, w13, w10 +; CHECK-NEXT: lsr x15, x15, #32 +; CHECK-NEXT: add w11, w11, w9 +; CHECK-NEXT: asr w17, w13, #6 +; CHECK-NEXT: asr w16, w11, #6 ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: add w9, w9, w11, lsr #31 -; CHECK-NEXT: add w8, w8, w15 -; CHECK-NEXT: mov v0.h[1], w10 +; CHECK-NEXT: add w13, w17, w13, lsr #31 +; CHECK-NEXT: add w11, w16, w11, lsr #31 +; CHECK-NEXT: mov w16, #95 +; CHECK-NEXT: msub w10, w13, w16, w10 +; CHECK-NEXT: add w13, w15, w12 +; CHECK-NEXT: msub w9, w11, w16, w9 +; CHECK-NEXT: asr w11, w13, #6 +; CHECK-NEXT: add w8, w8, w14 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: add w10, w11, w13, lsr #31 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: msub w9, w10, w16, w12 ; CHECK-NEXT: asr w10, w8, #6 -; CHECK-NEXT: msub w9, w9, w12, w14 ; CHECK-NEXT: add w8, w10, w8, lsr #31 -; CHECK-NEXT: msub w8, w8, w12, w15 ; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: msub w8, w8, w16, w14 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -104,46 +104,46 @@ ; CHECK-LABEL: combine_srem_sdiv: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w9, v0.h[0] +; CHECK-NEXT: smov w10, v0.h[0] ; CHECK-NEXT: mov w8, #37253 ; CHECK-NEXT: movk w8, #44150, lsl #16 -; CHECK-NEXT: smov w10, v0.h[1] +; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: smov w11, v0.h[2] ; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: mov w14, #95 +; CHECK-NEXT: mov w16, #95 +; CHECK-NEXT: smull x14, w10, w8 ; CHECK-NEXT: smull x13, w9, w8 -; CHECK-NEXT: smull x15, w10, w8 +; CHECK-NEXT: smull x15, w11, w8 +; CHECK-NEXT: lsr x14, x14, #32 +; CHECK-NEXT: smull x8, w12, w8 ; CHECK-NEXT: lsr x13, x13, #32 -; CHECK-NEXT: smull x16, w11, w8 -; CHECK-NEXT: add w13, w13, w9 +; CHECK-NEXT: add w14, w14, w10 ; CHECK-NEXT: lsr x15, x15, #32 -; CHECK-NEXT: asr w17, w13, #6 -; CHECK-NEXT: add w15, w15, w10 -; CHECK-NEXT: add w13, w17, w13, lsr #31 +; CHECK-NEXT: asr w17, w14, #6 +; CHECK-NEXT: add w13, w13, w9 +; CHECK-NEXT: asr w18, w13, #6 +; CHECK-NEXT: add w15, w15, w11 +; CHECK-NEXT: add w14, w17, w14, lsr #31 ; CHECK-NEXT: asr w17, w15, #6 -; CHECK-NEXT: add w15, w17, w15, lsr #31 -; CHECK-NEXT: smull x8, w12, w8 -; CHECK-NEXT: msub w9, w13, w14, w9 -; CHECK-NEXT: lsr x16, x16, #32 -; CHECK-NEXT: add w16, w16, w11 -; CHECK-NEXT: msub w10, w15, w14, w10 -; CHECK-NEXT: asr w17, w16, #6 +; CHECK-NEXT: add w13, w18, w13, lsr #31 ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: fmov s1, w13 -; CHECK-NEXT: add w16, w17, w16, lsr #31 -; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: msub w10, w14, w16, w10 +; CHECK-NEXT: fmov s0, w14 +; CHECK-NEXT: msub w9, w13, w16, w9 +; CHECK-NEXT: add w15, w17, w15, lsr #31 ; CHECK-NEXT: add w8, w8, w12 -; CHECK-NEXT: asr w9, w8, #6 -; CHECK-NEXT: add w8, w9, w8, lsr #31 -; CHECK-NEXT: msub w9, w16, w14, w11 -; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: mov v1.h[1], w15 -; CHECK-NEXT: msub w10, w8, w14, w12 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: mov v1.h[2], w16 -; CHECK-NEXT: mov v0.h[3], w10 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: asr w18, w8, #6 +; CHECK-NEXT: mov v0.h[1], w13 +; CHECK-NEXT: msub w10, w15, w16, w11 +; CHECK-NEXT: add w8, w18, w8, lsr #31 +; CHECK-NEXT: mov v1.h[1], w9 +; CHECK-NEXT: msub w9, w8, w16, w12 +; CHECK-NEXT: mov v0.h[2], w15 +; CHECK-NEXT: mov v1.h[2], w10 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: mov v1.h[3], w9 +; CHECK-NEXT: add v0.4h, v1.4h, v0.4h ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, %2 = sdiv <4 x i16> %x, @@ -157,33 +157,33 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w9, v0.h[1] -; CHECK-NEXT: smov w10, v0.h[0] +; CHECK-NEXT: smov w11, v0.h[0] +; CHECK-NEXT: smov w10, v0.h[3] ; CHECK-NEXT: mov w8, #37253 -; CHECK-NEXT: smov w12, v0.h[2] ; CHECK-NEXT: movk w8, #44150, lsl #16 -; CHECK-NEXT: negs w11, w9 +; CHECK-NEXT: smov w13, v0.h[2] +; CHECK-NEXT: negs w12, w9 ; CHECK-NEXT: and w9, w9, #0x1f -; CHECK-NEXT: and w11, w11, #0x1f -; CHECK-NEXT: csneg w9, w9, w11, mi -; CHECK-NEXT: negs w11, w10 -; CHECK-NEXT: and w10, w10, #0x3f +; CHECK-NEXT: and w12, w12, #0x1f +; CHECK-NEXT: smull x8, w10, w8 +; CHECK-NEXT: csneg w9, w9, w12, mi +; CHECK-NEXT: and w12, w11, #0x3f +; CHECK-NEXT: negs w11, w11 ; CHECK-NEXT: and w11, w11, #0x3f -; CHECK-NEXT: csneg w10, w10, w11, mi -; CHECK-NEXT: smov w11, v0.h[3] -; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: negs w10, w12 -; CHECK-NEXT: smull x8, w11, w8 -; CHECK-NEXT: and w10, w10, #0x7 ; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: csneg w11, w12, w11, mi +; CHECK-NEXT: and w12, w13, #0x7 +; CHECK-NEXT: fmov s0, w11 +; CHECK-NEXT: add w8, w8, w10 +; CHECK-NEXT: asr w11, w8, #6 ; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: and w9, w12, #0x7 -; CHECK-NEXT: add w8, w8, w11 -; CHECK-NEXT: csneg w9, w9, w10, mi -; CHECK-NEXT: asr w10, w8, #6 -; CHECK-NEXT: add w8, w10, w8, lsr #31 -; CHECK-NEXT: mov w10, #95 +; CHECK-NEXT: negs w9, w13 +; CHECK-NEXT: and w9, w9, #0x7 +; CHECK-NEXT: add w8, w11, w8, lsr #31 +; CHECK-NEXT: csneg w9, w12, w9, mi ; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: msub w8, w8, w10, w11 +; CHECK-NEXT: mov w9, #95 +; CHECK-NEXT: msub w8, w8, w9, w10 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -197,39 +197,39 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w8, v0.h[1] -; CHECK-NEXT: smov w9, v0.h[2] -; CHECK-NEXT: mov w10, #30865 +; CHECK-NEXT: mov w9, #30865 +; CHECK-NEXT: movk w9, #51306, lsl #16 +; CHECK-NEXT: smov w10, v0.h[2] ; CHECK-NEXT: mov w11, #17097 -; CHECK-NEXT: movk w10, #51306, lsl #16 +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: movk w11, #45590, lsl #16 -; CHECK-NEXT: mov w12, #654 -; CHECK-NEXT: smull x10, w8, w10 -; CHECK-NEXT: smull x11, w9, w11 -; CHECK-NEXT: lsr x10, x10, #32 +; CHECK-NEXT: smull x9, w8, w9 +; CHECK-NEXT: smull x11, w10, w11 +; CHECK-NEXT: lsr x9, x9, #32 ; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: add w10, w10, w8 -; CHECK-NEXT: add w11, w11, w9 -; CHECK-NEXT: asr w13, w10, #9 -; CHECK-NEXT: add w10, w13, w10, lsr #31 -; CHECK-NEXT: asr w13, w11, #4 -; CHECK-NEXT: add w11, w13, w11, lsr #31 -; CHECK-NEXT: smov w13, v0.h[3] -; CHECK-NEXT: msub w8, w10, w12, w8 -; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: add w9, w9, w8 +; CHECK-NEXT: asr w12, w9, #9 +; CHECK-NEXT: add w11, w11, w10 +; CHECK-NEXT: add w9, w12, w9, lsr #31 +; CHECK-NEXT: mov w12, #654 +; CHECK-NEXT: msub w8, w9, w12, w8 +; CHECK-NEXT: smov w9, v0.h[3] +; CHECK-NEXT: asr w12, w11, #4 +; CHECK-NEXT: add w11, w12, w11, lsr #31 ; CHECK-NEXT: mov w12, #47143 -; CHECK-NEXT: mov w10, #23 ; CHECK-NEXT: movk w12, #24749, lsl #16 -; CHECK-NEXT: msub w9, w11, w10, w9 -; CHECK-NEXT: smull x10, w13, w12 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: lsr x8, x10, #63 -; CHECK-NEXT: asr x10, x10, #43 -; CHECK-NEXT: add w8, w10, w8 +; CHECK-NEXT: smull x12, w9, w12 +; CHECK-NEXT: mov v1.h[1], w8 +; CHECK-NEXT: mov w8, #23 +; CHECK-NEXT: msub w8, w11, w8, w10 +; CHECK-NEXT: lsr x10, x12, #63 +; CHECK-NEXT: asr x11, x12, #43 +; CHECK-NEXT: mov v1.h[2], w8 +; CHECK-NEXT: add w8, w11, w10 ; CHECK-NEXT: mov w10, #5423 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: msub w8, w8, w10, w13 -; CHECK-NEXT: mov v0.h[3], w8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: msub w8, w8, w10, w9 +; CHECK-NEXT: mov v1.h[3], w8 +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -240,33 +240,33 @@ ; CHECK-LABEL: dont_fold_srem_i16_smax: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[2] -; CHECK-NEXT: mov w9, #17097 -; CHECK-NEXT: movk w9, #45590, lsl #16 +; CHECK-NEXT: smov w9, v0.h[2] +; CHECK-NEXT: mov w8, #17097 +; CHECK-NEXT: movk w8, #45590, lsl #16 ; CHECK-NEXT: smov w10, v0.h[1] -; CHECK-NEXT: smov w12, v0.h[3] ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w11, #23 -; CHECK-NEXT: smull x9, w8, w9 -; CHECK-NEXT: lsr x9, x9, #32 -; CHECK-NEXT: add w9, w9, w8 -; CHECK-NEXT: asr w13, w9, #4 -; CHECK-NEXT: add w9, w13, w9, lsr #31 -; CHECK-NEXT: negs w13, w10 +; CHECK-NEXT: smov w11, v0.h[3] +; CHECK-NEXT: smull x8, w9, w8 +; CHECK-NEXT: and w12, w10, #0x7fff +; CHECK-NEXT: negs w10, w10 ; CHECK-NEXT: and w10, w10, #0x7fff -; CHECK-NEXT: and w13, w13, #0x7fff -; CHECK-NEXT: csneg w10, w10, w13, mi -; CHECK-NEXT: mov w13, #47143 -; CHECK-NEXT: movk w13, #24749, lsl #16 -; CHECK-NEXT: msub w8, w9, w11, w8 -; CHECK-NEXT: smull x9, w12, w13 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: csneg w10, w12, w10, mi ; CHECK-NEXT: mov v1.h[1], w10 -; CHECK-NEXT: lsr x10, x9, #63 -; CHECK-NEXT: asr x9, x9, #43 -; CHECK-NEXT: add w9, w9, w10 -; CHECK-NEXT: mov w10, #5423 +; CHECK-NEXT: mov w10, #47143 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: movk w10, #24749, lsl #16 +; CHECK-NEXT: asr w12, w8, #4 +; CHECK-NEXT: smull x10, w11, w10 +; CHECK-NEXT: add w8, w12, w8, lsr #31 +; CHECK-NEXT: mov w12, #23 +; CHECK-NEXT: msub w8, w8, w12, w9 +; CHECK-NEXT: lsr x9, x10, #63 +; CHECK-NEXT: asr x10, x10, #43 ; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: msub w8, w9, w10, w12 +; CHECK-NEXT: add w8, w10, w9 +; CHECK-NEXT: mov w9, #5423 +; CHECK-NEXT: msub w8, w8, w9, w11 ; CHECK-NEXT: mov v1.h[3], w8 ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret @@ -285,32 +285,32 @@ ; CHECK-NEXT: movk x8, #17096, lsl #32 ; CHECK-NEXT: movk x12, #58853, lsl #16 ; CHECK-NEXT: movk x8, #45590, lsl #48 -; CHECK-NEXT: mov x14, #21445 +; CHECK-NEXT: mov x13, #21445 ; CHECK-NEXT: mov x10, v1.d[1] -; CHECK-NEXT: movk x12, #47142, lsl #32 ; CHECK-NEXT: smulh x8, x9, x8 -; CHECK-NEXT: movk x14, #1603, lsl #16 ; CHECK-NEXT: mov x11, v0.d[1] +; CHECK-NEXT: movk x12, #47142, lsl #32 +; CHECK-NEXT: movk x13, #1603, lsl #16 ; CHECK-NEXT: movk x12, #24749, lsl #48 -; CHECK-NEXT: add x8, x8, x9 -; CHECK-NEXT: movk x14, #15432, lsl #32 -; CHECK-NEXT: asr x13, x8, #4 -; CHECK-NEXT: movk x14, #25653, lsl #48 -; CHECK-NEXT: add x8, x13, x8, lsr #63 -; CHECK-NEXT: mov w13, #23 +; CHECK-NEXT: movk x13, #15432, lsl #32 +; CHECK-NEXT: movk x13, #25653, lsl #48 ; CHECK-NEXT: smulh x12, x10, x12 -; CHECK-NEXT: smulh x14, x11, x14 -; CHECK-NEXT: msub x8, x8, x13, x9 -; CHECK-NEXT: asr x13, x12, #11 -; CHECK-NEXT: add x12, x13, x12, lsr #63 -; CHECK-NEXT: asr x13, x14, #8 -; CHECK-NEXT: mov w9, #5423 -; CHECK-NEXT: add x13, x13, x14, lsr #63 -; CHECK-NEXT: mov w14, #654 -; CHECK-NEXT: msub x9, x12, x9, x10 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: msub x10, x13, x14, x11 +; CHECK-NEXT: smulh x13, x11, x13 +; CHECK-NEXT: add x8, x8, x9 +; CHECK-NEXT: asr x14, x8, #4 ; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: asr x15, x12, #11 +; CHECK-NEXT: add x8, x14, x8, lsr #63 +; CHECK-NEXT: mov w14, #23 +; CHECK-NEXT: asr x16, x13, #8 +; CHECK-NEXT: msub x8, x8, x14, x9 +; CHECK-NEXT: add x9, x15, x12, lsr #63 +; CHECK-NEXT: add x12, x16, x13, lsr #63 +; CHECK-NEXT: mov w13, #5423 +; CHECK-NEXT: msub x9, x9, x13, x10 +; CHECK-NEXT: mov w10, #654 +; CHECK-NEXT: msub x10, x12, x10, x11 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v1.d[1], x9 ; CHECK-NEXT: mov v0.d[1], x10 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/sshl_sat.ll =================================================================== --- llvm/test/CodeGen/AArch64/sshl_sat.ll +++ llvm/test/CodeGen/AArch64/sshl_sat.ll @@ -146,8 +146,8 @@ ; CHECK-NEXT: mov w9, #-65536 ; CHECK-NEXT: mov w10, #-2147483648 ; CHECK-NEXT: ands w8, w9, w8, lsl #14 -; CHECK-NEXT: lsl w9, w8, #3 ; CHECK-NEXT: cinv w10, w10, ge +; CHECK-NEXT: lsl w9, w8, #3 ; CHECK-NEXT: cmp w8, w9, asr #3 ; CHECK-NEXT: csel w8, w10, w9, ne ; CHECK-NEXT: asr w0, w8, #16 Index: llvm/test/CodeGen/AArch64/ssub_sat_plus.ll =================================================================== --- llvm/test/CodeGen/AArch64/ssub_sat_plus.ll +++ llvm/test/CodeGen/AArch64/ssub_sat_plus.ll @@ -37,12 +37,12 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind { ; CHECK-LABEL: func16: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: sxth w10, w0 -; CHECK-NEXT: mov w8, #32767 -; CHECK-NEXT: sub w9, w10, w9, sxth -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w8, w9, w8, lt +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: sub w8, w9, w8, sxth +; CHECK-NEXT: mov w9, #32767 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w8, w8, w9, lt ; CHECK-NEXT: mov w9, #-32768 ; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w0, w8, w9, gt @@ -55,12 +55,12 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { ; CHECK-LABEL: func8: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: sxtb w10, w0 -; CHECK-NEXT: mov w8, #127 -; CHECK-NEXT: sub w9, w10, w9, sxtb -; CHECK-NEXT: cmp w9, #127 -; CHECK-NEXT: csel w8, w9, w8, lt +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: sub w8, w9, w8, sxtb +; CHECK-NEXT: mov w9, #127 +; CHECK-NEXT: cmp w8, #127 +; CHECK-NEXT: csel w8, w8, w9, lt ; CHECK-NEXT: mov w9, #-128 ; CHECK-NEXT: cmn w8, #128 ; CHECK-NEXT: csel w0, w8, w9, gt @@ -73,13 +73,13 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { ; CHECK-LABEL: func4: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: sbfx w10, w0, #0, #4 -; CHECK-NEXT: mov w8, #7 -; CHECK-NEXT: lsl w9, w9, #28 -; CHECK-NEXT: sub w9, w10, w9, asr #28 -; CHECK-NEXT: cmp w9, #7 -; CHECK-NEXT: csel w8, w9, w8, lt +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: sbfx w9, w0, #0, #4 +; CHECK-NEXT: lsl w8, w8, #28 +; CHECK-NEXT: sub w8, w9, w8, asr #28 +; CHECK-NEXT: mov w9, #7 +; CHECK-NEXT: cmp w8, #7 +; CHECK-NEXT: csel w8, w8, w9, lt ; CHECK-NEXT: mov w9, #-8 ; CHECK-NEXT: cmn w8, #8 ; CHECK-NEXT: csel w0, w8, w9, gt Index: llvm/test/CodeGen/AArch64/ssub_sat_vec.ll =================================================================== --- llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -55,9 +55,9 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.16b, v2.16b, v6.16b ; CHECK-NEXT: sqsub v0.16b, v0.16b, v4.16b ; CHECK-NEXT: sqsub v1.16b, v1.16b, v5.16b +; CHECK-NEXT: sqsub v2.16b, v2.16b, v6.16b ; CHECK-NEXT: sqsub v3.16b, v3.16b, v7.16b ; CHECK-NEXT: ret %z = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> %x, <64 x i8> %y) @@ -86,9 +86,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { ; CHECK-LABEL: v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.8h, v2.8h, v6.8h ; CHECK-NEXT: sqsub v0.8h, v0.8h, v4.8h ; CHECK-NEXT: sqsub v1.8h, v1.8h, v5.8h +; CHECK-NEXT: sqsub v2.8h, v2.8h, v6.8h ; CHECK-NEXT: sqsub v3.8h, v3.8h, v7.8h ; CHECK-NEXT: ret %z = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %x, <32 x i16> %y) @@ -98,9 +98,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: sqsub v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: sqsub v0.8b, v0.8b, v1.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -134,15 +134,15 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind { ; CHECK-LABEL: v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.b }[0], [x1] -; CHECK-NEXT: add x8, x1, #1 -; CHECK-NEXT: ld1 { v1.b }[0], [x0] -; CHECK-NEXT: add x9, x0, #1 +; CHECK-NEXT: ld1 { v0.b }[0], [x0] +; CHECK-NEXT: add x8, x0, #1 +; CHECK-NEXT: ld1 { v1.b }[0], [x1] +; CHECK-NEXT: add x9, x1, #1 ; CHECK-NEXT: ld1 { v0.b }[4], [x8] ; CHECK-NEXT: ld1 { v1.b }[4], [x9] ; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: sqsub v0.2s, v1.2s, v0.2s +; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #24 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 @@ -159,9 +159,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: sqsub v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -174,15 +174,15 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind { ; CHECK-LABEL: v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.h }[0], [x1] -; CHECK-NEXT: add x8, x1, #2 -; CHECK-NEXT: ld1 { v1.h }[0], [x0] -; CHECK-NEXT: add x9, x0, #2 +; CHECK-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-NEXT: add x8, x0, #2 +; CHECK-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-NEXT: add x9, x1, #2 ; CHECK-NEXT: ld1 { v0.h }[2], [x8] ; CHECK-NEXT: ld1 { v1.h }[2], [x9] ; CHECK-NEXT: shl v0.2s, v0.2s, #16 ; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: sqsub v0.2s, v1.2s, v0.2s +; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #16 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 @@ -225,9 +225,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x1] -; CHECK-NEXT: ldr b1, [x0] -; CHECK-NEXT: sqsub v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: ldr b1, [x1] +; CHECK-NEXT: sqsub v0.8b, v0.8b, v1.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -240,9 +240,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x1] -; CHECK-NEXT: ldr h1, [x0] -; CHECK-NEXT: sqsub v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: ldr h1, [x1] +; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -310,9 +310,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { ; CHECK-LABEL: v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.4s, v2.4s, v6.4s ; CHECK-NEXT: sqsub v0.4s, v0.4s, v4.4s ; CHECK-NEXT: sqsub v1.4s, v1.4s, v5.4s +; CHECK-NEXT: sqsub v2.4s, v2.4s, v6.4s ; CHECK-NEXT: sqsub v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %z = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %x, <16 x i32> %y) @@ -341,9 +341,9 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.2d, v2.2d, v6.2d ; CHECK-NEXT: sqsub v0.2d, v0.2d, v4.2d ; CHECK-NEXT: sqsub v1.2d, v1.2d, v5.2d +; CHECK-NEXT: sqsub v2.2d, v2.2d, v6.2d ; CHECK-NEXT: sqsub v3.2d, v3.2d, v7.2d ; CHECK-NEXT: ret %z = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %x, <8 x i64> %y) @@ -355,21 +355,21 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x2, x6 ; CHECK-NEXT: sbcs x9, x3, x7 -; CHECK-NEXT: cset w10, vs -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x2, x11, x8, ne -; CHECK-NEXT: eor x8, x11, #0x8000000000000000 +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: cset w11, vs +; CHECK-NEXT: cmp w11, #0 +; CHECK-NEXT: csel x2, x10, x8, ne +; CHECK-NEXT: eor x8, x10, #0x8000000000000000 ; CHECK-NEXT: csel x3, x8, x9, ne ; CHECK-NEXT: subs x8, x0, x4 ; CHECK-NEXT: sbcs x9, x1, x5 -; CHECK-NEXT: cset w10, vs -; CHECK-NEXT: asr x11, x9, #63 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: eor x10, x11, #0x8000000000000000 -; CHECK-NEXT: csel x8, x11, x8, ne -; CHECK-NEXT: csel x1, x10, x9, ne +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: cset w11, vs +; CHECK-NEXT: cmp w11, #0 +; CHECK-NEXT: csel x8, x10, x8, ne +; CHECK-NEXT: eor x11, x10, #0x8000000000000000 ; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: csel x1, x11, x9, ne ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/stack-guard-sysreg.ll =================================================================== --- llvm/test/CodeGen/AArch64/stack-guard-sysreg.ll +++ llvm/test/CodeGen/AArch64/stack-guard-sysreg.ll @@ -58,10 +58,10 @@ ; CHECK-NPOT-NEG-OFFSET: ldur x8, [x8, #-1] ; CHECK-257-OFFSET: add x8, x8, #257 ; CHECK-MINUS-257-OFFSET: sub x8, x8, #257 -; CHECK-NEXT: add x9, x9, #15 -; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 ; CHECK-257-OFFSET-NEXT: ldr x8, [x8] ; CHECK-MINUS-257-OFFSET-NEXT: ldr x8, [x8] +; CHECK-NEXT: add x9, x9, #15 +; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 ; CHECK-NEXT: stur x8, [x29, #-8] ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: sub x0, x8, x9 Index: llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll =================================================================== --- llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll +++ llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll @@ -208,13 +208,13 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: ldr x8, [sp, #64] ; CHECK-NEXT: mov x18, xzr ; CHECK-NEXT: ldr q0, [sp, #48] -; CHECK-NEXT: ldr x8, [sp, #64] ; CHECK-NEXT: mov w0, #42 ; CHECK-NEXT: mov w1, #17 -; CHECK-NEXT: str q0, [sp] ; CHECK-NEXT: str x8, [sp, #16] +; CHECK-NEXT: str q0, [sp] ; CHECK-NEXT: bl consume_attributes ; CHECK-NEXT: .Ltmp9: ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload Index: llvm/test/CodeGen/AArch64/sve-abd.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-abd.ll +++ llvm/test/CodeGen/AArch64/sve-abd.ll @@ -24,10 +24,10 @@ define @sabd_b_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: sabd_b_promoted_ops: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sabd z0.b, p2/m, z0.b, z1.b +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: sabd z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret %a.sext = sext %a to %b.sext = sext %b to @@ -144,10 +144,10 @@ define @uabd_b_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: uabd_b_promoted_ops: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 ; CHECK-NEXT: mov z1.b, p1/z, #1 // =0x1 -; CHECK-NEXT: uabd z0.b, p2/m, z0.b, z1.b +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: uabd z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret %a.zext = zext %a to %b.zext = zext %b to @@ -173,9 +173,9 @@ define @uabd_h_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: uabd_h_promoted_ops: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: and z1.h, z1.h, #0xff +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: uabd z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %a.zext = zext %a to @@ -202,9 +202,9 @@ define @uabd_s_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: uabd_s_promoted_ops: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: and z1.s, z1.s, #0xffff +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %a.zext = zext %a to @@ -231,9 +231,9 @@ define @uabd_d_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: uabd_d_promoted_ops: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z0.d, z0.d, #0xffffffff ; CHECK-NEXT: and z1.d, z1.d, #0xffffffff +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uabd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %a.zext = zext %a to @@ -251,11 +251,11 @@ ; CHECK-NEXT: and z1.s, z1.s, #0xff ; CHECK-NEXT: uunpkhi z2.d, z0.s ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: uunpkhi z3.d, z1.s -; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: uunpklo z3.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: sub z0.d, z0.d, z3.d +; CHECK-NEXT: sub z1.d, z2.d, z1.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: sub z0.d, z0.d, z1.d -; CHECK-NEXT: sub z1.d, z2.d, z3.d ; CHECK-NEXT: abs z1.d, p0/m, z1.d ; CHECK-NEXT: abs z0.d, p0/m, z0.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s @@ -273,9 +273,9 @@ define @uabd_non_matching_promoted_ops( %a, %b) #0 { ; CHECK-LABEL: uabd_non_matching_promoted_ops: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: and z0.s, z0.s, #0xff ; CHECK-NEXT: and z1.s, z1.s, #0xffff +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %a.zext = zext %a to Index: llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -16,8 +16,8 @@ ; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] ; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x2] -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: st1d { z16.d }, p0, [sp] ; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl] @@ -43,25 +43,25 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: add x9, sp, #16 +; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] ; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: mov w1, #1 ; CHECK-NEXT: mov w2, #2 -; CHECK-NEXT: st1d { z16.d }, p0, [x9] -; CHECK-NEXT: add x9, sp, #16 ; CHECK-NEXT: mov w3, #3 +; CHECK-NEXT: st1d { z16.d }, p0, [x8] +; CHECK-NEXT: add x8, sp, #16 +; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: add x8, sp, #16 +; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] +; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: mov w4, #4 ; CHECK-NEXT: mov w5, #5 ; CHECK-NEXT: mov w6, #6 -; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl] -; CHECK-NEXT: add x9, sp, #16 ; CHECK-NEXT: mov w7, #7 -; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl] ; CHECK-NEXT: add x9, sp, #16 ; CHECK-NEXT: st1d { z19.d }, p0, [x9, #3, mul vl] ; CHECK-NEXT: str x8, [sp] @@ -90,8 +90,8 @@ ; CHECK-NEXT: ld3d { z16.d, z17.d, z18.d }, p0/z, [x1] ; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2] ; CHECK-NEXT: fmov s1, #2.00000000 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: st1d { z16.d }, p0, [sp] ; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl] Index: llvm/test/CodeGen/AArch64/sve-extract-element.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-extract-element.ll +++ llvm/test/CodeGen/AArch64/sve-extract-element.ll @@ -521,8 +521,8 @@ ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x8, w0 ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 -; CHECK-NEXT: whilels p0.s, xzr, x8 -; CHECK-NEXT: lastb w8, p0, z0.s +; CHECK-NEXT: whilels p1.s, xzr, x8 +; CHECK-NEXT: lastb w8, p1, z0.s ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %b = extractelement %a, i32 %x Index: llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll +++ llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll @@ -85,9 +85,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #4 -; CHECK-NEXT: subs x8, x8, #4 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: subs x8, x8, #4 +; CHECK-NEXT: mov w9, #4 ; CHECK-NEXT: csel x8, xzr, x8, lo ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: cmp x8, #4 @@ -155,9 +155,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntw x8 -; CHECK-NEXT: mov w9, #8 -; CHECK-NEXT: subs x8, x8, #8 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: subs x8, x8, #8 +; CHECK-NEXT: mov w9, #8 ; CHECK-NEXT: csel x8, xzr, x8, lo ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: cmp x8, #8 @@ -193,9 +193,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #8 -; CHECK-NEXT: subs x8, x8, #8 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: subs x8, x8, #8 +; CHECK-NEXT: mov w9, #8 ; CHECK-NEXT: csel x8, xzr, x8, lo ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: cmp x8, #8 @@ -230,14 +230,14 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov x8, #-16 -; CHECK-NEXT: mov w9, #16 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: mov w9, #16 ; CHECK-NEXT: cmp x8, #16 +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ldr q0, [x9, x8] +; CHECK-NEXT: ldr q0, [x10, x8] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -263,9 +263,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: mov w9, #16 -; CHECK-NEXT: subs x8, x8, #16 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: subs x8, x8, #16 +; CHECK-NEXT: mov w9, #16 ; CHECK-NEXT: csel x8, xzr, x8, lo ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: cmp x8, #16 @@ -301,9 +301,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntw x8 -; CHECK-NEXT: mov w9, #16 -; CHECK-NEXT: subs x8, x8, #16 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: subs x8, x8, #16 +; CHECK-NEXT: mov w9, #16 ; CHECK-NEXT: csel x8, xzr, x8, lo ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: cmp x8, #16 @@ -341,9 +341,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #16 -; CHECK-NEXT: subs x8, x8, #16 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: subs x8, x8, #16 +; CHECK-NEXT: mov w9, #16 ; CHECK-NEXT: csel x8, xzr, x8, lo ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: cmp x8, #16 @@ -491,9 +491,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: cntd x9 -; CHECK-NEXT: mov w10, #4 -; CHECK-NEXT: subs x9, x9, #4 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: subs x9, x9, #4 +; CHECK-NEXT: mov w10, #4 ; CHECK-NEXT: csel x9, xzr, x9, lo ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: cmp x9, #4 Index: llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll +++ llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll @@ -65,20 +65,20 @@ ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: punpkhi p2.h, p1.b -; CHECK-NEXT: punpklo p1.h, p1.b -; CHECK-NEXT: punpklo p2.h, p2.b ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: punpkhi p3.h, p1.b +; CHECK-NEXT: punpklo p2.h, p2.b +; CHECK-NEXT: punpklo p1.h, p1.b ; CHECK-NEXT: punpkhi p4.h, p2.b +; CHECK-NEXT: punpkhi p3.h, p1.b +; CHECK-NEXT: uzp1 p4.s, p4.s, p0.s +; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: punpklo p1.h, p1.b ; CHECK-NEXT: punpklo p2.h, p2.b ; CHECK-NEXT: punpkhi p5.h, p3.b -; CHECK-NEXT: uzp1 p4.s, p4.s, p0.s +; CHECK-NEXT: punpklo p1.h, p1.b ; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: punpklo p3.h, p3.b ; CHECK-NEXT: uzp1 p2.s, p5.s, p2.s +; CHECK-NEXT: punpklo p3.h, p3.b ; CHECK-NEXT: punpkhi p5.h, p1.b ; CHECK-NEXT: punpklo p1.h, p1.b ; CHECK-NEXT: punpkhi p0.h, p0.b Index: llvm/test/CodeGen/AArch64/sve-fcopysign.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fcopysign.ll +++ llvm/test/CodeGen/AArch64/sve-fcopysign.ll @@ -99,8 +99,8 @@ define @test_copysign_v4f64_v4f32( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v4f64_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpkhi z3.d, z2.s +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpklo z2.d, z2.s ; CHECK-NEXT: fcvt z3.d, p0/m, z3.s ; CHECK-NEXT: fcvt z2.d, p0/m, z2.s Index: llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll +++ llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll @@ -4,9 +4,9 @@ define void @vls_sve_and_64xi8(<64 x i8>* %ap, <64 x i8>* %out) nounwind { ; CHECK-LABEL: vls_sve_and_64xi8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl64 ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: add x8, x8, :lo12:.LCPI0_0 -; CHECK-NEXT: ptrue p0.b, vl64 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x8] ; CHECK-NEXT: and z0.d, z0.d, z1.d Index: llvm/test/CodeGen/AArch64/sve-fixed-ld2-alloca.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-ld2-alloca.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-ld2-alloca.ll @@ -7,13 +7,13 @@ ; CHECK-LABEL: st1d_fixed: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: add x8, sp, #8 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: add x8, sp, #8 ; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x8] ; CHECK-NEXT: mov x8, #4 ; CHECK-NEXT: mov z0.d, #0 // =0x0 -; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] +; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret %alloc = alloca [16 x double], i32 0 Index: llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll @@ -46,8 +46,8 @@ define void @ctlz_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: ctlz_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: clz z0.b, p0/m, z0.b @@ -406,8 +406,8 @@ define void @ctpop_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: ctpop_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: cnt z0.b, p0/m, z0.b @@ -779,8 +779,8 @@ define void @cttz_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: cttz_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: rbit z0.b, p0/m, z0.b Index: llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll @@ -45,8 +45,8 @@ define void @extract_subvector_v64i8(<64 x i8>* %a, <32 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: extract_subvector_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1] ; VBITS_GE_256-NEXT: ret @@ -326,13 +326,13 @@ ; VBITS_GE_256-LABEL: extract_subvector_v16i64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #8 -; VBITS_GE_256-NEXT: mov x9, #12 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x9, #12 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x9, lsl #3] ; VBITS_GE_256-NEXT: mov x8, #4 -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1] +; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ret %op = load <16 x i64>, <16 x i64>* %a %ret = call <8 x i64> @llvm.vector.extract.v8i64.v16i64(<16 x i64> %op, i64 8) Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll @@ -56,9 +56,9 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 def $z0 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] +; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 def $z0 ; VBITS_GE_256-NEXT: fadda h0, p0, h0, z2.h ; VBITS_GE_256-NEXT: fadda h0, p0, h0, z1.h ; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 killed $z0 @@ -152,9 +152,9 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 def $z0 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] +; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 def $z0 ; VBITS_GE_256-NEXT: fadda s0, p0, s0, z2.s ; VBITS_GE_256-NEXT: fadda s0, p0, s0, z1.s ; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 killed $z0 @@ -248,9 +248,9 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $z0 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] +; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $z0 ; VBITS_GE_256-NEXT: fadda d0, p0, d0, z2.d ; VBITS_GE_256-NEXT: fadda d0, p0, d0, z1.d ; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 killed $z0 Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll @@ -34,12 +34,12 @@ define void @select_v16f16(<16 x half>* %a, <16 x half>* %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.h, vl16 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: and z2.h, z2.h, #0x1 ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h @@ -55,31 +55,31 @@ define void @select_v32f16(<32 x half>* %a, <32 x half>* %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 +; VBITS_GE_256-NEXT: mov x9, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: ptrue p1.h -; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: and w8, w2, #0x1 +; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] +; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x9, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.h, w9 +; VBITS_GE_256-NEXT: mov z4.h, w8 ; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1 +; VBITS_GE_256-NEXT: ptrue p1.h ; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z4.h, #0 -; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h ; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z2.h -; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h +; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x9, lsl #1] ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: select_v32f16: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32 +; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1] -; VBITS_GE_512-NEXT: ptrue p1.h ; VBITS_GE_512-NEXT: mov z2.h, w8 +; VBITS_GE_512-NEXT: ptrue p1.h ; VBITS_GE_512-NEXT: and z2.h, z2.h, #0x1 ; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h @@ -95,12 +95,12 @@ define void @select_v64f16(<64 x half>* %a, <64 x half>* %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-LABEL: select_v64f16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.h, vl64 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: and z2.h, z2.h, #0x1 ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h @@ -116,12 +116,12 @@ define void @select_v128f16(<128 x half>* %a, <128 x half>* %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-LABEL: select_v128f16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.h, vl128 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: and z2.h, z2.h, #0x1 ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h @@ -163,12 +163,12 @@ define void @select_v8f32(<8 x float>* %a, <8 x float>* %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: and z2.s, z2.s, #0x1 ; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0 ; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s @@ -184,31 +184,31 @@ define void @select_v16f32(<16 x float>* %a, <16 x float>* %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 +; VBITS_GE_256-NEXT: mov x9, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: ptrue p1.s -; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: and w8, w2, #0x1 +; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x9, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.s, w9 +; VBITS_GE_256-NEXT: mov z4.s, w8 ; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1 +; VBITS_GE_256-NEXT: ptrue p1.s ; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z4.s, #0 -; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s ; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z2.s -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2] ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: select_v16f32: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 +; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1] -; VBITS_GE_512-NEXT: ptrue p1.s ; VBITS_GE_512-NEXT: mov z2.s, w8 +; VBITS_GE_512-NEXT: ptrue p1.s ; VBITS_GE_512-NEXT: and z2.s, z2.s, #0x1 ; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0 ; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s @@ -224,12 +224,12 @@ define void @select_v32f32(<32 x float>* %a, <32 x float>* %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-LABEL: select_v32f32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.s, vl32 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: and z2.s, z2.s, #0x1 ; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0 ; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s @@ -245,12 +245,12 @@ define void @select_v64f32(<64 x float>* %a, <64 x float>* %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-LABEL: select_v64f32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.s, vl64 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: and z2.s, z2.s, #0x1 ; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0 ; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s @@ -292,12 +292,12 @@ define void @select_v4f64(<4 x double>* %a, <4 x double>* %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: and z2.d, z2.d, #0x1 ; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0 ; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d @@ -313,31 +313,31 @@ define void @select_v8f64(<8 x double>* %a, <8 x double>* %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 +; VBITS_GE_256-NEXT: mov x9, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: ptrue p1.d -; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] +; VBITS_GE_256-NEXT: and w8, w2, #0x1 +; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] +; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x9, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.d, x9 +; VBITS_GE_256-NEXT: mov z4.d, x8 ; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1 +; VBITS_GE_256-NEXT: ptrue p1.d ; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z4.d, #0 -; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d ; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z2.d -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] +; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x9, lsl #3] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: select_v8f64: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 +; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1] -; VBITS_GE_512-NEXT: ptrue p1.d ; VBITS_GE_512-NEXT: mov z2.d, x8 +; VBITS_GE_512-NEXT: ptrue p1.d ; VBITS_GE_512-NEXT: and z2.d, z2.d, #0x1 ; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0 ; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d @@ -353,12 +353,12 @@ define void @select_v16f64(<16 x double>* %a, <16 x double>* %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-LABEL: select_v16f64: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.d, vl16 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: and z2.d, z2.d, #0x1 ; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0 ; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d @@ -374,12 +374,12 @@ define void @select_v32f64(<32 x double>* %a, <32 x double>* %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-LABEL: select_v32f64: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.d, vl32 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: and z2.d, z2.d, #0x1 ; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0 ; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d Index: llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll @@ -256,8 +256,8 @@ ; VBITS_GE_256-LABEL: fcvtzu_v8f16_v8i64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ldr q0, [x0] -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s @@ -1200,8 +1200,8 @@ ; VBITS_GE_256-LABEL: fcvtzs_v8f16_v8i64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ldr q0, [x0] -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s Index: llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll @@ -11,57 +11,50 @@ define dso_local void @func1(i64* %v1, i64* %v2, i64* %v3, i64* %v4, i64* %v5, i64* %v6, i64* %v7, i64* %v8, ; CHECK-LABEL: func1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x25, [sp, #-64]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w23, -40 -; CHECK-NEXT: .cfi_offset w24, -48 -; CHECK-NEXT: .cfi_offset w25, -64 -; CHECK-NEXT: add x8, sp, #64 -; CHECK-NEXT: add x9, sp, #128 -; CHECK-NEXT: add x10, sp, #160 -; CHECK-NEXT: add x11, sp, #192 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: add x20, sp, #192 +; CHECK-NEXT: add x8, sp, #32 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9] -; CHECK-NEXT: ld1d { z2.d }, p0/z, [x10] -; CHECK-NEXT: ld1d { z3.d }, p0/z, [x11] -; CHECK-NEXT: ldp x18, x19, [sp, #368] -; CHECK-NEXT: add x21, sp, #160 +; CHECK-NEXT: add x8, sp, #96 +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x8] +; CHECK-NEXT: add x8, sp, #128 +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x8] +; CHECK-NEXT: add x8, sp, #160 +; CHECK-NEXT: ld1d { z3.d }, p0/z, [x8] +; CHECK-NEXT: add x22, sp, #160 +; CHECK-NEXT: ldp x20, x21, [sp, #336] +; CHECK-NEXT: ldp x9, x8, [sp, #312] +; CHECK-NEXT: ldp x11, x10, [sp, #296] +; CHECK-NEXT: ldp x13, x12, [sp, #280] +; CHECK-NEXT: ldp x18, x14, [sp, #264] +; CHECK-NEXT: ldr x15, [sp, #88] +; CHECK-NEXT: ldur q4, [sp, #72] +; CHECK-NEXT: ldp x16, x17, [sp, #192] +; CHECK-NEXT: ldr x19, [sp, #256] +; CHECK-NEXT: st1d { z3.d }, p0, [x22] ; CHECK-NEXT: add x22, sp, #128 -; CHECK-NEXT: ldp x24, x14, [sp, #296] -; CHECK-NEXT: add x23, sp, #64 -; CHECK-NEXT: ldr x25, [sp, #288] -; CHECK-NEXT: ldp x9, x8, [sp, #344] -; CHECK-NEXT: ldp x11, x10, [sp, #328] -; CHECK-NEXT: ldp x13, x12, [sp, #312] -; CHECK-NEXT: ldr x15, [sp, #120] -; CHECK-NEXT: ldur q4, [sp, #104] -; CHECK-NEXT: ldp x16, x17, [sp, #224] -; CHECK-NEXT: st1d { z3.d }, p0, [x20] -; CHECK-NEXT: st1d { z2.d }, p0, [x21] +; CHECK-NEXT: st1d { z2.d }, p0, [x22] +; CHECK-NEXT: add x22, sp, #96 ; CHECK-NEXT: st1d { z1.d }, p0, [x22] -; CHECK-NEXT: st1d { z0.d }, p0, [x23] -; CHECK-NEXT: stp x18, x19, [sp, #368] -; CHECK-NEXT: stp x25, x24, [sp, #288] -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: stp x16, x17, [sp, #224] -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: stur q4, [sp, #104] -; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: str x15, [sp, #120] -; CHECK-NEXT: stp x14, x13, [sp, #304] -; CHECK-NEXT: stp x12, x11, [sp, #320] -; CHECK-NEXT: stp x10, x9, [sp, #336] -; CHECK-NEXT: str x8, [sp, #352] -; CHECK-NEXT: ldr x25, [sp], #64 // 8-byte Folded Reload +; CHECK-NEXT: add x22, sp, #32 +; CHECK-NEXT: st1d { z0.d }, p0, [x22] +; CHECK-NEXT: stp x20, x21, [sp, #336] +; CHECK-NEXT: stp x19, x18, [sp, #256] +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: stp x16, x17, [sp, #192] +; CHECK-NEXT: stur q4, [sp, #72] +; CHECK-NEXT: str x15, [sp, #88] +; CHECK-NEXT: stp x14, x13, [sp, #272] +; CHECK-NEXT: stp x12, x11, [sp, #288] +; CHECK-NEXT: stp x10, x9, [sp, #304] +; CHECK-NEXT: str x8, [sp, #320] +; CHECK-NEXT: ldp x22, x21, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: b func2 i64* %v9, i64* %v10, i64* %v11, i64* %v12, i64* %v13, i64* %v14, i64* %v15, i64* %v16, i64* %v17, i64* %v18, i64* %v19, i64* %v20, i64* %v21, i64* %v22, i64* %v23, i64* %v24, Index: llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll @@ -36,13 +36,13 @@ define <16 x half> @insertelement_v16f16(<16 x half>* %a) vscale_range(2,0) #0 { ; CHECK-LABEL: insertelement_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #15 ; CHECK-NEXT: ptrue p0.h, vl16 +; CHECK-NEXT: mov w9, #15 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: fmov h2, #5.00000000 +; CHECK-NEXT: mov z1.h, w9 ; CHECK-NEXT: index z3.h, #0, #1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z1.h, w9 ; CHECK-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h ; CHECK-NEXT: mov z0.h, p1/m, h2 ; CHECK-NEXT: st1h { z0.h }, p0, [x8] @@ -56,14 +56,14 @@ ; VBITS_GE_256-LABEL: insertelement_v32f16: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x9, #16 -; VBITS_GE_256-NEXT: mov w10, #15 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: fmov h3, #5.00000000 -; VBITS_GE_256-NEXT: index z4.h, #0, #1 -; VBITS_GE_256-NEXT: ptrue p1.h ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] +; VBITS_GE_256-NEXT: mov w10, #15 +; VBITS_GE_256-NEXT: fmov h3, #5.00000000 ; VBITS_GE_256-NEXT: mov z2.h, w10 +; VBITS_GE_256-NEXT: index z4.h, #0, #1 +; VBITS_GE_256-NEXT: ptrue p1.h ; VBITS_GE_256-NEXT: cmpeq p1.h, p1/z, z4.h, z2.h ; VBITS_GE_256-NEXT: mov z0.h, p1/m, h3 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x8, x9, lsl #1] @@ -72,13 +72,13 @@ ; ; VBITS_GE_512-LABEL: insertelement_v32f16: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: mov w9, #31 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32 +; VBITS_GE_512-NEXT: mov w9, #31 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0] ; VBITS_GE_512-NEXT: fmov h2, #5.00000000 +; VBITS_GE_512-NEXT: mov z1.h, w9 ; VBITS_GE_512-NEXT: index z3.h, #0, #1 ; VBITS_GE_512-NEXT: ptrue p1.h -; VBITS_GE_512-NEXT: mov z1.h, w9 ; VBITS_GE_512-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h ; VBITS_GE_512-NEXT: mov z0.h, p1/m, h2 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x8] @@ -91,13 +91,13 @@ define <64 x half> @insertelement_v64f16(<64 x half>* %a) vscale_range(8,0) #0 { ; CHECK-LABEL: insertelement_v64f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #63 ; CHECK-NEXT: ptrue p0.h, vl64 +; CHECK-NEXT: mov w9, #63 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: fmov h2, #5.00000000 +; CHECK-NEXT: mov z1.h, w9 ; CHECK-NEXT: index z3.h, #0, #1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z1.h, w9 ; CHECK-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h ; CHECK-NEXT: mov z0.h, p1/m, h2 ; CHECK-NEXT: st1h { z0.h }, p0, [x8] @@ -110,13 +110,13 @@ define <128 x half> @insertelement_v128f16(<128 x half>* %a) vscale_range(16,0) #0 { ; CHECK-LABEL: insertelement_v128f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #127 ; CHECK-NEXT: ptrue p0.h, vl128 +; CHECK-NEXT: mov w9, #127 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: fmov h2, #5.00000000 +; CHECK-NEXT: mov z1.h, w9 ; CHECK-NEXT: index z3.h, #0, #1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z1.h, w9 ; CHECK-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h ; CHECK-NEXT: mov z0.h, p1/m, h2 ; CHECK-NEXT: st1h { z0.h }, p0, [x8] @@ -153,13 +153,13 @@ define <8 x float> @insertelement_v8f32(<8 x float>* %a) vscale_range(2,0) #0 { ; CHECK-LABEL: insertelement_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #7 ; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: mov w9, #7 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: fmov s2, #5.00000000 +; CHECK-NEXT: mov z1.s, w9 ; CHECK-NEXT: index z3.s, #0, #1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z1.s, w9 ; CHECK-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s ; CHECK-NEXT: mov z0.s, p1/m, s2 ; CHECK-NEXT: st1w { z0.s }, p0, [x8] @@ -173,14 +173,14 @@ ; VBITS_GE_256-LABEL: insertelement_v16f32: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x9, #8 -; VBITS_GE_256-NEXT: mov w10, #7 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: fmov s3, #5.00000000 -; VBITS_GE_256-NEXT: index z4.s, #0, #1 -; VBITS_GE_256-NEXT: ptrue p1.s ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] +; VBITS_GE_256-NEXT: mov w10, #7 +; VBITS_GE_256-NEXT: fmov s3, #5.00000000 ; VBITS_GE_256-NEXT: mov z2.s, w10 +; VBITS_GE_256-NEXT: index z4.s, #0, #1 +; VBITS_GE_256-NEXT: ptrue p1.s ; VBITS_GE_256-NEXT: cmpeq p1.s, p1/z, z4.s, z2.s ; VBITS_GE_256-NEXT: mov z0.s, p1/m, s3 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2] @@ -189,13 +189,13 @@ ; ; VBITS_GE_512-LABEL: insertelement_v16f32: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: mov w9, #15 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 +; VBITS_GE_512-NEXT: mov w9, #15 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] ; VBITS_GE_512-NEXT: fmov s2, #5.00000000 +; VBITS_GE_512-NEXT: mov z1.s, w9 ; VBITS_GE_512-NEXT: index z3.s, #0, #1 ; VBITS_GE_512-NEXT: ptrue p1.s -; VBITS_GE_512-NEXT: mov z1.s, w9 ; VBITS_GE_512-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s ; VBITS_GE_512-NEXT: mov z0.s, p1/m, s2 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8] @@ -208,13 +208,13 @@ define <32 x float> @insertelement_v32f32(<32 x float>* %a) vscale_range(8,0) #0 { ; CHECK-LABEL: insertelement_v32f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #31 ; CHECK-NEXT: ptrue p0.s, vl32 +; CHECK-NEXT: mov w9, #31 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: fmov s2, #5.00000000 +; CHECK-NEXT: mov z1.s, w9 ; CHECK-NEXT: index z3.s, #0, #1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z1.s, w9 ; CHECK-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s ; CHECK-NEXT: mov z0.s, p1/m, s2 ; CHECK-NEXT: st1w { z0.s }, p0, [x8] @@ -227,13 +227,13 @@ define <64 x float> @insertelement_v64f32(<64 x float>* %a) vscale_range(16,0) #0 { ; CHECK-LABEL: insertelement_v64f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #63 ; CHECK-NEXT: ptrue p0.s, vl64 +; CHECK-NEXT: mov w9, #63 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: fmov s2, #5.00000000 +; CHECK-NEXT: mov z1.s, w9 ; CHECK-NEXT: index z3.s, #0, #1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z1.s, w9 ; CHECK-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s ; CHECK-NEXT: mov z0.s, p1/m, s2 ; CHECK-NEXT: st1w { z0.s }, p0, [x8] @@ -268,13 +268,13 @@ define <4 x double> @insertelement_v4f64(<4 x double>* %a) vscale_range(2,0) #0 { ; CHECK-LABEL: insertelement_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #3 ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: mov w9, #3 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: fmov d2, #5.00000000 +; CHECK-NEXT: mov z1.d, x9 ; CHECK-NEXT: index z3.d, #0, #1 ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: mov z1.d, x9 ; CHECK-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d ; CHECK-NEXT: mov z0.d, p1/m, d2 ; CHECK-NEXT: st1d { z0.d }, p0, [x8] @@ -288,14 +288,14 @@ ; VBITS_GE_256-LABEL: insertelement_v8f64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x9, #4 -; VBITS_GE_256-NEXT: mov w10, #3 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: fmov d3, #5.00000000 -; VBITS_GE_256-NEXT: index z4.d, #0, #1 -; VBITS_GE_256-NEXT: ptrue p1.d ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] +; VBITS_GE_256-NEXT: mov w10, #3 +; VBITS_GE_256-NEXT: fmov d3, #5.00000000 ; VBITS_GE_256-NEXT: mov z2.d, x10 +; VBITS_GE_256-NEXT: index z4.d, #0, #1 +; VBITS_GE_256-NEXT: ptrue p1.d ; VBITS_GE_256-NEXT: cmpeq p1.d, p1/z, z4.d, z2.d ; VBITS_GE_256-NEXT: mov z0.d, p1/m, d3 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3] @@ -304,13 +304,13 @@ ; ; VBITS_GE_512-LABEL: insertelement_v8f64: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: mov w9, #7 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 +; VBITS_GE_512-NEXT: mov w9, #7 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0] ; VBITS_GE_512-NEXT: fmov d2, #5.00000000 +; VBITS_GE_512-NEXT: mov z1.d, x9 ; VBITS_GE_512-NEXT: index z3.d, #0, #1 ; VBITS_GE_512-NEXT: ptrue p1.d -; VBITS_GE_512-NEXT: mov z1.d, x9 ; VBITS_GE_512-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d ; VBITS_GE_512-NEXT: mov z0.d, p1/m, d2 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8] @@ -323,13 +323,13 @@ define <16 x double> @insertelement_v16f64(<16 x double>* %a) vscale_range(8,0) #0 { ; CHECK-LABEL: insertelement_v16f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #15 ; CHECK-NEXT: ptrue p0.d, vl16 +; CHECK-NEXT: mov w9, #15 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: fmov d2, #5.00000000 +; CHECK-NEXT: mov z1.d, x9 ; CHECK-NEXT: index z3.d, #0, #1 ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: mov z1.d, x9 ; CHECK-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d ; CHECK-NEXT: mov z0.d, p1/m, d2 ; CHECK-NEXT: st1d { z0.d }, p0, [x8] @@ -342,13 +342,13 @@ define <32 x double> @insertelement_v32f64(<32 x double>* %a) vscale_range(16,0) #0 { ; CHECK-LABEL: insertelement_v32f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #31 ; CHECK-NEXT: ptrue p0.d, vl32 +; CHECK-NEXT: mov w9, #31 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: fmov d2, #5.00000000 +; CHECK-NEXT: mov z1.d, x9 ; CHECK-NEXT: index z3.d, #0, #1 ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: mov z1.d, x9 ; CHECK-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d ; CHECK-NEXT: mov z0.d, p1/m, d2 ; CHECK-NEXT: st1d { z0.d }, p0, [x8] Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll @@ -48,8 +48,8 @@ define void @add_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: add_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -449,8 +449,8 @@ define void @mul_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: mul_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -851,8 +851,8 @@ define void @sub_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: sub_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -1246,8 +1246,8 @@ define void @abs_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: abs_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: abs z0.b, p0/m, z0.b @@ -1352,21 +1352,21 @@ define void @abs_v64i16(<64 x i16>* %a) vscale_range(2,0) #0 { ; CHECK-LABEL: abs_v64i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #48 -; CHECK-NEXT: mov x9, #16 -; CHECK-NEXT: mov x10, #32 +; CHECK-NEXT: mov x8, #32 ; CHECK-NEXT: ptrue p0.h, vl16 -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1] -; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0, x10, lsl #1] +; CHECK-NEXT: mov x9, #48 +; CHECK-NEXT: mov x10, #16 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1] +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x10, lsl #1] +; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0, x8, lsl #1] ; CHECK-NEXT: ld1h { z3.h }, p0/z, [x0] ; CHECK-NEXT: abs z1.h, p0/m, z1.h ; CHECK-NEXT: abs z0.h, p0/m, z0.h ; CHECK-NEXT: abs z2.h, p0/m, z2.h ; CHECK-NEXT: abs z3.h, p0/m, z3.h -; CHECK-NEXT: st1h { z2.h }, p0, [x0, x10, lsl #1] -; CHECK-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; CHECK-NEXT: st1h { z1.h }, p0, [x0, x9, lsl #1] +; CHECK-NEXT: st1h { z2.h }, p0, [x0, x8, lsl #1] +; CHECK-NEXT: st1h { z0.h }, p0, [x0, x9, lsl #1] +; CHECK-NEXT: st1h { z1.h }, p0, [x0, x10, lsl #1] ; CHECK-NEXT: st1h { z3.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <64 x i16>, <64 x i16>* %a @@ -1379,19 +1379,19 @@ ; CHECK-LABEL: abs_v128i16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #96 -; CHECK-NEXT: mov x9, #48 -; CHECK-NEXT: mov x10, #16 +; CHECK-NEXT: ptrue p0.h, vl16 +; CHECK-NEXT: mov x9, #112 +; CHECK-NEXT: mov x10, #64 ; CHECK-NEXT: mov x11, #80 ; CHECK-NEXT: mov x12, #32 -; CHECK-NEXT: mov x13, #112 -; CHECK-NEXT: mov x14, #64 -; CHECK-NEXT: ptrue p0.h, vl16 -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x10, lsl #1] +; CHECK-NEXT: mov x13, #48 +; CHECK-NEXT: mov x14, #16 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, x13, lsl #1] +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x14, lsl #1] ; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0, x11, lsl #1] ; CHECK-NEXT: ld1h { z3.h }, p0/z, [x0, x12, lsl #1] -; CHECK-NEXT: ld1h { z4.h }, p0/z, [x0, x13, lsl #1] -; CHECK-NEXT: ld1h { z5.h }, p0/z, [x0, x14, lsl #1] +; CHECK-NEXT: ld1h { z4.h }, p0/z, [x0, x9, lsl #1] +; CHECK-NEXT: ld1h { z5.h }, p0/z, [x0, x10, lsl #1] ; CHECK-NEXT: ld1h { z6.h }, p0/z, [x0, x8, lsl #1] ; CHECK-NEXT: ld1h { z7.h }, p0/z, [x0] ; CHECK-NEXT: abs z1.h, p0/m, z1.h @@ -1403,12 +1403,12 @@ ; CHECK-NEXT: abs z6.h, p0/m, z6.h ; CHECK-NEXT: abs z7.h, p0/m, z7.h ; CHECK-NEXT: st1h { z6.h }, p0, [x0, x8, lsl #1] -; CHECK-NEXT: st1h { z4.h }, p0, [x0, x13, lsl #1] -; CHECK-NEXT: st1h { z5.h }, p0, [x0, x14, lsl #1] +; CHECK-NEXT: st1h { z4.h }, p0, [x0, x9, lsl #1] +; CHECK-NEXT: st1h { z5.h }, p0, [x0, x10, lsl #1] ; CHECK-NEXT: st1h { z2.h }, p0, [x0, x11, lsl #1] ; CHECK-NEXT: st1h { z3.h }, p0, [x0, x12, lsl #1] -; CHECK-NEXT: st1h { z0.h }, p0, [x0, x9, lsl #1] -; CHECK-NEXT: st1h { z1.h }, p0, [x0, x10, lsl #1] +; CHECK-NEXT: st1h { z0.h }, p0, [x0, x13, lsl #1] +; CHECK-NEXT: st1h { z1.h }, p0, [x0, x14, lsl #1] ; CHECK-NEXT: st1h { z7.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <128 x i16>, <128 x i16>* %a Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll @@ -52,8 +52,8 @@ define void @icmp_eq_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: icmp_eq_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll @@ -98,12 +98,12 @@ ; VBITS_GE_128-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_128-NEXT: sunpkhi z2.h, z1.b ; VBITS_GE_128-NEXT: sunpkhi z3.h, z0.b -; VBITS_GE_128-NEXT: ptrue p0.s, vl4 -; VBITS_GE_128-NEXT: sunpklo z1.h, z1.b ; VBITS_GE_128-NEXT: sunpkhi z4.s, z2.h ; VBITS_GE_128-NEXT: sunpkhi z5.s, z3.h +; VBITS_GE_128-NEXT: ptrue p0.s, vl4 ; VBITS_GE_128-NEXT: sunpklo z2.s, z2.h ; VBITS_GE_128-NEXT: sunpklo z3.s, z3.h +; VBITS_GE_128-NEXT: sunpklo z1.h, z1.b ; VBITS_GE_128-NEXT: sunpklo z0.h, z0.b ; VBITS_GE_128-NEXT: sdivr z4.s, p0/m, z4.s, z5.s ; VBITS_GE_128-NEXT: sdivr z2.s, p0/m, z2.s, z3.s @@ -243,12 +243,12 @@ ; CHECK-NEXT: sunpklo z2.s, z2.h ; CHECK-NEXT: sunpklo z3.s, z3.h ; CHECK-NEXT: sdivr z4.s, p1/m, z4.s, z5.s -; CHECK-NEXT: sunpkhi z5.s, z1.h ; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s -; CHECK-NEXT: sunpkhi z3.s, z0.h +; CHECK-NEXT: sunpkhi z3.s, z1.h +; CHECK-NEXT: sunpkhi z5.s, z0.h ; CHECK-NEXT: sunpklo z1.s, z1.h ; CHECK-NEXT: sunpklo z0.s, z0.h -; CHECK-NEXT: sdiv z3.s, p1/m, z3.s, z5.s +; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z5.s ; CHECK-NEXT: sdiv z0.s, p1/m, z0.s, z1.s ; CHECK-NEXT: uzp1 z1.h, z2.h, z4.h ; CHECK-NEXT: uzp1 z0.h, z0.h, z3.h @@ -314,9 +314,9 @@ ; VBITS_GE_128: // %bb.0: ; VBITS_GE_128-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_128-NEXT: // kill: def $q0 killed $q0 def $z0 -; VBITS_GE_128-NEXT: ptrue p0.s, vl4 ; VBITS_GE_128-NEXT: sunpkhi z2.s, z1.h ; VBITS_GE_128-NEXT: sunpkhi z3.s, z0.h +; VBITS_GE_128-NEXT: ptrue p0.s, vl4 ; VBITS_GE_128-NEXT: sunpklo z1.s, z1.h ; VBITS_GE_128-NEXT: sunpklo z0.s, z0.h ; VBITS_GE_128-NEXT: sdivr z2.s, p0/m, z2.s, z3.s @@ -811,12 +811,12 @@ ; VBITS_GE_128-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_128-NEXT: uunpkhi z2.h, z1.b ; VBITS_GE_128-NEXT: uunpkhi z3.h, z0.b -; VBITS_GE_128-NEXT: ptrue p0.s, vl4 -; VBITS_GE_128-NEXT: uunpklo z1.h, z1.b ; VBITS_GE_128-NEXT: uunpkhi z4.s, z2.h ; VBITS_GE_128-NEXT: uunpkhi z5.s, z3.h +; VBITS_GE_128-NEXT: ptrue p0.s, vl4 ; VBITS_GE_128-NEXT: uunpklo z2.s, z2.h ; VBITS_GE_128-NEXT: uunpklo z3.s, z3.h +; VBITS_GE_128-NEXT: uunpklo z1.h, z1.b ; VBITS_GE_128-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_128-NEXT: udivr z4.s, p0/m, z4.s, z5.s ; VBITS_GE_128-NEXT: udivr z2.s, p0/m, z2.s, z3.s @@ -956,12 +956,12 @@ ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z3.s, z3.h ; CHECK-NEXT: udivr z4.s, p1/m, z4.s, z5.s -; CHECK-NEXT: uunpkhi z5.s, z1.h ; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s -; CHECK-NEXT: uunpkhi z3.s, z0.h +; CHECK-NEXT: uunpkhi z3.s, z1.h +; CHECK-NEXT: uunpkhi z5.s, z0.h ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: udiv z3.s, p1/m, z3.s, z5.s +; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z5.s ; CHECK-NEXT: udiv z0.s, p1/m, z0.s, z1.s ; CHECK-NEXT: uzp1 z1.h, z2.h, z4.h ; CHECK-NEXT: uzp1 z0.h, z0.h, z3.h @@ -1027,9 +1027,9 @@ ; VBITS_GE_128: // %bb.0: ; VBITS_GE_128-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_128-NEXT: // kill: def $q0 killed $q0 def $z0 -; VBITS_GE_128-NEXT: ptrue p0.s, vl4 ; VBITS_GE_128-NEXT: uunpkhi z2.s, z1.h ; VBITS_GE_128-NEXT: uunpkhi z3.s, z0.h +; VBITS_GE_128-NEXT: ptrue p0.s, vl4 ; VBITS_GE_128-NEXT: uunpklo z1.s, z1.h ; VBITS_GE_128-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_128-NEXT: udivr z2.s, p0/m, z2.s, z3.s Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-log.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-int-log.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-log.ll @@ -48,8 +48,8 @@ define void @and_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: and_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -444,8 +444,8 @@ define void @or_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: or_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -840,8 +840,8 @@ define void @xor_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: xor_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll @@ -48,8 +48,8 @@ define void @smax_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: smax_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -452,8 +452,8 @@ define void @smin_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: smin_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -856,8 +856,8 @@ define void @umax_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: umax_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -1260,8 +1260,8 @@ define void @umin_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: umin_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll @@ -86,13 +86,13 @@ define void @smulh_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: smulh_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 -; VBITS_GE_256-NEXT: ptrue p1.h, vl16 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] +; VBITS_GE_256-NEXT: ptrue p1.h, vl16 ; VBITS_GE_256-NEXT: sunpklo z4.h, z0.b ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 ; VBITS_GE_256-NEXT: sunpklo z5.h, z1.b @@ -252,33 +252,31 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: ptrue p1.h, vl8 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z7.d, z1.d -; VBITS_GE_256-NEXT: mov z16.d, z3.d -; VBITS_GE_256-NEXT: ext z7.b, z7.b, z7.b, #16 +; VBITS_GE_256-NEXT: ptrue p1.h, vl8 ; VBITS_GE_256-NEXT: smull2 v4.4s, v0.8h, v2.8h -; VBITS_GE_256-NEXT: ext z16.b, z16.b, z3.b, #16 ; VBITS_GE_256-NEXT: smull v5.4s, v0.4h, v2.4h ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 -; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16 ; VBITS_GE_256-NEXT: smull2 v6.4s, v1.8h, v3.8h -; VBITS_GE_256-NEXT: smull v1.4s, v1.4h, v3.4h -; VBITS_GE_256-NEXT: smull2 v3.4s, v0.8h, v2.8h +; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16 +; VBITS_GE_256-NEXT: smull v7.4s, v1.4h, v3.4h +; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16 +; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16 +; VBITS_GE_256-NEXT: smull2 v16.4s, v0.8h, v2.8h ; VBITS_GE_256-NEXT: smull v0.4s, v0.4h, v2.4h -; VBITS_GE_256-NEXT: smull2 v2.4s, v7.8h, v16.8h -; VBITS_GE_256-NEXT: smull v7.4s, v7.4h, v16.4h -; VBITS_GE_256-NEXT: uzp2 v4.8h, v5.8h, v4.8h -; VBITS_GE_256-NEXT: uzp2 v1.8h, v1.8h, v6.8h -; VBITS_GE_256-NEXT: uzp2 v0.8h, v0.8h, v3.8h -; VBITS_GE_256-NEXT: uzp2 v2.8h, v7.8h, v2.8h -; VBITS_GE_256-NEXT: splice z4.h, p1, z4.h, z0.h -; VBITS_GE_256-NEXT: splice z1.h, p1, z1.h, z2.h -; VBITS_GE_256-NEXT: st1h { z4.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: smull2 v2.4s, v1.8h, v3.8h +; VBITS_GE_256-NEXT: smull v1.4s, v1.4h, v3.4h +; VBITS_GE_256-NEXT: uzp2 v3.8h, v5.8h, v4.8h +; VBITS_GE_256-NEXT: uzp2 v0.8h, v0.8h, v16.8h +; VBITS_GE_256-NEXT: uzp2 v4.8h, v7.8h, v6.8h +; VBITS_GE_256-NEXT: uzp2 v1.8h, v1.8h, v2.8h +; VBITS_GE_256-NEXT: splice z3.h, p1, z3.h, z0.h +; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: splice z4.h, p1, z4.h, z1.h +; VBITS_GE_256-NEXT: st1h { z4.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smulh_v32i16: @@ -399,33 +397,31 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: ptrue p1.s, vl4 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z7.d, z1.d -; VBITS_GE_256-NEXT: mov z16.d, z3.d -; VBITS_GE_256-NEXT: ext z7.b, z7.b, z7.b, #16 +; VBITS_GE_256-NEXT: ptrue p1.s, vl4 ; VBITS_GE_256-NEXT: smull2 v4.2d, v0.4s, v2.4s -; VBITS_GE_256-NEXT: ext z16.b, z16.b, z3.b, #16 ; VBITS_GE_256-NEXT: smull v5.2d, v0.2s, v2.2s ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 -; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16 ; VBITS_GE_256-NEXT: smull2 v6.2d, v1.4s, v3.4s -; VBITS_GE_256-NEXT: smull v1.2d, v1.2s, v3.2s -; VBITS_GE_256-NEXT: smull2 v3.2d, v0.4s, v2.4s +; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16 +; VBITS_GE_256-NEXT: smull v7.2d, v1.2s, v3.2s +; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16 +; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16 +; VBITS_GE_256-NEXT: smull2 v16.2d, v0.4s, v2.4s ; VBITS_GE_256-NEXT: smull v0.2d, v0.2s, v2.2s -; VBITS_GE_256-NEXT: smull2 v2.2d, v7.4s, v16.4s -; VBITS_GE_256-NEXT: smull v7.2d, v7.2s, v16.2s -; VBITS_GE_256-NEXT: uzp2 v4.4s, v5.4s, v4.4s -; VBITS_GE_256-NEXT: uzp2 v1.4s, v1.4s, v6.4s -; VBITS_GE_256-NEXT: uzp2 v0.4s, v0.4s, v3.4s -; VBITS_GE_256-NEXT: uzp2 v2.4s, v7.4s, v2.4s -; VBITS_GE_256-NEXT: splice z4.s, p1, z4.s, z0.s -; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z2.s -; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: smull2 v2.2d, v1.4s, v3.4s +; VBITS_GE_256-NEXT: smull v1.2d, v1.2s, v3.2s +; VBITS_GE_256-NEXT: uzp2 v3.4s, v5.4s, v4.4s +; VBITS_GE_256-NEXT: uzp2 v0.4s, v0.4s, v16.4s +; VBITS_GE_256-NEXT: uzp2 v4.4s, v7.4s, v6.4s +; VBITS_GE_256-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; VBITS_GE_256-NEXT: splice z3.s, p1, z3.s, z0.s +; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: splice z4.s, p1, z4.s, z1.s +; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smulh_v16i32: @@ -550,55 +546,55 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: ptrue p1.d, vl2 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] +; VBITS_GE_256-NEXT: ptrue p1.d, vl2 ; VBITS_GE_256-NEXT: mov x9, v0.d[1] ; VBITS_GE_256-NEXT: fmov x10, d0 +; VBITS_GE_256-NEXT: mov x15, v2.d[1] ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 -; VBITS_GE_256-NEXT: fmov x17, d2 -; VBITS_GE_256-NEXT: mov x13, v2.d[1] +; VBITS_GE_256-NEXT: fmov x16, d2 ; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16 -; VBITS_GE_256-NEXT: mov x14, v0.d[1] -; VBITS_GE_256-NEXT: mov x18, v2.d[1] -; VBITS_GE_256-NEXT: smulh x10, x10, x17 +; VBITS_GE_256-NEXT: mov x12, v0.d[1] +; VBITS_GE_256-NEXT: fmov x13, d0 +; VBITS_GE_256-NEXT: mov x17, v2.d[1] +; VBITS_GE_256-NEXT: fmov x1, d2 +; VBITS_GE_256-NEXT: smulh x10, x10, x16 +; VBITS_GE_256-NEXT: smulh x9, x9, x15 +; VBITS_GE_256-NEXT: fmov x14, d1 ; VBITS_GE_256-NEXT: mov x11, v1.d[1] -; VBITS_GE_256-NEXT: fmov x12, d1 ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16 ; VBITS_GE_256-NEXT: mov x2, v3.d[1] +; VBITS_GE_256-NEXT: smulh x13, x13, x1 ; VBITS_GE_256-NEXT: fmov x3, d3 +; VBITS_GE_256-NEXT: smulh x12, x12, x17 +; VBITS_GE_256-NEXT: mov x18, v1.d[1] ; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16 -; VBITS_GE_256-NEXT: smulh x9, x9, x13 -; VBITS_GE_256-NEXT: mov x13, v1.d[1] -; VBITS_GE_256-NEXT: smulh x14, x14, x18 -; VBITS_GE_256-NEXT: mov x18, v3.d[1] -; VBITS_GE_256-NEXT: smulh x12, x12, x3 -; VBITS_GE_256-NEXT: fmov x15, d0 -; VBITS_GE_256-NEXT: fmov x16, d1 -; VBITS_GE_256-NEXT: fmov x1, d2 -; VBITS_GE_256-NEXT: fmov x17, d3 +; VBITS_GE_256-NEXT: fmov x17, d1 ; VBITS_GE_256-NEXT: fmov d0, x9 ; VBITS_GE_256-NEXT: fmov d1, x10 -; VBITS_GE_256-NEXT: smulh x9, x11, x2 -; VBITS_GE_256-NEXT: smulh x15, x15, x1 -; VBITS_GE_256-NEXT: fmov d4, x12 -; VBITS_GE_256-NEXT: smulh x16, x16, x17 -; VBITS_GE_256-NEXT: smulh x10, x13, x18 +; VBITS_GE_256-NEXT: mov x1, v3.d[1] +; VBITS_GE_256-NEXT: fmov x16, d3 +; VBITS_GE_256-NEXT: smulh x14, x14, x3 +; VBITS_GE_256-NEXT: fmov d2, x13 +; VBITS_GE_256-NEXT: smulh x11, x11, x2 ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0] -; VBITS_GE_256-NEXT: fmov d0, x14 -; VBITS_GE_256-NEXT: fmov d2, x15 -; VBITS_GE_256-NEXT: fmov d3, x9 -; VBITS_GE_256-NEXT: fmov d6, x16 -; VBITS_GE_256-NEXT: fmov d5, x10 +; VBITS_GE_256-NEXT: fmov d0, x12 +; VBITS_GE_256-NEXT: smulh x15, x17, x16 +; VBITS_GE_256-NEXT: smulh x16, x18, x1 +; VBITS_GE_256-NEXT: fmov d3, x14 ; VBITS_GE_256-NEXT: mov v2.d[1], v0.d[0] -; VBITS_GE_256-NEXT: mov v4.d[1], v3.d[0] -; VBITS_GE_256-NEXT: mov v6.d[1], v5.d[0] +; VBITS_GE_256-NEXT: fmov d0, x11 +; VBITS_GE_256-NEXT: fmov d4, x15 +; VBITS_GE_256-NEXT: mov v3.d[1], v0.d[0] +; VBITS_GE_256-NEXT: fmov d0, x16 ; VBITS_GE_256-NEXT: splice z1.d, p1, z1.d, z2.d ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: splice z4.d, p1, z4.d, z6.d -; VBITS_GE_256-NEXT: st1d { z4.d }, p0, [x0] +; VBITS_GE_256-NEXT: mov v4.d[1], v0.d[0] +; VBITS_GE_256-NEXT: splice z3.d, p1, z3.d, z4.d +; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: smulh_v8i64: @@ -736,13 +732,13 @@ define void @umulh_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: umulh_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 -; VBITS_GE_256-NEXT: ptrue p1.h, vl16 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] +; VBITS_GE_256-NEXT: ptrue p1.h, vl16 ; VBITS_GE_256-NEXT: uunpklo z4.h, z0.b ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 ; VBITS_GE_256-NEXT: uunpklo z5.h, z1.b @@ -902,33 +898,31 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: ptrue p1.h, vl8 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z7.d, z1.d -; VBITS_GE_256-NEXT: mov z16.d, z3.d -; VBITS_GE_256-NEXT: ext z7.b, z7.b, z7.b, #16 +; VBITS_GE_256-NEXT: ptrue p1.h, vl8 ; VBITS_GE_256-NEXT: umull2 v4.4s, v0.8h, v2.8h -; VBITS_GE_256-NEXT: ext z16.b, z16.b, z3.b, #16 ; VBITS_GE_256-NEXT: umull v5.4s, v0.4h, v2.4h ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 -; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16 ; VBITS_GE_256-NEXT: umull2 v6.4s, v1.8h, v3.8h -; VBITS_GE_256-NEXT: umull v1.4s, v1.4h, v3.4h -; VBITS_GE_256-NEXT: umull2 v3.4s, v0.8h, v2.8h +; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16 +; VBITS_GE_256-NEXT: umull v7.4s, v1.4h, v3.4h +; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16 +; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16 +; VBITS_GE_256-NEXT: umull2 v16.4s, v0.8h, v2.8h ; VBITS_GE_256-NEXT: umull v0.4s, v0.4h, v2.4h -; VBITS_GE_256-NEXT: umull2 v2.4s, v7.8h, v16.8h -; VBITS_GE_256-NEXT: umull v7.4s, v7.4h, v16.4h -; VBITS_GE_256-NEXT: uzp2 v4.8h, v5.8h, v4.8h -; VBITS_GE_256-NEXT: uzp2 v1.8h, v1.8h, v6.8h -; VBITS_GE_256-NEXT: uzp2 v0.8h, v0.8h, v3.8h -; VBITS_GE_256-NEXT: uzp2 v2.8h, v7.8h, v2.8h -; VBITS_GE_256-NEXT: splice z4.h, p1, z4.h, z0.h -; VBITS_GE_256-NEXT: splice z1.h, p1, z1.h, z2.h -; VBITS_GE_256-NEXT: st1h { z4.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: umull2 v2.4s, v1.8h, v3.8h +; VBITS_GE_256-NEXT: umull v1.4s, v1.4h, v3.4h +; VBITS_GE_256-NEXT: uzp2 v3.8h, v5.8h, v4.8h +; VBITS_GE_256-NEXT: uzp2 v0.8h, v0.8h, v16.8h +; VBITS_GE_256-NEXT: uzp2 v4.8h, v7.8h, v6.8h +; VBITS_GE_256-NEXT: uzp2 v1.8h, v1.8h, v2.8h +; VBITS_GE_256-NEXT: splice z3.h, p1, z3.h, z0.h +; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: splice z4.h, p1, z4.h, z1.h +; VBITS_GE_256-NEXT: st1h { z4.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umulh_v32i16: @@ -1051,33 +1045,31 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: ptrue p1.s, vl4 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z7.d, z1.d -; VBITS_GE_256-NEXT: mov z16.d, z3.d -; VBITS_GE_256-NEXT: ext z7.b, z7.b, z7.b, #16 +; VBITS_GE_256-NEXT: ptrue p1.s, vl4 ; VBITS_GE_256-NEXT: umull2 v4.2d, v0.4s, v2.4s -; VBITS_GE_256-NEXT: ext z16.b, z16.b, z3.b, #16 ; VBITS_GE_256-NEXT: umull v5.2d, v0.2s, v2.2s ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 -; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16 ; VBITS_GE_256-NEXT: umull2 v6.2d, v1.4s, v3.4s -; VBITS_GE_256-NEXT: umull v1.2d, v1.2s, v3.2s -; VBITS_GE_256-NEXT: umull2 v3.2d, v0.4s, v2.4s +; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16 +; VBITS_GE_256-NEXT: umull v7.2d, v1.2s, v3.2s +; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16 +; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16 +; VBITS_GE_256-NEXT: umull2 v16.2d, v0.4s, v2.4s ; VBITS_GE_256-NEXT: umull v0.2d, v0.2s, v2.2s -; VBITS_GE_256-NEXT: umull2 v2.2d, v7.4s, v16.4s -; VBITS_GE_256-NEXT: umull v7.2d, v7.2s, v16.2s -; VBITS_GE_256-NEXT: uzp2 v4.4s, v5.4s, v4.4s -; VBITS_GE_256-NEXT: uzp2 v1.4s, v1.4s, v6.4s -; VBITS_GE_256-NEXT: uzp2 v0.4s, v0.4s, v3.4s -; VBITS_GE_256-NEXT: uzp2 v2.4s, v7.4s, v2.4s -; VBITS_GE_256-NEXT: splice z4.s, p1, z4.s, z0.s -; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z2.s -; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: umull2 v2.2d, v1.4s, v3.4s +; VBITS_GE_256-NEXT: umull v1.2d, v1.2s, v3.2s +; VBITS_GE_256-NEXT: uzp2 v3.4s, v5.4s, v4.4s +; VBITS_GE_256-NEXT: uzp2 v0.4s, v0.4s, v16.4s +; VBITS_GE_256-NEXT: uzp2 v4.4s, v7.4s, v6.4s +; VBITS_GE_256-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; VBITS_GE_256-NEXT: splice z3.s, p1, z3.s, z0.s +; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: splice z4.s, p1, z4.s, z1.s +; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umulh_v16i32: @@ -1200,55 +1192,55 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: ptrue p1.d, vl2 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] +; VBITS_GE_256-NEXT: ptrue p1.d, vl2 ; VBITS_GE_256-NEXT: mov x9, v0.d[1] ; VBITS_GE_256-NEXT: fmov x10, d0 +; VBITS_GE_256-NEXT: mov x15, v2.d[1] ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 -; VBITS_GE_256-NEXT: fmov x17, d2 -; VBITS_GE_256-NEXT: mov x13, v2.d[1] +; VBITS_GE_256-NEXT: fmov x16, d2 ; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16 -; VBITS_GE_256-NEXT: mov x14, v0.d[1] -; VBITS_GE_256-NEXT: mov x18, v2.d[1] -; VBITS_GE_256-NEXT: umulh x10, x10, x17 +; VBITS_GE_256-NEXT: mov x12, v0.d[1] +; VBITS_GE_256-NEXT: fmov x13, d0 +; VBITS_GE_256-NEXT: mov x17, v2.d[1] +; VBITS_GE_256-NEXT: fmov x1, d2 +; VBITS_GE_256-NEXT: umulh x10, x10, x16 +; VBITS_GE_256-NEXT: umulh x9, x9, x15 +; VBITS_GE_256-NEXT: fmov x14, d1 ; VBITS_GE_256-NEXT: mov x11, v1.d[1] -; VBITS_GE_256-NEXT: fmov x12, d1 ; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16 ; VBITS_GE_256-NEXT: mov x2, v3.d[1] +; VBITS_GE_256-NEXT: umulh x13, x13, x1 ; VBITS_GE_256-NEXT: fmov x3, d3 +; VBITS_GE_256-NEXT: umulh x12, x12, x17 +; VBITS_GE_256-NEXT: mov x18, v1.d[1] ; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16 -; VBITS_GE_256-NEXT: umulh x9, x9, x13 -; VBITS_GE_256-NEXT: mov x13, v1.d[1] -; VBITS_GE_256-NEXT: umulh x14, x14, x18 -; VBITS_GE_256-NEXT: mov x18, v3.d[1] -; VBITS_GE_256-NEXT: umulh x12, x12, x3 -; VBITS_GE_256-NEXT: fmov x15, d0 -; VBITS_GE_256-NEXT: fmov x16, d1 -; VBITS_GE_256-NEXT: fmov x1, d2 -; VBITS_GE_256-NEXT: fmov x17, d3 +; VBITS_GE_256-NEXT: fmov x17, d1 ; VBITS_GE_256-NEXT: fmov d0, x9 ; VBITS_GE_256-NEXT: fmov d1, x10 -; VBITS_GE_256-NEXT: umulh x9, x11, x2 -; VBITS_GE_256-NEXT: umulh x15, x15, x1 -; VBITS_GE_256-NEXT: fmov d4, x12 -; VBITS_GE_256-NEXT: umulh x16, x16, x17 -; VBITS_GE_256-NEXT: umulh x10, x13, x18 +; VBITS_GE_256-NEXT: mov x1, v3.d[1] +; VBITS_GE_256-NEXT: fmov x16, d3 +; VBITS_GE_256-NEXT: umulh x14, x14, x3 +; VBITS_GE_256-NEXT: fmov d2, x13 +; VBITS_GE_256-NEXT: umulh x11, x11, x2 ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0] -; VBITS_GE_256-NEXT: fmov d0, x14 -; VBITS_GE_256-NEXT: fmov d2, x15 -; VBITS_GE_256-NEXT: fmov d3, x9 -; VBITS_GE_256-NEXT: fmov d6, x16 -; VBITS_GE_256-NEXT: fmov d5, x10 +; VBITS_GE_256-NEXT: fmov d0, x12 +; VBITS_GE_256-NEXT: umulh x15, x17, x16 +; VBITS_GE_256-NEXT: umulh x16, x18, x1 +; VBITS_GE_256-NEXT: fmov d3, x14 ; VBITS_GE_256-NEXT: mov v2.d[1], v0.d[0] -; VBITS_GE_256-NEXT: mov v4.d[1], v3.d[0] -; VBITS_GE_256-NEXT: mov v6.d[1], v5.d[0] +; VBITS_GE_256-NEXT: fmov d0, x11 +; VBITS_GE_256-NEXT: fmov d4, x15 +; VBITS_GE_256-NEXT: mov v3.d[1], v0.d[0] +; VBITS_GE_256-NEXT: fmov d0, x16 ; VBITS_GE_256-NEXT: splice z1.d, p1, z1.d, z2.d ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: splice z4.d, p1, z4.d, z6.d -; VBITS_GE_256-NEXT: st1d { z4.d }, p0, [x0] +; VBITS_GE_256-NEXT: mov v4.d[1], v0.d[0] +; VBITS_GE_256-NEXT: splice z3.d, p1, z3.d, z4.d +; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: umulh_v8i64: Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll @@ -48,8 +48,8 @@ define i8 @uaddv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: uaddv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: add z0.b, z1.b, z0.b @@ -406,8 +406,8 @@ define i8 @smaxv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: smaxv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: smax z0.b, p0/m, z0.b, z1.b @@ -752,8 +752,8 @@ define i8 @sminv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: sminv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: smin z0.b, p0/m, z0.b, z1.b @@ -1098,8 +1098,8 @@ define i8 @umaxv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: umaxv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: umax z0.b, p0/m, z0.b, z1.b @@ -1444,8 +1444,8 @@ define i8 @uminv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: uminv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: umin z0.b, p0/m, z0.b, z1.b Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll @@ -35,9 +35,9 @@ ; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $z0 ; VBITS_GE_256-NEXT: sunpklo z2.h, z1.b ; VBITS_GE_256-NEXT: sunpklo z3.h, z0.b -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: sunpklo z2.s, z2.h ; VBITS_GE_256-NEXT: sunpklo z3.s, z3.h +; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_256-NEXT: umov w8, v2.h[0] @@ -66,9 +66,9 @@ ; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 def $z0 ; VBITS_GE_512-NEXT: sunpklo z2.h, z1.b ; VBITS_GE_512-NEXT: sunpklo z3.h, z0.b -; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: sunpklo z2.s, z2.h ; VBITS_GE_512-NEXT: sunpklo z3.s, z3.h +; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_512-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_512-NEXT: umov w8, v2.h[0] @@ -101,22 +101,22 @@ ; VBITS_GE_128-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_128-NEXT: sunpkhi z2.h, z1.b ; VBITS_GE_128-NEXT: sunpkhi z3.h, z0.b +; VBITS_GE_128-NEXT: sunpkhi z4.s, z2.h +; VBITS_GE_128-NEXT: sunpkhi z5.s, z3.h ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 -; VBITS_GE_128-NEXT: sunpkhi z5.s, z2.h -; VBITS_GE_128-NEXT: sunpkhi z6.s, z3.h ; VBITS_GE_128-NEXT: sunpklo z2.s, z2.h ; VBITS_GE_128-NEXT: sunpklo z3.s, z3.h -; VBITS_GE_128-NEXT: sunpklo z4.h, z1.b +; VBITS_GE_128-NEXT: sdivr z4.s, p0/m, z4.s, z5.s ; VBITS_GE_128-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; VBITS_GE_128-NEXT: sunpklo z3.h, z0.b -; VBITS_GE_128-NEXT: sdivr z5.s, p0/m, z5.s, z6.s -; VBITS_GE_128-NEXT: sunpkhi z6.s, z4.h -; VBITS_GE_128-NEXT: sunpkhi z7.s, z3.h -; VBITS_GE_128-NEXT: sunpklo z4.s, z4.h +; VBITS_GE_128-NEXT: sunpklo z3.h, z1.b +; VBITS_GE_128-NEXT: sunpklo z5.h, z0.b +; VBITS_GE_128-NEXT: sunpkhi z6.s, z3.h +; VBITS_GE_128-NEXT: sunpkhi z7.s, z5.h ; VBITS_GE_128-NEXT: sunpklo z3.s, z3.h +; VBITS_GE_128-NEXT: sunpklo z5.s, z5.h ; VBITS_GE_128-NEXT: sdivr z6.s, p0/m, z6.s, z7.s -; VBITS_GE_128-NEXT: sdiv z3.s, p0/m, z3.s, z4.s -; VBITS_GE_128-NEXT: uzp1 z2.h, z2.h, z5.h +; VBITS_GE_128-NEXT: sdivr z3.s, p0/m, z3.s, z5.s +; VBITS_GE_128-NEXT: uzp1 z2.h, z2.h, z4.h ; VBITS_GE_128-NEXT: uzp1 z3.h, z3.h, z6.h ; VBITS_GE_128-NEXT: uzp1 z2.b, z3.b, z2.b ; VBITS_GE_128-NEXT: mls v0.16b, v2.16b, v1.16b @@ -129,9 +129,9 @@ ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_256-NEXT: sunpklo z2.h, z1.b ; VBITS_GE_256-NEXT: sunpklo z3.h, z0.b -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: sunpkhi z4.s, z2.h ; VBITS_GE_256-NEXT: sunpkhi z5.s, z3.h +; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: sunpklo z2.s, z2.h ; VBITS_GE_256-NEXT: sunpklo z3.s, z3.h ; VBITS_GE_256-NEXT: sdivr z4.s, p0/m, z4.s, z5.s @@ -148,9 +148,9 @@ ; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_512-NEXT: sunpklo z2.h, z1.b ; VBITS_GE_512-NEXT: sunpklo z3.h, z0.b -; VBITS_GE_512-NEXT: ptrue p0.s, vl16 ; VBITS_GE_512-NEXT: sunpklo z2.s, z2.h ; VBITS_GE_512-NEXT: sunpklo z3.s, z3.h +; VBITS_GE_512-NEXT: ptrue p0.s, vl16 ; VBITS_GE_512-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_512-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_512-NEXT: uzp1 z2.b, z2.b, z2.b @@ -252,12 +252,12 @@ ; CHECK-NEXT: sunpklo z2.s, z2.h ; CHECK-NEXT: sunpklo z3.s, z3.h ; CHECK-NEXT: sdivr z6.s, p1/m, z6.s, z7.s -; CHECK-NEXT: sunpkhi z7.s, z4.h ; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s -; CHECK-NEXT: sunpkhi z3.s, z5.h +; CHECK-NEXT: sunpkhi z3.s, z4.h +; CHECK-NEXT: sunpkhi z7.s, z5.h ; CHECK-NEXT: sunpklo z4.s, z4.h ; CHECK-NEXT: sunpklo z5.s, z5.h -; CHECK-NEXT: sdiv z3.s, p1/m, z3.s, z7.s +; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z7.s ; CHECK-NEXT: sdivr z4.s, p1/m, z4.s, z5.s ; CHECK-NEXT: uzp1 z2.h, z2.h, z6.h ; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h @@ -325,9 +325,9 @@ ; VBITS_GE_128: // %bb.0: ; VBITS_GE_128-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_128-NEXT: // kill: def $q0 killed $q0 def $z0 -; VBITS_GE_128-NEXT: ptrue p0.s, vl4 ; VBITS_GE_128-NEXT: sunpkhi z2.s, z1.h ; VBITS_GE_128-NEXT: sunpkhi z3.s, z0.h +; VBITS_GE_128-NEXT: ptrue p0.s, vl4 ; VBITS_GE_128-NEXT: sunpklo z4.s, z1.h ; VBITS_GE_128-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_128-NEXT: sunpklo z5.s, z0.h @@ -342,9 +342,9 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0 -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h ; VBITS_GE_256-NEXT: sunpklo z3.s, z0.h +; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_256-NEXT: mls v0.8h, v2.8h, v1.8h @@ -355,9 +355,9 @@ ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 def $z0 -; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: sunpklo z2.s, z1.h ; VBITS_GE_512-NEXT: sunpklo z3.s, z0.h +; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_512-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_512-NEXT: mls v0.8h, v2.8h, v1.8h @@ -372,21 +372,23 @@ ; VBITS_GE_128: // %bb.0: ; VBITS_GE_128-NEXT: ldp q2, q0, [x0] ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 -; VBITS_GE_128-NEXT: sunpkhi z17.s, z2.h +; VBITS_GE_128-NEXT: sunpklo z17.s, z2.h ; VBITS_GE_128-NEXT: ldp q3, q1, [x1] ; VBITS_GE_128-NEXT: sunpkhi z5.s, z0.h ; VBITS_GE_128-NEXT: sunpklo z7.s, z0.h -; VBITS_GE_128-NEXT: sunpkhi z16.s, z3.h -; VBITS_GE_128-NEXT: sdivr z16.s, p0/m, z16.s, z17.s +; VBITS_GE_128-NEXT: sunpklo z16.s, z3.h ; VBITS_GE_128-NEXT: sunpkhi z4.s, z1.h ; VBITS_GE_128-NEXT: sunpklo z6.s, z1.h ; VBITS_GE_128-NEXT: sdivr z4.s, p0/m, z4.s, z5.s -; VBITS_GE_128-NEXT: sunpklo z5.s, z3.h +; VBITS_GE_128-NEXT: movprfx z5, z7 +; VBITS_GE_128-NEXT: sdiv z5.s, p0/m, z5.s, z6.s +; VBITS_GE_128-NEXT: sunpkhi z6.s, z3.h +; VBITS_GE_128-NEXT: sunpkhi z7.s, z2.h ; VBITS_GE_128-NEXT: sdivr z6.s, p0/m, z6.s, z7.s -; VBITS_GE_128-NEXT: sunpklo z7.s, z2.h -; VBITS_GE_128-NEXT: sdivr z5.s, p0/m, z5.s, z7.s -; VBITS_GE_128-NEXT: uzp1 z4.h, z6.h, z4.h -; VBITS_GE_128-NEXT: uzp1 z5.h, z5.h, z16.h +; VBITS_GE_128-NEXT: movprfx z7, z17 +; VBITS_GE_128-NEXT: sdiv z7.s, p0/m, z7.s, z16.s +; VBITS_GE_128-NEXT: uzp1 z4.h, z5.h, z4.h +; VBITS_GE_128-NEXT: uzp1 z5.h, z7.h, z6.h ; VBITS_GE_128-NEXT: mls v2.8h, v5.8h, v3.8h ; VBITS_GE_128-NEXT: mls v0.8h, v4.8h, v1.8h ; VBITS_GE_128-NEXT: stp q2, q0, [x0] @@ -699,7 +701,7 @@ ; VBITS_GE_128: // %bb.0: ; VBITS_GE_128-NEXT: ldp q4, q5, [x1] ; VBITS_GE_128-NEXT: ptrue p0.d, vl2 -; VBITS_GE_128-NEXT: ldp q7, q6, [x1, #32] +; VBITS_GE_128-NEXT: ldp q6, q7, [x1, #32] ; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32] ; VBITS_GE_128-NEXT: ldp q2, q3, [x0] ; VBITS_GE_128-NEXT: movprfx z16, z3 @@ -707,15 +709,15 @@ ; VBITS_GE_128-NEXT: movprfx z17, z2 ; VBITS_GE_128-NEXT: sdiv z17.d, p0/m, z17.d, z4.d ; VBITS_GE_128-NEXT: mul z5.d, p0/m, z5.d, z16.d -; VBITS_GE_128-NEXT: movprfx z16, z1 -; VBITS_GE_128-NEXT: sdiv z16.d, p0/m, z16.d, z6.d ; VBITS_GE_128-NEXT: mul z4.d, p0/m, z4.d, z17.d +; VBITS_GE_128-NEXT: movprfx z16, z1 +; VBITS_GE_128-NEXT: sdiv z16.d, p0/m, z16.d, z7.d ; VBITS_GE_128-NEXT: movprfx z17, z0 -; VBITS_GE_128-NEXT: sdiv z17.d, p0/m, z17.d, z7.d -; VBITS_GE_128-NEXT: mul z6.d, p0/m, z6.d, z16.d -; VBITS_GE_128-NEXT: mul z7.d, p0/m, z7.d, z17.d -; VBITS_GE_128-NEXT: sub v0.2d, v0.2d, v7.2d -; VBITS_GE_128-NEXT: sub v1.2d, v1.2d, v6.2d +; VBITS_GE_128-NEXT: sdiv z17.d, p0/m, z17.d, z6.d +; VBITS_GE_128-NEXT: mul z7.d, p0/m, z7.d, z16.d +; VBITS_GE_128-NEXT: mul z6.d, p0/m, z6.d, z17.d +; VBITS_GE_128-NEXT: sub v0.2d, v0.2d, v6.2d +; VBITS_GE_128-NEXT: sub v1.2d, v1.2d, v7.2d ; VBITS_GE_128-NEXT: sub v2.2d, v2.2d, v4.2d ; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32] ; VBITS_GE_128-NEXT: sub v0.2d, v3.2d, v5.2d @@ -822,9 +824,9 @@ ; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $z0 ; VBITS_GE_256-NEXT: uunpklo z2.h, z1.b ; VBITS_GE_256-NEXT: uunpklo z3.h, z0.b -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: uunpklo z2.s, z2.h ; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h +; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_256-NEXT: umov w8, v2.h[0] @@ -853,9 +855,9 @@ ; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 def $z0 ; VBITS_GE_512-NEXT: uunpklo z2.h, z1.b ; VBITS_GE_512-NEXT: uunpklo z3.h, z0.b -; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: uunpklo z2.s, z2.h ; VBITS_GE_512-NEXT: uunpklo z3.s, z3.h +; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_512-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_512-NEXT: umov w8, v2.h[0] @@ -888,22 +890,22 @@ ; VBITS_GE_128-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_128-NEXT: uunpkhi z2.h, z1.b ; VBITS_GE_128-NEXT: uunpkhi z3.h, z0.b +; VBITS_GE_128-NEXT: uunpkhi z4.s, z2.h +; VBITS_GE_128-NEXT: uunpkhi z5.s, z3.h ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 -; VBITS_GE_128-NEXT: uunpkhi z5.s, z2.h -; VBITS_GE_128-NEXT: uunpkhi z6.s, z3.h ; VBITS_GE_128-NEXT: uunpklo z2.s, z2.h ; VBITS_GE_128-NEXT: uunpklo z3.s, z3.h -; VBITS_GE_128-NEXT: uunpklo z4.h, z1.b +; VBITS_GE_128-NEXT: udivr z4.s, p0/m, z4.s, z5.s ; VBITS_GE_128-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; VBITS_GE_128-NEXT: uunpklo z3.h, z0.b -; VBITS_GE_128-NEXT: udivr z5.s, p0/m, z5.s, z6.s -; VBITS_GE_128-NEXT: uunpkhi z6.s, z4.h -; VBITS_GE_128-NEXT: uunpkhi z7.s, z3.h -; VBITS_GE_128-NEXT: uunpklo z4.s, z4.h +; VBITS_GE_128-NEXT: uunpklo z3.h, z1.b +; VBITS_GE_128-NEXT: uunpklo z5.h, z0.b +; VBITS_GE_128-NEXT: uunpkhi z6.s, z3.h +; VBITS_GE_128-NEXT: uunpkhi z7.s, z5.h ; VBITS_GE_128-NEXT: uunpklo z3.s, z3.h +; VBITS_GE_128-NEXT: uunpklo z5.s, z5.h ; VBITS_GE_128-NEXT: udivr z6.s, p0/m, z6.s, z7.s -; VBITS_GE_128-NEXT: udiv z3.s, p0/m, z3.s, z4.s -; VBITS_GE_128-NEXT: uzp1 z2.h, z2.h, z5.h +; VBITS_GE_128-NEXT: udivr z3.s, p0/m, z3.s, z5.s +; VBITS_GE_128-NEXT: uzp1 z2.h, z2.h, z4.h ; VBITS_GE_128-NEXT: uzp1 z3.h, z3.h, z6.h ; VBITS_GE_128-NEXT: uzp1 z2.b, z3.b, z2.b ; VBITS_GE_128-NEXT: mls v0.16b, v2.16b, v1.16b @@ -916,9 +918,9 @@ ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_256-NEXT: uunpklo z2.h, z1.b ; VBITS_GE_256-NEXT: uunpklo z3.h, z0.b -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: uunpkhi z4.s, z2.h ; VBITS_GE_256-NEXT: uunpkhi z5.s, z3.h +; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: uunpklo z2.s, z2.h ; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h ; VBITS_GE_256-NEXT: udivr z4.s, p0/m, z4.s, z5.s @@ -935,9 +937,9 @@ ; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_512-NEXT: uunpklo z2.h, z1.b ; VBITS_GE_512-NEXT: uunpklo z3.h, z0.b -; VBITS_GE_512-NEXT: ptrue p0.s, vl16 ; VBITS_GE_512-NEXT: uunpklo z2.s, z2.h ; VBITS_GE_512-NEXT: uunpklo z3.s, z3.h +; VBITS_GE_512-NEXT: ptrue p0.s, vl16 ; VBITS_GE_512-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_512-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_512-NEXT: uzp1 z2.b, z2.b, z2.b @@ -1039,12 +1041,12 @@ ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z3.s, z3.h ; CHECK-NEXT: udivr z6.s, p1/m, z6.s, z7.s -; CHECK-NEXT: uunpkhi z7.s, z4.h ; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s -; CHECK-NEXT: uunpkhi z3.s, z5.h +; CHECK-NEXT: uunpkhi z3.s, z4.h +; CHECK-NEXT: uunpkhi z7.s, z5.h ; CHECK-NEXT: uunpklo z4.s, z4.h ; CHECK-NEXT: uunpklo z5.s, z5.h -; CHECK-NEXT: udiv z3.s, p1/m, z3.s, z7.s +; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z7.s ; CHECK-NEXT: udivr z4.s, p1/m, z4.s, z5.s ; CHECK-NEXT: uzp1 z2.h, z2.h, z6.h ; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h @@ -1112,9 +1114,9 @@ ; VBITS_GE_128: // %bb.0: ; VBITS_GE_128-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_128-NEXT: // kill: def $q0 killed $q0 def $z0 -; VBITS_GE_128-NEXT: ptrue p0.s, vl4 ; VBITS_GE_128-NEXT: uunpkhi z2.s, z1.h ; VBITS_GE_128-NEXT: uunpkhi z3.s, z0.h +; VBITS_GE_128-NEXT: ptrue p0.s, vl4 ; VBITS_GE_128-NEXT: uunpklo z4.s, z1.h ; VBITS_GE_128-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_128-NEXT: uunpklo z5.s, z0.h @@ -1129,9 +1131,9 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0 -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: uunpklo z2.s, z1.h ; VBITS_GE_256-NEXT: uunpklo z3.s, z0.h +; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_256-NEXT: mls v0.8h, v2.8h, v1.8h @@ -1142,9 +1144,9 @@ ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 def $z0 -; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: uunpklo z2.s, z1.h ; VBITS_GE_512-NEXT: uunpklo z3.s, z0.h +; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_512-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_512-NEXT: mls v0.8h, v2.8h, v1.8h @@ -1159,21 +1161,23 @@ ; VBITS_GE_128: // %bb.0: ; VBITS_GE_128-NEXT: ldp q2, q0, [x0] ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 -; VBITS_GE_128-NEXT: uunpkhi z17.s, z2.h +; VBITS_GE_128-NEXT: uunpklo z17.s, z2.h ; VBITS_GE_128-NEXT: ldp q3, q1, [x1] ; VBITS_GE_128-NEXT: uunpkhi z5.s, z0.h ; VBITS_GE_128-NEXT: uunpklo z7.s, z0.h -; VBITS_GE_128-NEXT: uunpkhi z16.s, z3.h -; VBITS_GE_128-NEXT: udivr z16.s, p0/m, z16.s, z17.s +; VBITS_GE_128-NEXT: uunpklo z16.s, z3.h ; VBITS_GE_128-NEXT: uunpkhi z4.s, z1.h ; VBITS_GE_128-NEXT: uunpklo z6.s, z1.h ; VBITS_GE_128-NEXT: udivr z4.s, p0/m, z4.s, z5.s -; VBITS_GE_128-NEXT: uunpklo z5.s, z3.h +; VBITS_GE_128-NEXT: movprfx z5, z7 +; VBITS_GE_128-NEXT: udiv z5.s, p0/m, z5.s, z6.s +; VBITS_GE_128-NEXT: uunpkhi z6.s, z3.h +; VBITS_GE_128-NEXT: uunpkhi z7.s, z2.h ; VBITS_GE_128-NEXT: udivr z6.s, p0/m, z6.s, z7.s -; VBITS_GE_128-NEXT: uunpklo z7.s, z2.h -; VBITS_GE_128-NEXT: udivr z5.s, p0/m, z5.s, z7.s -; VBITS_GE_128-NEXT: uzp1 z4.h, z6.h, z4.h -; VBITS_GE_128-NEXT: uzp1 z5.h, z5.h, z16.h +; VBITS_GE_128-NEXT: movprfx z7, z17 +; VBITS_GE_128-NEXT: udiv z7.s, p0/m, z7.s, z16.s +; VBITS_GE_128-NEXT: uzp1 z4.h, z5.h, z4.h +; VBITS_GE_128-NEXT: uzp1 z5.h, z7.h, z6.h ; VBITS_GE_128-NEXT: mls v2.8h, v5.8h, v3.8h ; VBITS_GE_128-NEXT: mls v0.8h, v4.8h, v1.8h ; VBITS_GE_128-NEXT: stp q2, q0, [x0] @@ -1486,7 +1490,7 @@ ; VBITS_GE_128: // %bb.0: ; VBITS_GE_128-NEXT: ldp q4, q5, [x1] ; VBITS_GE_128-NEXT: ptrue p0.d, vl2 -; VBITS_GE_128-NEXT: ldp q7, q6, [x1, #32] +; VBITS_GE_128-NEXT: ldp q6, q7, [x1, #32] ; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32] ; VBITS_GE_128-NEXT: ldp q2, q3, [x0] ; VBITS_GE_128-NEXT: movprfx z16, z3 @@ -1494,15 +1498,15 @@ ; VBITS_GE_128-NEXT: movprfx z17, z2 ; VBITS_GE_128-NEXT: udiv z17.d, p0/m, z17.d, z4.d ; VBITS_GE_128-NEXT: mul z5.d, p0/m, z5.d, z16.d -; VBITS_GE_128-NEXT: movprfx z16, z1 -; VBITS_GE_128-NEXT: udiv z16.d, p0/m, z16.d, z6.d ; VBITS_GE_128-NEXT: mul z4.d, p0/m, z4.d, z17.d +; VBITS_GE_128-NEXT: movprfx z16, z1 +; VBITS_GE_128-NEXT: udiv z16.d, p0/m, z16.d, z7.d ; VBITS_GE_128-NEXT: movprfx z17, z0 -; VBITS_GE_128-NEXT: udiv z17.d, p0/m, z17.d, z7.d -; VBITS_GE_128-NEXT: mul z6.d, p0/m, z6.d, z16.d -; VBITS_GE_128-NEXT: mul z7.d, p0/m, z7.d, z17.d -; VBITS_GE_128-NEXT: sub v0.2d, v0.2d, v7.2d -; VBITS_GE_128-NEXT: sub v1.2d, v1.2d, v6.2d +; VBITS_GE_128-NEXT: udiv z17.d, p0/m, z17.d, z6.d +; VBITS_GE_128-NEXT: mul z7.d, p0/m, z7.d, z16.d +; VBITS_GE_128-NEXT: mul z6.d, p0/m, z6.d, z17.d +; VBITS_GE_128-NEXT: sub v0.2d, v0.2d, v6.2d +; VBITS_GE_128-NEXT: sub v1.2d, v1.2d, v7.2d ; VBITS_GE_128-NEXT: sub v2.2d, v2.2d, v4.2d ; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32] ; VBITS_GE_128-NEXT: sub v0.2d, v3.2d, v5.2d Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll @@ -34,12 +34,12 @@ define void @select_v32i8(<32 x i8>* %a, <32 x i8>* %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.b, vl32 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: mov z2.b, w8 +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: and z2.b, z2.b, #0x1 ; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0 ; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b @@ -55,31 +55,31 @@ define void @select_v64i8(<64 x i8>* %a, <64 x i8>* %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 -; VBITS_GE_256-NEXT: ptrue p1.b -; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] +; VBITS_GE_256-NEXT: mov w9, #32 +; VBITS_GE_256-NEXT: and w8, w2, #0x1 +; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x9] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] +; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x9] ; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.b, w9 +; VBITS_GE_256-NEXT: mov z4.b, w8 ; VBITS_GE_256-NEXT: and z4.b, z4.b, #0x1 +; VBITS_GE_256-NEXT: ptrue p1.b ; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z4.b, #0 -; VBITS_GE_256-NEXT: sel z1.b, p1, z1.b, z3.b ; VBITS_GE_256-NEXT: sel z0.b, p1, z0.b, z2.b -; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8] +; VBITS_GE_256-NEXT: sel z1.b, p1, z1.b, z3.b +; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x9] ; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: select_v64i8: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p0.b, vl64 +; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1] -; VBITS_GE_512-NEXT: ptrue p1.b ; VBITS_GE_512-NEXT: mov z2.b, w8 +; VBITS_GE_512-NEXT: ptrue p1.b ; VBITS_GE_512-NEXT: and z2.b, z2.b, #0x1 ; VBITS_GE_512-NEXT: cmpne p1.b, p1/z, z2.b, #0 ; VBITS_GE_512-NEXT: sel z0.b, p1, z0.b, z1.b @@ -95,12 +95,12 @@ define void @select_v128i8(<128 x i8>* %a, <128 x i8>* %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-LABEL: select_v128i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.b, vl128 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: mov z2.b, w8 +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: and z2.b, z2.b, #0x1 ; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0 ; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b @@ -116,12 +116,12 @@ define void @select_v256i8(<256 x i8>* %a, <256 x i8>* %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-LABEL: select_v256i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: mov z2.b, w8 +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: and z2.b, z2.b, #0x1 ; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0 ; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b @@ -163,12 +163,12 @@ define void @select_v16i16(<16 x i16>* %a, <16 x i16>* %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.h, vl16 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: and z2.h, z2.h, #0x1 ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h @@ -184,31 +184,31 @@ define void @select_v32i16(<32 x i16>* %a, <32 x i16>* %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 +; VBITS_GE_256-NEXT: mov x9, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: ptrue p1.h -; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: and w8, w2, #0x1 +; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] +; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x9, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.h, w9 +; VBITS_GE_256-NEXT: mov z4.h, w8 ; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1 +; VBITS_GE_256-NEXT: ptrue p1.h ; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z4.h, #0 -; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h ; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z2.h -; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h +; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x9, lsl #1] ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: select_v32i16: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32 +; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1] -; VBITS_GE_512-NEXT: ptrue p1.h ; VBITS_GE_512-NEXT: mov z2.h, w8 +; VBITS_GE_512-NEXT: ptrue p1.h ; VBITS_GE_512-NEXT: and z2.h, z2.h, #0x1 ; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h @@ -224,12 +224,12 @@ define void @select_v64i16(<64 x i16>* %a, <64 x i16>* %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-LABEL: select_v64i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.h, vl64 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: and z2.h, z2.h, #0x1 ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h @@ -245,12 +245,12 @@ define void @select_v128i16(<128 x i16>* %a, <128 x i16>* %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-LABEL: select_v128i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.h, vl128 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: and z2.h, z2.h, #0x1 ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 ; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h @@ -292,12 +292,12 @@ define void @select_v8i32(<8 x i32>* %a, <8 x i32>* %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: and z2.s, z2.s, #0x1 ; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0 ; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s @@ -313,31 +313,31 @@ define void @select_v16i32(<16 x i32>* %a, <16 x i32>* %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 +; VBITS_GE_256-NEXT: mov x9, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: ptrue p1.s -; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: and w8, w2, #0x1 +; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x9, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.s, w9 +; VBITS_GE_256-NEXT: mov z4.s, w8 ; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1 +; VBITS_GE_256-NEXT: ptrue p1.s ; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z4.s, #0 -; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s ; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z2.s -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2] ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: select_v16i32: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 +; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1] -; VBITS_GE_512-NEXT: ptrue p1.s ; VBITS_GE_512-NEXT: mov z2.s, w8 +; VBITS_GE_512-NEXT: ptrue p1.s ; VBITS_GE_512-NEXT: and z2.s, z2.s, #0x1 ; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0 ; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s @@ -353,12 +353,12 @@ define void @select_v32i32(<32 x i32>* %a, <32 x i32>* %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-LABEL: select_v32i32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.s, vl32 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: and z2.s, z2.s, #0x1 ; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0 ; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s @@ -374,12 +374,12 @@ define void @select_v64i32(<64 x i32>* %a, <64 x i32>* %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-LABEL: select_v64i32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.s, vl64 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: and z2.s, z2.s, #0x1 ; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0 ; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s @@ -421,12 +421,12 @@ define void @select_v4i64(<4 x i64>* %a, <4 x i64>* %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: and z2.d, z2.d, #0x1 ; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0 ; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d @@ -442,31 +442,31 @@ define void @select_v8i64(<8 x i64>* %a, <8 x i64>* %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 +; VBITS_GE_256-NEXT: mov x9, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: ptrue p1.d -; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] +; VBITS_GE_256-NEXT: and w8, w2, #0x1 +; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] +; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x9, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.d, x9 +; VBITS_GE_256-NEXT: mov z4.d, x8 ; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1 +; VBITS_GE_256-NEXT: ptrue p1.d ; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z4.d, #0 -; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d ; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z2.d -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] +; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x9, lsl #3] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: select_v8i64: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 +; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0] ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1] -; VBITS_GE_512-NEXT: ptrue p1.d ; VBITS_GE_512-NEXT: mov z2.d, x8 +; VBITS_GE_512-NEXT: ptrue p1.d ; VBITS_GE_512-NEXT: and z2.d, z2.d, #0x1 ; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0 ; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d @@ -482,12 +482,12 @@ define void @select_v16i64(<16 x i64>* %a, <16 x i64>* %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-LABEL: select_v16i64: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.d, vl16 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: and z2.d, z2.d, #0x1 ; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0 ; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d @@ -503,12 +503,12 @@ define void @select_v32i64(<32 x i64>* %a, <32 x i64>* %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-LABEL: select_v32i64: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.d, vl32 +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: and z2.d, z2.d, #0x1 ; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0 ; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll @@ -50,8 +50,8 @@ define void @ashr_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: ashr_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -454,8 +454,8 @@ define void @lshr_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: lshr_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -856,8 +856,8 @@ define void @shl_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: shl_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll @@ -258,8 +258,8 @@ ; VBITS_GE_256-LABEL: ucvtf_v8i16_v8f64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ldr q0, [x0] -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s @@ -1200,8 +1200,8 @@ ; VBITS_GE_256-LABEL: scvtf_v8i16_v8f64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ldr q0, [x0] -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s Index: llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll @@ -50,8 +50,8 @@ define void @select_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: select_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] Index: llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll @@ -87,17 +87,17 @@ define <32 x float> @load_v32f32(<32 x float>* %a) #0 { ; VBITS_GE_256-LABEL: load_v32f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x9, #16 -; VBITS_GE_256-NEXT: mov x10, #24 -; VBITS_GE_256-NEXT: mov x11, #8 +; VBITS_GE_256-NEXT: mov x9, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x10, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x11, lsl #2] +; VBITS_GE_256-NEXT: mov x10, #16 +; VBITS_GE_256-NEXT: mov x11, #24 +; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x10, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x11, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x9, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x10, lsl #2] -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2] -; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8, x11, lsl #2] +; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x11, lsl #2] +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x10, lsl #2] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8, x9, lsl #2] ; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x8] ; VBITS_GE_256-NEXT: ret ; @@ -132,44 +132,44 @@ ; VBITS_GE_256-LABEL: load_v64f32: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x9, #8 -; VBITS_GE_256-NEXT: mov x10, #48 -; VBITS_GE_256-NEXT: mov x11, #56 +; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x10, #16 +; VBITS_GE_256-NEXT: mov x11, #24 ; VBITS_GE_256-NEXT: mov x12, #32 ; VBITS_GE_256-NEXT: mov x13, #40 -; VBITS_GE_256-NEXT: mov x14, #16 -; VBITS_GE_256-NEXT: mov x15, #24 -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x10, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x11, lsl #2] +; VBITS_GE_256-NEXT: mov x14, #48 +; VBITS_GE_256-NEXT: mov x15, #56 +; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x14, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x15, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x12, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x13, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x0, x14, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x0, x15, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x0, x10, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x0, x11, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x0, x9, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z7.s }, p0/z, [x0] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x11, lsl #2] -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x10, lsl #2] +; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x15, lsl #2] +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x14, lsl #2] ; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x8, x13, lsl #2] ; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8, x12, lsl #2] -; VBITS_GE_256-NEXT: st1w { z5.s }, p0, [x8, x15, lsl #2] -; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x8, x14, lsl #2] +; VBITS_GE_256-NEXT: st1w { z5.s }, p0, [x8, x11, lsl #2] +; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x8, x10, lsl #2] ; VBITS_GE_256-NEXT: st1w { z6.s }, p0, [x8, x9, lsl #2] ; VBITS_GE_256-NEXT: st1w { z7.s }, p0, [x8] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: load_v64f32: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: mov x9, #32 -; VBITS_GE_512-NEXT: mov x10, #48 -; VBITS_GE_512-NEXT: mov x11, #16 +; VBITS_GE_512-NEXT: mov x9, #16 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 -; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2] -; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0, x10, lsl #2] -; VBITS_GE_512-NEXT: ld1w { z2.s }, p0/z, [x0, x11, lsl #2] +; VBITS_GE_512-NEXT: mov x10, #32 +; VBITS_GE_512-NEXT: mov x11, #48 +; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0, x10, lsl #2] +; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0, x11, lsl #2] +; VBITS_GE_512-NEXT: ld1w { z2.s }, p0/z, [x0, x9, lsl #2] ; VBITS_GE_512-NEXT: ld1w { z3.s }, p0/z, [x0] -; VBITS_GE_512-NEXT: st1w { z1.s }, p0, [x8, x10, lsl #2] -; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2] -; VBITS_GE_512-NEXT: st1w { z2.s }, p0, [x8, x11, lsl #2] +; VBITS_GE_512-NEXT: st1w { z1.s }, p0, [x8, x11, lsl #2] +; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8, x10, lsl #2] +; VBITS_GE_512-NEXT: st1w { z2.s }, p0, [x8, x9, lsl #2] ; VBITS_GE_512-NEXT: st1w { z3.s }, p0, [x8] ; VBITS_GE_512-NEXT: ret ; Index: llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll @@ -51,8 +51,8 @@ define i8 @andv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: andv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d @@ -409,8 +409,8 @@ define i8 @eorv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: eorv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d @@ -767,8 +767,8 @@ define i8 @orv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: orv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d Index: llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -14,10 +14,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldrb w9, [x0, #1] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrb w8, [x0, #1] -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: cmeq v0.2s, v0.2s, #0 ; CHECK-NEXT: sshll v0.2d, v0.2s, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 @@ -65,9 +65,9 @@ ; VBITS_GE_256-NEXT: ldr d0, [x0] ; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: cmeq v0.8b, v0.8b, #0 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] +; VBITS_GE_256-NEXT: cmeq v0.8b, v0.8b, #0 ; VBITS_GE_256-NEXT: zip2 v1.8b, v0.8b, v0.8b ; VBITS_GE_256-NEXT: zip1 v0.8b, v0.8b, v0.8b ; VBITS_GE_256-NEXT: shl v1.4h, v1.4h, #8 @@ -173,10 +173,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldrh w9, [x0, #2] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrh w8, [x0, #2] -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: cmeq v0.2s, v0.2s, #0 ; CHECK-NEXT: sshll v0.2d, v0.2s, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 @@ -222,9 +222,9 @@ ; VBITS_GE_256-NEXT: ldr q0, [x0] ; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: cmeq v0.8h, v0.8h, #0 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] +; VBITS_GE_256-NEXT: cmeq v0.8h, v0.8h, #0 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s @@ -584,22 +584,22 @@ define void @masked_gather_v2f16(<2 x half>* %a, <2 x half*>* %b) vscale_range(2,0) #0 { ; CHECK-LABEL: masked_gather_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: fcmeq v1.4h, v1.4h, #0.0 -; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umov w9, v1.h[1] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: sshr v1.2s, v1.2s, #16 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: fcmeq v0.4h, v0.4h, #0.0 +; CHECK-NEXT: umov w8, v0.h[0] +; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: shl v0.2s, v0.2s, #16 +; CHECK-NEXT: sshr v0.2s, v0.2s, #16 +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: mov v1.h[0], w9 +; CHECK-NEXT: mov v1.h[1], w8 +; CHECK-NEXT: shl v0.4h, v1.4h, #15 ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: mov v0.h[0], w8 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z0.d, z0.s @@ -646,9 +646,9 @@ ; VBITS_GE_256-NEXT: ldr q0, [x0] ; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: fcmeq v0.8h, v0.8h, #0.0 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] +; VBITS_GE_256-NEXT: fcmeq v0.8h, v0.8h, #0.0 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s Index: llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll @@ -12,22 +12,22 @@ define <2 x half> @masked_load_v2f16(<2 x half>* %ap, <2 x half>* %bp) vscale_range(2,0) #0 { ; CHECK-LABEL: masked_load_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: movi d0, #0000000000000000 -; CHECK-NEXT: ldr s2, [x1] +; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: fcmeq v1.4h, v1.4h, v2.4h -; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umov w9, v1.h[1] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: sshr v1.2s, v1.2s, #16 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov w9, v1.s[1] -; CHECK-NEXT: mov v0.h[0], w8 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: shl v0.4h, v0.4h, #15 +; CHECK-NEXT: ldr s1, [x1] +; CHECK-NEXT: fcmeq v0.4h, v0.4h, v1.4h +; CHECK-NEXT: movi d1, #0000000000000000 +; CHECK-NEXT: umov w8, v0.h[0] +; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: shl v0.2s, v0.2s, #16 +; CHECK-NEXT: sshr v0.2s, v0.2s, #16 +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: mov v1.h[0], w9 +; CHECK-NEXT: mov v1.h[1], w8 +; CHECK-NEXT: shl v0.4h, v1.4h, #15 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] @@ -163,8 +163,8 @@ define <64 x i8> @masked_load_v64i8(<64 x i8>* %ap, <64 x i8>* %bp) #0 { ; VBITS_GE_256-LABEL: masked_load_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w9, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w9, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x9] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x9] Index: llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -14,10 +14,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldrb w9, [x0, #1] ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrb w8, [x0, #1] -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: cmeq v1.2s, v0.2s, #0 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: sshll v1.2d, v1.2s, #0 @@ -36,15 +36,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1] ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: cmeq v2.4h, v0.4h, #0 +; CHECK-NEXT: cmeq v1.4h, v0.4h, #0 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: sunpklo z2.s, z2.h -; CHECK-NEXT: sunpklo z2.d, z2.s -; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0 -; CHECK-NEXT: st1b { z0.d }, p0, [z1.d] +; CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 +; CHECK-NEXT: st1b { z0.d }, p0, [z2.d] ; CHECK-NEXT: ret %vals = load <4 x i8>, <4 x i8>* %a %ptrs = load <4 x i8*>, <4 x i8*>* %b @@ -59,10 +59,10 @@ ; VBITS_GE_256-NEXT: ldr d0, [x0] ; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: cmeq v1.8b, v0.8b, #0 -; VBITS_GE_256-NEXT: zip1 v5.8b, v0.8b, v0.8b ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1] +; VBITS_GE_256-NEXT: cmeq v1.8b, v0.8b, #0 +; VBITS_GE_256-NEXT: zip1 v5.8b, v0.8b, v0.8b ; VBITS_GE_256-NEXT: zip1 v2.8b, v1.8b, v0.8b ; VBITS_GE_256-NEXT: zip2 v1.8b, v1.8b, v0.8b ; VBITS_GE_256-NEXT: zip2 v0.8b, v0.8b, v0.8b @@ -88,16 +88,16 @@ ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr d0, [x0] ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 -; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1] -; VBITS_GE_512-NEXT: cmeq v2.8b, v0.8b, #0 +; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x1] +; VBITS_GE_512-NEXT: cmeq v1.8b, v0.8b, #0 ; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_512-NEXT: sunpklo z2.h, z2.b +; VBITS_GE_512-NEXT: sunpklo z1.h, z1.b ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_512-NEXT: sunpklo z2.s, z2.h -; VBITS_GE_512-NEXT: sunpklo z2.d, z2.s -; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z2.d, #0 -; VBITS_GE_512-NEXT: st1b { z0.d }, p0, [z1.d] +; VBITS_GE_512-NEXT: sunpklo z1.s, z1.h +; VBITS_GE_512-NEXT: sunpklo z1.d, z1.s +; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z1.d, #0 +; VBITS_GE_512-NEXT: st1b { z0.d }, p0, [z2.d] ; VBITS_GE_512-NEXT: ret %vals = load <8 x i8>, <8 x i8>* %a %ptrs = load <8 x i8*>, <8 x i8*>* %b @@ -111,16 +111,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl16 -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] -; CHECK-NEXT: cmeq v2.16b, v0.16b, #0 +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1] +; CHECK-NEXT: cmeq v1.16b, v0.16b, #0 ; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: sunpklo z2.h, z2.b +; CHECK-NEXT: sunpklo z1.h, z1.b ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: sunpklo z2.s, z2.h -; CHECK-NEXT: sunpklo z2.d, z2.s -; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0 -; CHECK-NEXT: st1b { z0.d }, p0, [z1.d] +; CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 +; CHECK-NEXT: st1b { z0.d }, p0, [z2.d] ; CHECK-NEXT: ret %vals = load <16 x i8>, <16 x i8>* %a %ptrs = load <16 x i8*>, <16 x i8*>* %b @@ -161,10 +161,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldrh w9, [x0, #2] ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrh w8, [x0, #2] -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: cmeq v1.2s, v0.2s, #0 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: sshll v1.2d, v1.2s, #0 @@ -183,14 +183,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] -; CHECK-NEXT: cmeq v2.4h, v0.4h, #0 +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1] +; CHECK-NEXT: cmeq v1.4h, v0.4h, #0 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: sunpklo z2.s, z2.h -; CHECK-NEXT: sunpklo z2.d, z2.s -; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0 -; CHECK-NEXT: st1h { z0.d }, p0, [z1.d] +; CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 +; CHECK-NEXT: st1h { z0.d }, p0, [z2.d] ; CHECK-NEXT: ret %vals = load <4 x i16>, <4 x i16>* %a %ptrs = load <4 x i16*>, <4 x i16*>* %b @@ -205,8 +205,8 @@ ; VBITS_GE_256-NEXT: ldr q0, [x0] ; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: cmeq v1.8h, v0.8h, #0 ; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3] +; VBITS_GE_256-NEXT: cmeq v1.8h, v0.8h, #0 ; VBITS_GE_256-NEXT: ext v3.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h @@ -228,14 +228,14 @@ ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr q0, [x0] ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 -; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1] -; VBITS_GE_512-NEXT: cmeq v2.8h, v0.8h, #0 +; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x1] +; VBITS_GE_512-NEXT: cmeq v1.8h, v0.8h, #0 ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_512-NEXT: sunpklo z2.s, z2.h -; VBITS_GE_512-NEXT: sunpklo z2.d, z2.s -; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z2.d, #0 -; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [z1.d] +; VBITS_GE_512-NEXT: sunpklo z1.s, z1.h +; VBITS_GE_512-NEXT: sunpklo z1.d, z1.s +; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z1.d, #0 +; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [z2.d] ; VBITS_GE_512-NEXT: ret %vals = load <8 x i16>, <8 x i16>* %a %ptrs = load <8 x i16*>, <8 x i16*>* %b @@ -533,28 +533,28 @@ define void @masked_scatter_v2f16(<2 x half>* %a, <2 x half*>* %b) vscale_range(2,0) #0 { ; CHECK-LABEL: masked_scatter_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: movi d2, #0000000000000000 ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: fcmeq v2.4h, v1.4h, #0.0 -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: umov w8, v2.h[0] -; CHECK-NEXT: umov w9, v2.h[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 -; CHECK-NEXT: sshr v2.2s, v2.2s, #16 -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov w9, v2.s[1] +; CHECK-NEXT: fcmeq v1.4h, v0.4h, #0.0 +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: umov w8, v1.h[0] +; CHECK-NEXT: umov w9, v1.h[1] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: mov v1.s[1], w9 +; CHECK-NEXT: shl v1.2s, v1.2s, #16 +; CHECK-NEXT: sshr v1.2s, v1.2s, #16 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: mov w8, v1.s[1] +; CHECK-NEXT: mov v2.h[0], w9 +; CHECK-NEXT: mov v2.h[1], w8 +; CHECK-NEXT: shl v1.4h, v2.4h, #15 ; CHECK-NEXT: ldr q2, [x1] -; CHECK-NEXT: mov v0.h[0], w8 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: shl v0.4h, v0.4h, #15 -; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: sunpklo z0.s, z0.h -; CHECK-NEXT: sunpklo z0.d, z0.s -; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 -; CHECK-NEXT: uunpklo z0.d, z1.s +; CHECK-NEXT: cmlt v1.4h, v1.4h, #0 +; CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 ; CHECK-NEXT: st1h { z0.d }, p0, [z2.d] ; CHECK-NEXT: ret %vals = load <2 x half>, <2 x half>* %a @@ -569,14 +569,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] -; CHECK-NEXT: fcmeq v2.4h, v0.4h, #0.0 +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1] +; CHECK-NEXT: fcmeq v1.4h, v0.4h, #0.0 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: sunpklo z2.s, z2.h -; CHECK-NEXT: sunpklo z2.d, z2.s -; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0 -; CHECK-NEXT: st1h { z0.d }, p0, [z1.d] +; CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 +; CHECK-NEXT: st1h { z0.d }, p0, [z2.d] ; CHECK-NEXT: ret %vals = load <4 x half>, <4 x half>* %a %ptrs = load <4 x half*>, <4 x half*>* %b @@ -591,8 +591,8 @@ ; VBITS_GE_256-NEXT: ldr q0, [x0] ; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: fcmeq v1.8h, v0.8h, #0.0 ; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3] +; VBITS_GE_256-NEXT: fcmeq v1.8h, v0.8h, #0.0 ; VBITS_GE_256-NEXT: ext v3.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s @@ -614,14 +614,14 @@ ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr q0, [x0] ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 -; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1] -; VBITS_GE_512-NEXT: fcmeq v2.8h, v0.8h, #0.0 +; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x1] +; VBITS_GE_512-NEXT: fcmeq v1.8h, v0.8h, #0.0 ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_512-NEXT: sunpklo z2.s, z2.h -; VBITS_GE_512-NEXT: sunpklo z2.d, z2.s -; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z2.d, #0 -; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [z1.d] +; VBITS_GE_512-NEXT: sunpklo z1.s, z1.h +; VBITS_GE_512-NEXT: sunpklo z1.d, z1.s +; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z1.d, #0 +; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [z2.d] ; VBITS_GE_512-NEXT: ret %vals = load <8 x half>, <8 x half>* %a %ptrs = load <8 x half*>, <8 x half*>* %b @@ -700,12 +700,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] -; CHECK-NEXT: fcmeq v2.4s, v0.4s, #0.0 +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1] +; CHECK-NEXT: fcmeq v1.4s, v0.4s, #0.0 ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: sunpklo z2.d, z2.s -; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0 -; CHECK-NEXT: st1w { z0.d }, p0, [z1.d] +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 +; CHECK-NEXT: st1w { z0.d }, p0, [z2.d] ; CHECK-NEXT: ret %vals = load <4 x float>, <4 x float>* %a %ptrs = load <4 x float*>, <4 x float*>* %b Index: llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll @@ -12,25 +12,25 @@ define void @masked_store_v2f16(<2 x half>* %ap, <2 x half>* %bp) vscale_range(2,0) #0 { ; CHECK-LABEL: masked_store_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: movi d0, #0000000000000000 -; CHECK-NEXT: ldr s2, [x1] +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: movi d2, #0000000000000000 +; CHECK-NEXT: ldr s1, [x1] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: fcmeq v2.4h, v1.4h, v2.4h -; CHECK-NEXT: umov w8, v2.h[0] -; CHECK-NEXT: umov w9, v2.h[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 -; CHECK-NEXT: sshr v2.2s, v2.2s, #16 -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov w9, v2.s[1] -; CHECK-NEXT: mov v0.h[0], w8 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: shl v0.4h, v0.4h, #15 -; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 -; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 -; CHECK-NEXT: st1h { z1.h }, p0, [x1] +; CHECK-NEXT: fcmeq v1.4h, v0.4h, v1.4h +; CHECK-NEXT: umov w8, v1.h[0] +; CHECK-NEXT: umov w9, v1.h[1] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: mov v1.s[1], w9 +; CHECK-NEXT: shl v1.2s, v1.2s, #16 +; CHECK-NEXT: sshr v1.2s, v1.2s, #16 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: mov w8, v1.s[1] +; CHECK-NEXT: mov v2.h[0], w9 +; CHECK-NEXT: mov v2.h[1], w8 +; CHECK-NEXT: shl v1.4h, v2.4h, #15 +; CHECK-NEXT: cmlt v1.4h, v1.4h, #0 +; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0 +; CHECK-NEXT: st1h { z0.h }, p0, [x1] ; CHECK-NEXT: ret %a = load <2 x half>, <2 x half>* %ap %b = load <2 x half>, <2 x half>* %bp @@ -293,9 +293,9 @@ ; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_256-NEXT: uzp1 z3.h, z3.h, z3.h ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h -; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_256-NEXT: uzp1 z2.b, z2.b, z2.b ; VBITS_GE_256-NEXT: uzp1 z3.b, z3.b, z3.b +; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_256-NEXT: mov v3.d[1], v2.d[0] ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b Index: llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll @@ -167,9 +167,9 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: ptrue p1.d ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] +; VBITS_GE_256-NEXT: ptrue p1.d ; VBITS_GE_256-NEXT: revh z0.d, p1/m, z0.d ; VBITS_GE_256-NEXT: revh z1.d, p1/m, z1.d ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] @@ -206,12 +206,12 @@ ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: mov z1.d, z0.d[2] ; CHECK-NEXT: mov z2.d, z0.d[3] -; CHECK-NEXT: mov x10, v0.d[1] -; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: fmov x9, d2 -; CHECK-NEXT: fmov x11, d0 -; CHECK-NEXT: stp x9, x8, [sp, #16] -; CHECK-NEXT: stp x10, x11, [sp] +; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: fmov x10, d2 +; CHECK-NEXT: mov x8, v0.d[1] +; CHECK-NEXT: stp x10, x9, [sp, #16] +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: stp x8, x9, [sp] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: mov sp, x29 @@ -241,22 +241,22 @@ ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 ; CHECK-NEXT: ptrue p0.s, vl8 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: mov z1.s, z0.s[4] ; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: fmov w11, s1 +; CHECK-NEXT: mov z1.s, z0.s[5] +; CHECK-NEXT: fmov w12, s1 ; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: mov w11, v0.s[3] -; CHECK-NEXT: fmov w10, s0 -; CHECK-NEXT: mov z1.s, z0.s[4] -; CHECK-NEXT: mov z2.s, z0.s[5] -; CHECK-NEXT: mov z3.s, z0.s[6] -; CHECK-NEXT: mov z0.s, z0.s[7] -; CHECK-NEXT: stp w8, w10, [sp, #24] -; CHECK-NEXT: fmov w10, s1 -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: stp w11, w9, [sp, #16] -; CHECK-NEXT: fmov w9, s3 +; CHECK-NEXT: mov w10, v0.s[3] +; CHECK-NEXT: mov z1.s, z0.s[6] +; CHECK-NEXT: stp w12, w11, [sp, #8] ; CHECK-NEXT: fmov w11, s0 -; CHECK-NEXT: stp w8, w10, [sp, #8] -; CHECK-NEXT: stp w11, w9, [sp] +; CHECK-NEXT: mov z0.s, z0.s[7] +; CHECK-NEXT: stp w10, w9, [sp, #16] +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: stp w8, w11, [sp, #24] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: stp w9, w8, [sp] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: mov sp, x29 @@ -358,45 +358,45 @@ ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: fmov w11, s0 ; CHECK-NEXT: mov z1.h, z0.h[8] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: mov z5.h, z0.h[12] ; CHECK-NEXT: mov z2.h, z0.h[9] +; CHECK-NEXT: fmov w12, s1 ; CHECK-NEXT: mov z3.h, z0.h[10] ; CHECK-NEXT: mov z4.h, z0.h[11] +; CHECK-NEXT: strh w11, [sp, #14] ; CHECK-NEXT: fmov w11, s2 -; CHECK-NEXT: strh w9, [sp, #30] -; CHECK-NEXT: fmov w9, s5 +; CHECK-NEXT: strh w12, [sp, #30] ; CHECK-NEXT: fmov w12, s3 -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s4 +; CHECK-NEXT: mov z5.h, z0.h[12] ; CHECK-NEXT: mov z6.h, z0.h[13] +; CHECK-NEXT: strh w11, [sp, #28] +; CHECK-NEXT: fmov w11, s4 +; CHECK-NEXT: strh w12, [sp, #26] +; CHECK-NEXT: fmov w12, s5 ; CHECK-NEXT: mov z7.h, z0.h[14] ; CHECK-NEXT: mov z16.h, z0.h[15] -; CHECK-NEXT: umov w10, v0.h[1] -; CHECK-NEXT: strh w9, [sp, #22] -; CHECK-NEXT: umov w9, v0.h[2] -; CHECK-NEXT: strh w11, [sp, #28] +; CHECK-NEXT: strh w11, [sp, #24] ; CHECK-NEXT: fmov w11, s6 -; CHECK-NEXT: strh w12, [sp, #26] +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: umov w9, v0.h[2] +; CHECK-NEXT: strh w12, [sp, #22] ; CHECK-NEXT: fmov w12, s7 -; CHECK-NEXT: strh w8, [sp, #24] -; CHECK-NEXT: fmov w8, s16 -; CHECK-NEXT: strh w10, [sp, #12] ; CHECK-NEXT: strh w11, [sp, #20] -; CHECK-NEXT: umov w11, v0.h[3] +; CHECK-NEXT: fmov w11, s16 +; CHECK-NEXT: umov w10, v0.h[3] ; CHECK-NEXT: strh w12, [sp, #18] ; CHECK-NEXT: umov w12, v0.h[4] -; CHECK-NEXT: strh w8, [sp, #16] -; CHECK-NEXT: umov w8, v0.h[5] -; CHECK-NEXT: umov w10, v0.h[6] +; CHECK-NEXT: strh w11, [sp, #16] +; CHECK-NEXT: umov w11, v0.h[5] +; CHECK-NEXT: strh w8, [sp, #12] +; CHECK-NEXT: umov w8, v0.h[6] ; CHECK-NEXT: strh w9, [sp, #10] ; CHECK-NEXT: umov w9, v0.h[7] -; CHECK-NEXT: strh w11, [sp, #8] +; CHECK-NEXT: strh w10, [sp, #8] ; CHECK-NEXT: strh w12, [sp, #6] -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: strh w10, [sp, #2] +; CHECK-NEXT: strh w11, [sp, #4] +; CHECK-NEXT: strh w8, [sp, #2] ; CHECK-NEXT: strh w9, [sp] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [x0] @@ -421,43 +421,43 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: sub x9, sp, #48 ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 -; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: orr x9, x8, #0x1e -; CHECK-NEXT: orr x10, x8, #0x1c ; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: orr x12, x8, #0x10 -; CHECK-NEXT: orr x11, x8, #0x18 -; CHECK-NEXT: str h0, [sp, #22] +; CHECK-NEXT: orr x10, x8, #0x1c +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: st1 { v0.h }[4], [x9] +; CHECK-NEXT: orr x9, x8, #0x18 +; CHECK-NEXT: st1 { v0.h }[7], [x9] +; CHECK-NEXT: orr x9, x8, #0x10 +; CHECK-NEXT: st1 { v0.h }[3], [x9] ; CHECK-NEXT: orr x9, x8, #0xe -; CHECK-NEXT: st1 { v0.h }[5], [x10] -; CHECK-NEXT: orr x10, x8, #0xc -; CHECK-NEXT: st1 { v0.h }[3], [x12] -; CHECK-NEXT: mov w12, #26 ; CHECK-NEXT: st1 { v1.h }[4], [x9] +; CHECK-NEXT: orr x9, x8, #0xc +; CHECK-NEXT: st1 { v1.h }[5], [x9] ; CHECK-NEXT: orr x9, x8, #0x8 -; CHECK-NEXT: st1 { v0.h }[7], [x11] -; CHECK-NEXT: orr x11, x8, #0x2 -; CHECK-NEXT: st1 { v1.h }[5], [x10] -; CHECK-NEXT: orr x10, x8, #0x4 ; CHECK-NEXT: st1 { v1.h }[7], [x9] -; CHECK-NEXT: orr x9, x8, x12 -; CHECK-NEXT: st1 { v1.h }[2], [x11] -; CHECK-NEXT: mov w11, #10 -; CHECK-NEXT: st1 { v1.h }[1], [x10] -; CHECK-NEXT: mov w10, #18 +; CHECK-NEXT: orr x9, x8, #0x4 +; CHECK-NEXT: st1 { v0.h }[5], [x10] +; CHECK-NEXT: mov w10, #26 +; CHECK-NEXT: st1 { v1.h }[1], [x9] +; CHECK-NEXT: orr x9, x8, #0x2 +; CHECK-NEXT: st1 { v1.h }[2], [x9] +; CHECK-NEXT: orr x9, x8, x10 ; CHECK-NEXT: st1 { v0.h }[6], [x9] ; CHECK-NEXT: mov w9, #20 ; CHECK-NEXT: orr x9, x8, x9 -; CHECK-NEXT: orr x10, x8, x10 -; CHECK-NEXT: st1 { v1.h }[3], [x8] -; CHECK-NEXT: orr x8, x8, x11 -; CHECK-NEXT: str h1, [sp, #6] -; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov w10, #18 ; CHECK-NEXT: st1 { v0.h }[1], [x9] -; CHECK-NEXT: st1 { v0.h }[2], [x10] +; CHECK-NEXT: orr x9, x8, x10 +; CHECK-NEXT: st1 { v0.h }[2], [x9] +; CHECK-NEXT: mov w9, #10 +; CHECK-NEXT: st1 { v1.h }[3], [x8] +; CHECK-NEXT: orr x8, x8, x9 ; CHECK-NEXT: st1 { v1.h }[6], [x8] +; CHECK-NEXT: str h0, [sp, #22] +; CHECK-NEXT: str h1, [sp, #6] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [x2] ; CHECK-NEXT: mov sp, x29 Index: llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll @@ -50,8 +50,8 @@ define void @bitreverse_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: bitreverse_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: rbit z0.b, p0/m, z0.b Index: llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll @@ -46,8 +46,8 @@ define void @sdiv_v64i8(<64 x i8>* %a) #0 { ; VBITS_GE_256-LABEL: sdiv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: asrd z0.b, p0/m, z0.b, #5 Index: llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll @@ -19,8 +19,8 @@ ; CHECK-NEXT: sub x9, sp, #48 ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: stp s0, s0, [sp, #24] +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: stp s0, s0, [sp, #16] ; CHECK-NEXT: stp s0, s0, [sp, #8] ; CHECK-NEXT: stp s0, s0, [sp] @@ -45,14 +45,14 @@ ; CHECK-NEXT: // %bb.1: // %vector.body ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: umov w8, v0.b[8] -; CHECK-NEXT: umov w9, v0.b[1] +; CHECK-NEXT: umov w8, v0.b[1] +; CHECK-NEXT: umov w9, v0.b[8] +; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: umov w10, v0.b[9] ; CHECK-NEXT: umov w11, v0.b[2] -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: mov v1.b[1], w8 +; CHECK-NEXT: fmov s2, w9 ; CHECK-NEXT: umov w8, v0.b[10] -; CHECK-NEXT: mov v1.b[1], w9 ; CHECK-NEXT: umov w9, v0.b[3] ; CHECK-NEXT: mov v2.b[1], w10 ; CHECK-NEXT: umov w10, v0.b[11] @@ -74,49 +74,49 @@ ; CHECK-NEXT: mov x8, #16 ; CHECK-NEXT: mov v1.b[6], w9 ; CHECK-NEXT: mov x9, #24 -; CHECK-NEXT: ld1w { z4.s }, p0/z, [x0, x8, lsl #2] +; CHECK-NEXT: ld1w { z3.s }, p0/z, [x0, x8, lsl #2] +; CHECK-NEXT: ld1w { z5.s }, p0/z, [x0, x9, lsl #2] ; CHECK-NEXT: mov v2.b[6], w10 -; CHECK-NEXT: umov w10, v0.b[15] +; CHECK-NEXT: mov x10, #8 +; CHECK-NEXT: mov v1.b[7], w11 +; CHECK-NEXT: umov w11, v0.b[15] ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16 -; CHECK-NEXT: ld1w { z5.s }, p0/z, [x0, x9, lsl #2] -; CHECK-NEXT: dup v3.2d, v0.d[1] +; CHECK-NEXT: dup v4.2d, v0.d[1] ; CHECK-NEXT: uunpklo z0.h, z0.b -; CHECK-NEXT: mov v1.b[7], w11 ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: mov v2.b[7], w10 -; CHECK-NEXT: uunpklo z3.h, z3.b -; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: mov x11, #8 +; CHECK-NEXT: uunpklo z4.h, z4.b ; CHECK-NEXT: lsl z0.s, z0.s, #31 -; CHECK-NEXT: lsl z3.s, z3.s, #31 +; CHECK-NEXT: mov v2.b[7], w11 +; CHECK-NEXT: uunpklo z4.s, z4.h +; CHECK-NEXT: lsl z4.s, z4.s, #31 ; CHECK-NEXT: asr z0.s, z0.s, #31 -; CHECK-NEXT: asr z3.s, z3.s, #31 +; CHECK-NEXT: asr z4.s, z4.s, #31 ; CHECK-NEXT: uunpklo z1.h, z1.b -; CHECK-NEXT: uunpklo z2.h, z2.b ; CHECK-NEXT: and z0.s, z0.s, #0x1 -; CHECK-NEXT: and z3.s, z3.s, #0x1 +; CHECK-NEXT: and z4.s, z4.s, #0x1 +; CHECK-NEXT: uunpklo z2.h, z2.b ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x11, lsl #2] -; CHECK-NEXT: cmpne p2.s, p0/z, z3.s, #0 -; CHECK-NEXT: ld1w { z3.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x10, lsl #2] +; CHECK-NEXT: cmpne p2.s, p0/z, z4.s, #0 +; CHECK-NEXT: ld1w { z4.s }, p0/z, [x0] ; CHECK-NEXT: lsl z1.s, z1.s, #31 ; CHECK-NEXT: lsl z2.s, z2.s, #31 ; CHECK-NEXT: asr z1.s, z1.s, #31 ; CHECK-NEXT: asr z2.s, z2.s, #31 ; CHECK-NEXT: and z1.s, z1.s, #0x1 ; CHECK-NEXT: and z2.s, z2.s, #0x1 -; CHECK-NEXT: mov z4.s, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z5.s, p2/m, #0 // =0x0 ; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0 ; CHECK-NEXT: cmpne p2.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z4.s, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0 -; CHECK-NEXT: st1w { z4.s }, p0, [x0, x8, lsl #2] +; CHECK-NEXT: st1w { z3.s }, p0, [x0, x8, lsl #2] ; CHECK-NEXT: st1w { z5.s }, p0, [x0, x9, lsl #2] -; CHECK-NEXT: st1w { z0.s }, p0, [x0, x11, lsl #2] -; CHECK-NEXT: st1w { z3.s }, p0, [x0] +; CHECK-NEXT: st1w { z0.s }, p0, [x0, x10, lsl #2] +; CHECK-NEXT: st1w { z4.s }, p0, [x0] ; CHECK-NEXT: .LBB1_2: // %exit ; CHECK-NEXT: ret %broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer Index: llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll @@ -34,8 +34,8 @@ define void @splat_v32i8(i8 %a, <32 x i8>* %b) vscale_range(2,0) #0 { ; CHECK-LABEL: splat_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, vl32 ; CHECK-NEXT: mov z0.b, w0 +; CHECK-NEXT: ptrue p0.b, vl32 ; CHECK-NEXT: st1b { z0.b }, p0, [x1] ; CHECK-NEXT: ret %insert = insertelement <32 x i8> undef, i8 %a, i64 0 @@ -47,17 +47,17 @@ define void @splat_v64i8(i8 %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: splat_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: mov z0.b, w0 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1] ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1, x8] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v64i8: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: ptrue p0.b, vl64 ; VBITS_GE_512-NEXT: mov z0.b, w0 +; VBITS_GE_512-NEXT: ptrue p0.b, vl64 ; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x1] ; VBITS_GE_512-NEXT: ret %insert = insertelement <64 x i8> undef, i8 %a, i64 0 @@ -69,8 +69,8 @@ define void @splat_v128i8(i8 %a, <128 x i8>* %b) vscale_range(8,0) #0 { ; CHECK-LABEL: splat_v128i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, vl128 ; CHECK-NEXT: mov z0.b, w0 +; CHECK-NEXT: ptrue p0.b, vl128 ; CHECK-NEXT: st1b { z0.b }, p0, [x1] ; CHECK-NEXT: ret %insert = insertelement <128 x i8> undef, i8 %a, i64 0 @@ -82,8 +82,8 @@ define void @splat_v256i8(i8 %a, <256 x i8>* %b) vscale_range(16,0) #0 { ; CHECK-LABEL: splat_v256i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, vl256 ; CHECK-NEXT: mov z0.b, w0 +; CHECK-NEXT: ptrue p0.b, vl256 ; CHECK-NEXT: st1b { z0.b }, p0, [x1] ; CHECK-NEXT: ret %insert = insertelement <256 x i8> undef, i8 %a, i64 0 @@ -117,8 +117,8 @@ define void @splat_v16i16(i16 %a, <16 x i16>* %b) vscale_range(2,0) #0 { ; CHECK-LABEL: splat_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h, vl16 ; CHECK-NEXT: mov z0.h, w0 +; CHECK-NEXT: ptrue p0.h, vl16 ; CHECK-NEXT: st1h { z0.h }, p0, [x1] ; CHECK-NEXT: ret %insert = insertelement <16 x i16> undef, i16 %a, i64 0 @@ -130,17 +130,17 @@ define void @splat_v32i16(i16 %a, <32 x i16>* %b) #0 { ; VBITS_GE_256-LABEL: splat_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: mov z0.h, w0 +; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1] ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1] +; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v32i16: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: ptrue p0.h, vl32 ; VBITS_GE_512-NEXT: mov z0.h, w0 +; VBITS_GE_512-NEXT: ptrue p0.h, vl32 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1] ; VBITS_GE_512-NEXT: ret %insert = insertelement <32 x i16> undef, i16 %a, i64 0 @@ -152,8 +152,8 @@ define void @splat_v64i16(i16 %a, <64 x i16>* %b) vscale_range(8,0) #0 { ; CHECK-LABEL: splat_v64i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h, vl64 ; CHECK-NEXT: mov z0.h, w0 +; CHECK-NEXT: ptrue p0.h, vl64 ; CHECK-NEXT: st1h { z0.h }, p0, [x1] ; CHECK-NEXT: ret %insert = insertelement <64 x i16> undef, i16 %a, i64 0 @@ -165,8 +165,8 @@ define void @splat_v128i16(i16 %a, <128 x i16>* %b) vscale_range(16,0) #0 { ; CHECK-LABEL: splat_v128i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h, vl128 ; CHECK-NEXT: mov z0.h, w0 +; CHECK-NEXT: ptrue p0.h, vl128 ; CHECK-NEXT: st1h { z0.h }, p0, [x1] ; CHECK-NEXT: ret %insert = insertelement <128 x i16> undef, i16 %a, i64 0 @@ -200,8 +200,8 @@ define void @splat_v8i32(i32 %a, <8 x i32>* %b) vscale_range(2,0) #0 { ; CHECK-LABEL: splat_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s, vl8 ; CHECK-NEXT: mov z0.s, w0 +; CHECK-NEXT: ptrue p0.s, vl8 ; CHECK-NEXT: st1w { z0.s }, p0, [x1] ; CHECK-NEXT: ret %insert = insertelement <8 x i32> undef, i32 %a, i64 0 @@ -213,17 +213,17 @@ define void @splat_v16i32(i32 %a, <16 x i32>* %b) #0 { ; VBITS_GE_256-LABEL: splat_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: mov z0.s, w0 +; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1] ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2] +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v16i32: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: ptrue p0.s, vl16 ; VBITS_GE_512-NEXT: mov z0.s, w0 +; VBITS_GE_512-NEXT: ptrue p0.s, vl16 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1] ; VBITS_GE_512-NEXT: ret %insert = insertelement <16 x i32> undef, i32 %a, i64 0 @@ -235,8 +235,8 @@ define void @splat_v32i32(i32 %a, <32 x i32>* %b) vscale_range(8,0) #0 { ; CHECK-LABEL: splat_v32i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s, vl32 ; CHECK-NEXT: mov z0.s, w0 +; CHECK-NEXT: ptrue p0.s, vl32 ; CHECK-NEXT: st1w { z0.s }, p0, [x1] ; CHECK-NEXT: ret %insert = insertelement <32 x i32> undef, i32 %a, i64 0 @@ -248,8 +248,8 @@ define void @splat_v64i32(i32 %a, <64 x i32>* %b) vscale_range(16,0) #0 { ; CHECK-LABEL: splat_v64i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s, vl64 ; CHECK-NEXT: mov z0.s, w0 +; CHECK-NEXT: ptrue p0.s, vl64 ; CHECK-NEXT: st1w { z0.s }, p0, [x1] ; CHECK-NEXT: ret %insert = insertelement <64 x i32> undef, i32 %a, i64 0 @@ -283,8 +283,8 @@ define void @splat_v4i64(i64 %a, <4 x i64>* %b) vscale_range(2,0) #0 { ; CHECK-LABEL: splat_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: mov z0.d, x0 +; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: st1d { z0.d }, p0, [x1] ; CHECK-NEXT: ret %insert = insertelement <4 x i64> undef, i64 %a, i64 0 @@ -296,17 +296,17 @@ define void @splat_v8i64(i64 %a, <8 x i64>* %b) #0 { ; VBITS_GE_256-LABEL: splat_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: mov z0.d, x0 +; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3] +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v8i64: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: ptrue p0.d, vl8 ; VBITS_GE_512-NEXT: mov z0.d, x0 +; VBITS_GE_512-NEXT: ptrue p0.d, vl8 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_GE_512-NEXT: ret %insert = insertelement <8 x i64> undef, i64 %a, i64 0 @@ -318,8 +318,8 @@ define void @splat_v16i64(i64 %a, <16 x i64>* %b) vscale_range(8,0) #0 { ; CHECK-LABEL: splat_v16i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d, vl16 ; CHECK-NEXT: mov z0.d, x0 +; CHECK-NEXT: ptrue p0.d, vl16 ; CHECK-NEXT: st1d { z0.d }, p0, [x1] ; CHECK-NEXT: ret %insert = insertelement <16 x i64> undef, i64 %a, i64 0 @@ -331,8 +331,8 @@ define void @splat_v32i64(i64 %a, <32 x i64>* %b) vscale_range(16,0) #0 { ; CHECK-LABEL: splat_v32i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d, vl32 ; CHECK-NEXT: mov z0.d, x0 +; CHECK-NEXT: ptrue p0.d, vl32 ; CHECK-NEXT: st1d { z0.d }, p0, [x1] ; CHECK-NEXT: ret %insert = insertelement <32 x i64> undef, i64 %a, i64 0 @@ -386,12 +386,12 @@ define void @splat_v32f16(half %a, <32 x half>* %b) #0 { ; VBITS_GE_256-LABEL: splat_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 def $z0 +; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 ; VBITS_GE_256-NEXT: mov z0.h, h0 -; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v32f16: @@ -476,12 +476,12 @@ define void @splat_v16f32(float %a, <16 x float>* %b) #0 { ; VBITS_GE_256-LABEL: splat_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 def $z0 +; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: mov z0.s, s0 -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v16f32: @@ -564,12 +564,12 @@ define void @splat_v8f64(double %a, <8 x double>* %b) #0 { ; VBITS_GE_256-LABEL: splat_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $z0 +; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 ; VBITS_GE_256-NEXT: mov z0.d, d0 -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v8f64: Index: llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll @@ -55,8 +55,8 @@ ; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: mov z0.s, #0 // =0x0 -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: store_v16f32: @@ -87,13 +87,13 @@ ; VBITS_GE_256-LABEL: store_v32f32: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #24 -; VBITS_GE_256-NEXT: mov x9, #16 -; VBITS_GE_256-NEXT: mov x10, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: mov z0.s, #0 // =0x0 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2] -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x10, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #16 +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #8 +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; @@ -102,8 +102,8 @@ ; VBITS_GE_512-NEXT: mov x8, #16 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 ; VBITS_GE_512-NEXT: mov z0.s, #0 // =0x0 -; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: ret ; ; VBITS_GE_1024-LABEL: store_v32f32: @@ -127,34 +127,34 @@ ; VBITS_GE_256-LABEL: store_v64f32: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: mov x8, #56 -; VBITS_GE_256-NEXT: mov x9, #48 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: mov z0.s, #0 // =0x0 -; VBITS_GE_256-NEXT: mov x10, #40 -; VBITS_GE_256-NEXT: mov x11, #32 +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #48 +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #40 +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #32 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: mov x8, #24 -; VBITS_GE_256-NEXT: mov x12, #16 -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2] -; VBITS_GE_256-NEXT: mov x9, #8 -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x10, lsl #2] -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x11, lsl #2] ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x12, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #16 +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #8 +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: store_v64f32: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: mov x8, #48 -; VBITS_GE_512-NEXT: mov x9, #32 -; VBITS_GE_512-NEXT: mov x10, #16 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 ; VBITS_GE_512-NEXT: mov z0.s, #0 // =0x0 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2] -; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x10, lsl #2] +; VBITS_GE_512-NEXT: mov x8, #32 +; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_512-NEXT: mov x8, #16 +; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: ret ; @@ -163,8 +163,8 @@ ; VBITS_GE_1024-NEXT: mov x8, #32 ; VBITS_GE_1024-NEXT: ptrue p0.s, vl32 ; VBITS_GE_1024-NEXT: mov z0.s, #0 // =0x0 -; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_1024-NEXT: ret ; ; VBITS_GE_2048-LABEL: store_v64f32: Index: llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll @@ -50,8 +50,8 @@ define void @shuffle_ext_byone_v64i8(<64 x i8>* %a, <64 x i8>* %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x1, x8] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1] Index: llvm/test/CodeGen/AArch64/sve-fp-reduce.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fp-reduce.ll +++ llvm/test/CodeGen/AArch64/sve-fp-reduce.ll @@ -47,11 +47,11 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI3_0 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z0.h }, p0, [sp] +; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI3_0 ; CHECK-NEXT: ld1rh { z0.d }, p1/z, [x8] ; CHECK-NEXT: st1h { z0.d }, p1, [sp, #3, mul vl] ; CHECK-NEXT: fmov s0, s1 @@ -73,11 +73,11 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: add x8, x8, :lo12:.LCPI4_0 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z1.h }, p0, [sp] +; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: add x8, x8, :lo12:.LCPI4_0 ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 ; CHECK-NEXT: ld1rh { z1.d }, p1/z, [x8] ; CHECK-NEXT: addvl x8, sp, #1 @@ -102,9 +102,9 @@ define half @fadda_nxv12f16( %v, half %s) { ; CHECK-LABEL: fadda_nxv12f16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: add x8, x8, :lo12:.LCPI5_0 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: ld1rh { z3.s }, p0/z, [x8] Index: llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll +++ llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll @@ -27,10 +27,10 @@ define void @scatter_i16_index_offset_minimum(i16* %base, i64 %offset, %pg, %data) #0 { ; CHECK-LABEL: scatter_i16_index_offset_minimum: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-33554432 -; CHECK-NEXT: add x9, x0, x1, lsl #1 -; CHECK-NEXT: index z1.s, #0, w8 -; CHECK-NEXT: st1h { z0.s }, p0, [x9, z1.s, sxtw #1] +; CHECK-NEXT: add x8, x0, x1, lsl #1 +; CHECK-NEXT: mov w9, #-33554432 +; CHECK-NEXT: index z1.s, #0, w9 +; CHECK-NEXT: st1h { z0.s }, p0, [x8, z1.s, sxtw #1] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 %t1 = shufflevector %t0, undef, zeroinitializer @@ -48,8 +48,8 @@ define @gather_i8_index_offset_8(i8* %base, i64 %offset, %pg) #0 { ; CHECK-LABEL: gather_i8_index_offset_8: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x1 ; CHECK-NEXT: index z0.s, #0, #1 +; CHECK-NEXT: add x8, x0, x1 ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x8, z0.s, sxtw] ; CHECK-NEXT: ret %splat.insert0 = insertelement undef, i64 %offset, i32 0 @@ -74,9 +74,9 @@ ; CHECK-NEXT: index z1.d, #0, #1 ; CHECK-NEXT: mov z3.d, x1 ; CHECK-NEXT: mov z2.d, z1.d -; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: incd z2.d +; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mla z3.d, p1/m, z1.d, z3.d ; CHECK-NEXT: mla z4.d, p1/m, z2.d, z4.d ; CHECK-NEXT: punpklo p1.h, p0.b @@ -105,18 +105,18 @@ ; CHECK-NEXT: rdvl x8, #1 ; CHECK-NEXT: mov w9, #67108864 ; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: add x10, x0, x1 ; CHECK-NEXT: punpklo p1.h, p0.b ; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: mul x8, x8, x9 ; CHECK-NEXT: mov w9, #33554432 -; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: index z1.d, #0, x9 +; CHECK-NEXT: add x9, x0, x1 +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: st1b { z3.d }, p1, [x9, z1.d] ; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: st1b { z3.d }, p1, [x10, z1.d] ; CHECK-NEXT: add z2.d, z1.d, z2.d -; CHECK-NEXT: st1b { z0.d }, p0, [x10, z2.d] +; CHECK-NEXT: st1b { z0.d }, p0, [x9, z2.d] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 %t1 = shufflevector %t0, undef, zeroinitializer @@ -138,18 +138,18 @@ ; CHECK-NEXT: mov x9, #-2 ; CHECK-NEXT: lsr x8, x8, #4 ; CHECK-NEXT: movk x9, #64511, lsl #16 -; CHECK-NEXT: add x10, x0, x1 ; CHECK-NEXT: punpklo p1.h, p0.b +; CHECK-NEXT: uunpklo z3.d, z0.s ; CHECK-NEXT: mul x8, x8, x9 ; CHECK-NEXT: mov x9, #-33554433 -; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: index z1.d, #0, x9 +; CHECK-NEXT: add x9, x0, x1 ; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: index z1.d, #0, x9 +; CHECK-NEXT: st1b { z3.d }, p1, [x9, z1.d] ; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: st1b { z3.d }, p1, [x10, z1.d] ; CHECK-NEXT: add z2.d, z1.d, z2.d -; CHECK-NEXT: st1b { z0.d }, p0, [x10, z2.d] +; CHECK-NEXT: st1b { z0.d }, p0, [x9, z2.d] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 %t1 = shufflevector %t0, undef, zeroinitializer @@ -170,18 +170,18 @@ ; CHECK-NEXT: rdvl x8, #1 ; CHECK-NEXT: mov x9, #-9223372036854775808 ; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: add x10, x0, x1 ; CHECK-NEXT: punpklo p1.h, p0.b ; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: mul x8, x8, x9 ; CHECK-NEXT: mov x9, #4611686018427387904 -; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: index z1.d, #0, x9 +; CHECK-NEXT: add x9, x0, x1 +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: st1b { z3.d }, p1, [x9, z1.d] ; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: st1b { z3.d }, p1, [x10, z1.d] ; CHECK-NEXT: add z2.d, z1.d, z2.d -; CHECK-NEXT: st1b { z0.d }, p0, [x10, z2.d] +; CHECK-NEXT: st1b { z0.d }, p0, [x9, z2.d] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 %t1 = shufflevector %t0, undef, zeroinitializer @@ -262,10 +262,10 @@ define void @scatter_f16_index_offset_8([8 x half]* %base, i64 %offset, %pg, %data) #0 { ; CHECK-LABEL: scatter_f16_index_offset_8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: add x9, x0, x1, lsl #4 -; CHECK-NEXT: index z1.s, #0, w8 -; CHECK-NEXT: st1h { z0.s }, p0, [x9, z1.s, sxtw] +; CHECK-NEXT: add x8, x0, x1, lsl #4 +; CHECK-NEXT: mov w9, #16 +; CHECK-NEXT: index z1.s, #0, w9 +; CHECK-NEXT: st1h { z0.s }, p0, [x8, z1.s, sxtw] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 %t1 = shufflevector %t0, undef, zeroinitializer @@ -281,11 +281,11 @@ define void @scatter_f16_index_add_add([8 x half]* %base, i64 %offset, i64 %offset2, %pg, %data) #0 { ; CHECK-LABEL: scatter_f16_index_add_add: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: add x9, x0, x2, lsl #4 -; CHECK-NEXT: add x9, x9, x1, lsl #4 -; CHECK-NEXT: index z1.s, #0, w8 -; CHECK-NEXT: st1h { z0.s }, p0, [x9, z1.s, sxtw] +; CHECK-NEXT: add x8, x0, x2, lsl #4 +; CHECK-NEXT: mov w9, #16 +; CHECK-NEXT: index z1.s, #0, w9 +; CHECK-NEXT: add x8, x8, x1, lsl #4 +; CHECK-NEXT: st1h { z0.s }, p0, [x8, z1.s, sxtw] ; CHECK-NEXT: ret %splat.offset.ins = insertelement undef, i64 %offset, i32 0 %splat.offset = shufflevector %splat.offset.ins, undef, zeroinitializer @@ -304,11 +304,11 @@ define void @scatter_f16_index_add_add_mul([8 x half]* %base, i64 %offset, i64 %offset2, %pg, %data) #0 { ; CHECK-LABEL: scatter_f16_index_add_add_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 -; CHECK-NEXT: add x9, x0, x2, lsl #7 -; CHECK-NEXT: add x9, x9, x1, lsl #7 -; CHECK-NEXT: index z1.s, #0, w8 -; CHECK-NEXT: st1h { z0.s }, p0, [x9, z1.s, sxtw] +; CHECK-NEXT: add x8, x0, x2, lsl #7 +; CHECK-NEXT: mov w9, #128 +; CHECK-NEXT: index z1.s, #0, w9 +; CHECK-NEXT: add x8, x8, x1, lsl #7 +; CHECK-NEXT: st1h { z0.s }, p0, [x8, z1.s, sxtw] ; CHECK-NEXT: ret %splat.offset.ins = insertelement undef, i64 %offset, i32 0 %splat.offset = shufflevector %splat.offset.ins, undef, zeroinitializer Index: llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll +++ llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll @@ -177,13 +177,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1 ; CHECK-NEXT: uminv d0, p0, z0.d -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: index z1.d, #0, #1 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: index z0.d, #0, #1 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z0.d, x8 -; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z0.d +; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d ; CHECK-NEXT: mov z0.d, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p0/m, x9 +; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: ret %t1 = call i64 @llvm.aarch64.sve.uminv.nxv2i64( %pg, %a) %t2 = insertelement zeroinitializer, i64 %t1, i64 1 @@ -195,8 +195,8 @@ define @zero_fill_type_mismatch( %pg, %a) #0 { ; CHECK-LABEL: zero_fill_type_mismatch: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, #0 // =0x0 ; CHECK-NEXT: uminv d0, p0, z0.d +; CHECK-NEXT: mov z1.d, #0 // =0x0 ; CHECK-NEXT: ret %t1 = call i64 @llvm.aarch64.sve.uminv.nxv2i64( %pg, %a) %t2 = insertelement zeroinitializer, i64 %t1, i64 0 Index: llvm/test/CodeGen/AArch64/sve-insert-element.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -4,8 +4,8 @@ define @test_lane0_16xi8( %a) { ; CHECK-LABEL: test_lane0_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #30 ; CHECK-NEXT: ptrue p0.b, vl1 +; CHECK-NEXT: mov w8, #30 ; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %b = insertelement %a, i8 30, i32 0 @@ -15,8 +15,8 @@ define @test_lane0_8xi16( %a) { ; CHECK-LABEL: test_lane0_8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #30 ; CHECK-NEXT: ptrue p0.h, vl1 +; CHECK-NEXT: mov w8, #30 ; CHECK-NEXT: mov z0.h, p0/m, w8 ; CHECK-NEXT: ret %b = insertelement %a, i16 30, i32 0 @@ -26,8 +26,8 @@ define @test_lane0_4xi32( %a) { ; CHECK-LABEL: test_lane0_4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #30 ; CHECK-NEXT: ptrue p0.s, vl1 +; CHECK-NEXT: mov w8, #30 ; CHECK-NEXT: mov z0.s, p0/m, w8 ; CHECK-NEXT: ret %b = insertelement %a, i32 30, i32 0 @@ -37,8 +37,8 @@ define @test_lane0_2xi64( %a) { ; CHECK-LABEL: test_lane0_2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #30 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: mov w8, #30 ; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: ret %b = insertelement %a, i64 30, i32 0 @@ -83,12 +83,12 @@ ; CHECK-LABEL: test_lane4_2xi64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: mov w9, #30 ; CHECK-NEXT: index z2.d, #0, #1 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z1.d -; CHECK-NEXT: mov z0.d, p0/m, x9 +; CHECK-NEXT: mov w8, #30 +; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: ret %b = insertelement %a, i64 30, i32 4 ret %b @@ -100,9 +100,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #9 ; CHECK-NEXT: fmov h1, #1.00000000 +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: index z3.h, #0, #1 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: cmpeq p0.h, p0/z, z3.h, z2.h ; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret @@ -114,12 +114,12 @@ ; CHECK-LABEL: test_lane1_16xi8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: mov w9, #30 ; CHECK-NEXT: index z2.b, #0, #1 -; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mov z1.b, w8 +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z1.b -; CHECK-NEXT: mov z0.b, p0/m, w9 +; CHECK-NEXT: mov w8, #30 +; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %b = insertelement %a, i8 30, i32 1 ret %b @@ -130,12 +130,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x8, w0 -; CHECK-NEXT: mov w9, #30 ; CHECK-NEXT: index z2.b, #0, #1 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mov z1.b, w8 +; CHECK-NEXT: mov w8, #30 ; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z1.b -; CHECK-NEXT: mov z0.b, p0/m, w9 +; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %b = insertelement %a, i8 30, i32 %x ret %b @@ -157,8 +157,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #6 ; CHECK-NEXT: index z1.h, #0, #1 -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z0.h ; CHECK-NEXT: mov z0.h, p0/m, w0 ; CHECK-NEXT: ret @@ -192,9 +192,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #64 ; CHECK-NEXT: index z3.b, #0, #1 -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: whilels p0.b, xzr, x8 ; CHECK-NEXT: mov z2.b, w8 +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: lastb w8, p0, z1.b ; CHECK-NEXT: cmpeq p0.b, p1/z, z3.b, z2.b ; CHECK-NEXT: mov z0.b, p0/m, w8 @@ -207,13 +207,13 @@ define @test_insert3_of_extract1_16xi8( %a, %b) { ; CHECK-LABEL: test_insert3_of_extract1_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: umov w9, v1.b[1] +; CHECK-NEXT: umov w8, v1.b[1] +; CHECK-NEXT: mov w9, #3 +; CHECK-NEXT: mov z1.b, w9 ; CHECK-NEXT: index z2.b, #0, #1 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z1.b, w8 ; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z1.b -; CHECK-NEXT: mov z0.b, p0/m, w9 +; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %c = extractelement %b, i32 1 %d = insertelement %a, i8 %c, i32 3 @@ -278,10 +278,10 @@ define @test_insert_with_index_nxv2f16(half %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: mov z2.d, x0 +; CHECK-NEXT: mov z1.d, x0 +; CHECK-NEXT: index z2.d, #0, #1 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d +; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z1.d ; CHECK-NEXT: mov z0.h, p0/m, h0 ; CHECK-NEXT: ret %res = insertelement undef, half %h, i64 %idx @@ -291,10 +291,10 @@ define @test_insert_with_index_nxv4f16(half %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.s, #0, #1 -; CHECK-NEXT: mov z2.s, w0 +; CHECK-NEXT: mov z1.s, w0 +; CHECK-NEXT: index z2.s, #0, #1 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s +; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z1.s ; CHECK-NEXT: mov z0.h, p0/m, h0 ; CHECK-NEXT: ret %res = insertelement undef, half %h, i64 %idx @@ -304,10 +304,10 @@ define @test_insert_with_index_nxv8f16(half %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.h, #0, #1 -; CHECK-NEXT: mov z2.h, w0 +; CHECK-NEXT: mov z1.h, w0 +; CHECK-NEXT: index z2.h, #0, #1 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h +; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z1.h ; CHECK-NEXT: mov z0.h, p0/m, h0 ; CHECK-NEXT: ret %res = insertelement undef, half %h, i64 %idx @@ -317,10 +317,10 @@ define @test_insert_with_index_nxv2f32(float %f, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: mov z2.d, x0 +; CHECK-NEXT: mov z1.d, x0 +; CHECK-NEXT: index z2.d, #0, #1 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d +; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z1.d ; CHECK-NEXT: mov z0.s, p0/m, s0 ; CHECK-NEXT: ret %res = insertelement undef, float %f, i64 %idx @@ -330,10 +330,10 @@ define @test_insert_with_index_nxv4f32(float %f, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.s, #0, #1 -; CHECK-NEXT: mov z2.s, w0 +; CHECK-NEXT: mov z1.s, w0 +; CHECK-NEXT: index z2.s, #0, #1 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s +; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z1.s ; CHECK-NEXT: mov z0.s, p0/m, s0 ; CHECK-NEXT: ret %res = insertelement undef, float %f, i64 %idx @@ -343,10 +343,10 @@ define @test_insert_with_index_nxv2f64(double %d, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: mov z2.d, x0 +; CHECK-NEXT: mov z1.d, x0 +; CHECK-NEXT: index z2.d, #0, #1 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d +; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z1.d ; CHECK-NEXT: mov z0.d, p0/m, d0 ; CHECK-NEXT: ret %res = insertelement undef, double %d, i64 %idx @@ -357,10 +357,10 @@ define @test_predicate_insert_2xi1_immediate ( %val, i1 %elt) { ; CHECK-LABEL: test_predicate_insert_2xi1_immediate: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d, vl1 ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 +; CHECK-NEXT: ptrue p0.d, vl1 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: mov z0.d, p1/m, x0 +; CHECK-NEXT: mov z0.d, p0/m, x0 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z0.d, z0.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 @@ -374,8 +374,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: index z1.s, #0, #1 -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z0.s ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 ; CHECK-NEXT: mov z0.s, p2/m, w0 @@ -391,13 +391,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x8, w0 -; CHECK-NEXT: mov w9, #1 -; CHECK-NEXT: index z1.h, #0, #1 +; CHECK-NEXT: index z0.h, #0, #1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z0.h, w8 -; CHECK-NEXT: cmpeq p2.h, p1/z, z1.h, z0.h +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: cmpeq p2.h, p1/z, z0.h, z1.h ; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.h, p2/m, w9 +; CHECK-NEXT: mov z0.h, p2/m, w8 ; CHECK-NEXT: and z0.h, z0.h, #0x1 ; CHECK-NEXT: cmpne p0.h, p1/z, z0.h, #0 ; CHECK-NEXT: ret @@ -409,10 +409,10 @@ ; CHECK-LABEL: test_predicate_insert_16xi1_immediate: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w9, #4 -; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: index z1.b, #0, #1 -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: mov z0.b, w9 +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: cmpeq p2.b, p1/z, z1.b, z0.b ; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 ; CHECK-NEXT: mov z0.b, p2/m, w8 @@ -504,18 +504,18 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG -; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: addvl x9, x9, #2 ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 +; CHECK-NEXT: cmp x8, x9 ; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1b { z0.b }, p1, [sp, #1, mul vl] ; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 -; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: st1b { z0.b }, p1, [sp] -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: csel x8, x9, x8, lo -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: strb w0, [x9, x8] ; CHECK-NEXT: ld1b { z0.b }, p1/z, [sp] ; CHECK-NEXT: ld1b { z1.b }, p1/z, [sp, #1, mul vl] Index: llvm/test/CodeGen/AArch64/sve-insert-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -121,13 +121,13 @@ ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov x8, #-16 ; CHECK-NEXT: mov w9, #16 -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: cmp x8, #16 +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: str q1, [x10, x8] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -397,10 +397,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: subs x8, x8, #4 +; CHECK-NEXT: cntd x8 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0] +; CHECK-NEXT: subs x8, x8, #4 ; CHECK-NEXT: csel x8, xzr, x8, lo ; CHECK-NEXT: mov w9, #4 ; CHECK-NEXT: cmp x8, #4 @@ -475,8 +475,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #2 Index: llvm/test/CodeGen/AArch64/sve-int-arith.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-int-arith.ll +++ llvm/test/CodeGen/AArch64/sve-int-arith.ll @@ -165,9 +165,9 @@ ; CHECK-LABEL: abs_nxv8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: abs z2.d, p0/m, z2.d ; CHECK-NEXT: abs z0.d, p0/m, z0.d ; CHECK-NEXT: abs z1.d, p0/m, z1.d +; CHECK-NEXT: abs z2.d, p0/m, z2.d ; CHECK-NEXT: abs z3.d, p0/m, z3.d ; CHECK-NEXT: ret %res = call @llvm.abs.nxv8i64( %a, i1 false) Index: llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll @@ -111,9 +111,9 @@ define i32 @inch_mul(i32 %a) { ; NO_SCALAR_INC-LABEL: inch_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #5 -; NO_SCALAR_INC-NEXT: cnth x9, vl8 -; NO_SCALAR_INC-NEXT: madd w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cnth x8, vl8 +; NO_SCALAR_INC-NEXT: mov w9, #5 +; NO_SCALAR_INC-NEXT: madd w0, w8, w9, w0 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: inch_mul: @@ -155,9 +155,9 @@ define i32 @dech_mul(i32 %a) { ; NO_SCALAR_INC-LABEL: dech_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #7 -; NO_SCALAR_INC-NEXT: cnth x9, vl16 -; NO_SCALAR_INC-NEXT: msub w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cnth x8, vl16 +; NO_SCALAR_INC-NEXT: mov w9, #7 +; NO_SCALAR_INC-NEXT: msub w0, w8, w9, w0 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: dech_mul: @@ -199,9 +199,9 @@ define i32 @incw_mul(i32 %a) { ; NO_SCALAR_INC-LABEL: incw_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #12 -; NO_SCALAR_INC-NEXT: cntw x9, vl32 -; NO_SCALAR_INC-NEXT: madd w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cntw x8, vl32 +; NO_SCALAR_INC-NEXT: mov w9, #12 +; NO_SCALAR_INC-NEXT: madd w0, w8, w9, w0 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: incw_mul: @@ -284,9 +284,9 @@ define i32 @incd_mul(i32 %base) { ; NO_SCALAR_INC-LABEL: incd_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #15 -; NO_SCALAR_INC-NEXT: cntd x9, vl64 -; NO_SCALAR_INC-NEXT: madd w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cntd x8, vl64 +; NO_SCALAR_INC-NEXT: mov w9, #15 +; NO_SCALAR_INC-NEXT: madd w0, w8, w9, w0 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: incd_mul: @@ -328,9 +328,9 @@ define i32 @decd_mul(i32 %a) { ; NO_SCALAR_INC-LABEL: decd_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #9 -; NO_SCALAR_INC-NEXT: cntd x9, vl2 -; NO_SCALAR_INC-NEXT: msub w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cntd x8, vl2 +; NO_SCALAR_INC-NEXT: mov w9, #9 +; NO_SCALAR_INC-NEXT: msub w0, w8, w9, w0 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: decd_mul: Index: llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll @@ -238,12 +238,12 @@ define @index_rr_i32_not_combine(i32 %a, i32 %b) { ; CHECK-LABEL: index_rr_i32_not_combine: ; CHECK: // %bb.0: -; CHECK-NEXT: index z0.s, #0, #1 -; CHECK-NEXT: mov z1.s, w0 -; CHECK-NEXT: mov z2.s, w1 +; CHECK-NEXT: mov z0.s, w0 +; CHECK-NEXT: mov z1.s, w1 +; CHECK-NEXT: index z2.s, #0, #1 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mla z1.s, p0/m, z0.s, z2.s -; CHECK-NEXT: add z0.s, z1.s, z0.s +; CHECK-NEXT: mla z0.s, p0/m, z2.s, z1.s +; CHECK-NEXT: add z0.s, z0.s, z2.s ; CHECK-NEXT: ret %val = insertelement poison, i32 %a, i32 0 %val1 = shufflevector %val, poison, zeroinitializer Index: llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll @@ -425,9 +425,9 @@ define @add_i64_tuple4(* %out, %in1, %in2, %in3, %in4) { ; CHECK-LABEL: add_i64_tuple4: ; CHECK: // %bb.0: -; CHECK-NEXT: add z2.d, z2.d, z2.d ; CHECK-NEXT: add z0.d, z0.d, z0.d ; CHECK-NEXT: add z1.d, z1.d, z1.d +; CHECK-NEXT: add z2.d, z2.d, z2.d ; CHECK-NEXT: add z3.d, z3.d, z3.d ; CHECK-NEXT: ret %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %in1, %in2, %in3, %in4) Index: llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll @@ -452,8 +452,8 @@ define @dupq_lane_i8( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: add z1.d, z1.d, z2.d @@ -467,8 +467,8 @@ define @dupq_lane_i16( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: add z1.d, z1.d, z2.d @@ -482,8 +482,8 @@ define @dupq_lane_i32( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: add z1.d, z1.d, z2.d @@ -497,8 +497,8 @@ define @dupq_lane_i64( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: add z1.d, z1.d, z2.d @@ -512,8 +512,8 @@ define @dupq_lane_f16( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: add z1.d, z1.d, z2.d @@ -527,8 +527,8 @@ define @dupq_lane_bf16( %a, i64 %idx) #0 { ; CHECK-LABEL: dupq_lane_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: add z1.d, z1.d, z2.d @@ -542,8 +542,8 @@ define @dupq_lane_f32( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: add z1.d, z1.d, z2.d @@ -557,8 +557,8 @@ define @dupq_lane_f64( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: add z1.d, z1.d, z2.d Index: llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll +++ llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll @@ -39,18 +39,18 @@ ; CHECK-LABEL: test_post_ld1_int_fixed: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: mov w11, #2 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x2] ; CHECK-NEXT: ldr x10, [x0, x1, lsl #3] -; CHECK-NEXT: ldr x11, [x0] -; CHECK-NEXT: index z3.d, #0, #1 -; CHECK-NEXT: mov z2.d, x9 +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: mov z1.d, x11 +; CHECK-NEXT: index z2.d, #0, #1 ; CHECK-NEXT: ptrue p1.d, vl1 -; CHECK-NEXT: cmpeq p2.d, p0/z, z3.d, z2.d -; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: cmpeq p2.d, p0/z, z2.d, z1.d +; CHECK-NEXT: mov z3.d, z0.d ; CHECK-NEXT: mov z0.d, p2/m, x10 -; CHECK-NEXT: mov z1.d, p1/m, x11 -; CHECK-NEXT: add z0.d, z1.d, z0.d +; CHECK-NEXT: mov z3.d, p1/m, x9 +; CHECK-NEXT: add z0.d, z3.d, z0.d ; CHECK-NEXT: st1d { z0.d }, p0, [x8] ; CHECK-NEXT: ret %A = load <4 x i64>, <4 x i64>* %addr @@ -71,8 +71,8 @@ ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x2] ; CHECK-NEXT: ldr d1, [x0, x1, lsl #3] ; CHECK-NEXT: ldr d2, [x0] -; CHECK-NEXT: index z4.d, #0, #1 ; CHECK-NEXT: mov z3.d, x9 +; CHECK-NEXT: index z4.d, #0, #1 ; CHECK-NEXT: ptrue p1.d, vl1 ; CHECK-NEXT: cmpeq p2.d, p0/z, z4.d, z3.d ; CHECK-NEXT: sel z2.d, p1, z2.d, z0.d Index: llvm/test/CodeGen/AArch64/sve-ld1r.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-ld1r.ll +++ llvm/test/CodeGen/AArch64/sve-ld1r.ll @@ -65,8 +65,8 @@ define @ld1rb_gep_out_of_range_up(i8* %valp) { ; CHECK-LABEL: ld1rb_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #64 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: add x8, x0, #64 ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i8, i8* %valp, i32 64 @@ -79,8 +79,8 @@ define @ld1rb_gep_out_of_range_down(i8* %valp) { ; CHECK-LABEL: ld1rb_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #1 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: sub x8, x0, #1 ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i8, i8* %valp, i32 -1 @@ -196,8 +196,8 @@ define @ld1rh_gep_out_of_range_up(i16* %valp) { ; CHECK-LABEL: ld1rh_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i16, i16* %valp, i32 64 @@ -210,8 +210,8 @@ define @ld1rh_gep_out_of_range_down(i16* %valp) { ; CHECK-LABEL: ld1rh_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i16, i16* %valp, i32 -1 @@ -301,8 +301,8 @@ define @ld1rw_gep_out_of_range_up(i32* %valp) { ; CHECK-LABEL: ld1rw_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i32, i32* %valp, i32 64 @@ -315,8 +315,8 @@ define @ld1rw_gep_out_of_range_down(i32* %valp) { ; CHECK-LABEL: ld1rw_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #4 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sub x8, x0, #4 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i32, i32* %valp, i32 -1 @@ -380,8 +380,8 @@ define @ld1rd_gep_out_of_range_up(i64* %valp) { ; CHECK-LABEL: ld1rd_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #512 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: add x8, x0, #512 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i64, i64* %valp, i32 64 @@ -394,8 +394,8 @@ define @ld1rd_gep_out_of_range_down(i64* %valp) { ; CHECK-LABEL: ld1rd_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #8 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sub x8, x0, #8 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i64, i64* %valp, i32 -1 @@ -433,8 +433,8 @@ define @ld1rh_half_gep_out_of_range_up(half* %valp) { ; CHECK-LABEL: ld1rh_half_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr half, half* %valp, i32 64 @@ -447,8 +447,8 @@ define @ld1rh_half_gep_out_of_range_down(half* %valp) { ; CHECK-LABEL: ld1rh_half_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr half, half* %valp, i32 -1 @@ -486,8 +486,8 @@ define @ld1rh_half_unpacked4_gep_out_of_range_up(half* %valp) { ; CHECK-LABEL: ld1rh_half_unpacked4_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr half, half* %valp, i32 64 @@ -500,8 +500,8 @@ define @ld1rh_half_unpacked4_gep_out_of_range_down(half* %valp) { ; CHECK-LABEL: ld1rh_half_unpacked4_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr half, half* %valp, i32 -1 @@ -539,8 +539,8 @@ define @ld1rh_half_unpacked2_gep_out_of_range_up(half* %valp) { ; CHECK-LABEL: ld1rh_half_unpacked2_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr half, half* %valp, i32 64 @@ -553,8 +553,8 @@ define @ld1rh_half_unpacked2_gep_out_of_range_down(half* %valp) { ; CHECK-LABEL: ld1rh_half_unpacked2_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr half, half* %valp, i32 -1 @@ -592,8 +592,8 @@ define @ld1rw_float_gep_out_of_range_up(float* %valp) { ; CHECK-LABEL: ld1rw_float_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr float, float* %valp, i32 64 @@ -606,8 +606,8 @@ define @ld1rw_float_gep_out_of_range_down(float* %valp) { ; CHECK-LABEL: ld1rw_float_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #4 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sub x8, x0, #4 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr float, float* %valp, i32 -1 @@ -645,8 +645,8 @@ define @ld1rw_float_unpacked2_gep_out_of_range_up(float* %valp) { ; CHECK-LABEL: ld1rw_float_unpacked2_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr float, float* %valp, i32 64 @@ -659,8 +659,8 @@ define @ld1rw_float_unpacked2_gep_out_of_range_down(float* %valp) { ; CHECK-LABEL: ld1rw_float_unpacked2_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #4 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sub x8, x0, #4 ; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr float, float* %valp, i32 -1 @@ -698,8 +698,8 @@ define @ld1rd_double_gep_out_of_range_up(double* %valp) { ; CHECK-LABEL: ld1rd_double_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #512 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: add x8, x0, #512 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr double, double* %valp, i32 64 @@ -712,8 +712,8 @@ define @ld1rd_double_gep_out_of_range_down(double* %valp) { ; CHECK-LABEL: ld1rd_double_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #8 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sub x8, x0, #8 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr double, double* %valp, i32 -1 Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll +++ llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll @@ -100,11 +100,11 @@ ; CHECK-LABEL: masked_sload_4i8_4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0] -; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: sunpkhi z1.d, z0.s ; CHECK-NEXT: sunpklo z0.d, z0.s -; CHECK-NEXT: scvtf z0.d, p1/m, z0.d -; CHECK-NEXT: scvtf z1.d, p1/m, z1.d +; CHECK-NEXT: scvtf z0.d, p0/m, z0.d +; CHECK-NEXT: scvtf z1.d, p0/m, z1.d ; CHECK-NEXT: ret %wide.load = call @llvm.masked.load.nxv4i8(* %in, i32 2, %mask, undef) %sext = sext %wide.load to Index: llvm/test/CodeGen/AArch64/sve-select.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-select.ll +++ llvm/test/CodeGen/AArch64/sve-select.ll @@ -602,10 +602,10 @@ ; CHECK-LABEL: select_f32_no_invert_2_op: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fmul z2.s, z2.s, z3.s ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: fmul z0.s, z0.s, z1.s -; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s +; CHECK-NEXT: fmul z1.s, z2.s, z3.s +; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s ; CHECK-NEXT: ret %p = fcmp oeq %a, zeroinitializer %fmul1 = fmul %a, %b Index: llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -24,17 +24,17 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG -; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: addvl x9, x9, #2 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1b { z0.b }, p0, [sp] -; CHECK-NEXT: addvl x8, x8, #2 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: csel x8, x9, x8, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ldrb w0, [x9, x8] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: ldrb w0, [x10, x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -50,17 +50,17 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG -; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: addvl x9, x9, #1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: csel x8, x9, x8, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: ldrh w0, [x10, x8, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -76,17 +76,17 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG -; CHECK-NEXT: cnth x8 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 -; CHECK-NEXT: sub x8, x8, #1 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: cnth x9 +; CHECK-NEXT: sub x9, x9, #1 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: csel x8, x9, x8, lo -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: ldr w0, [x9, x8, lsl #2] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: ldr w0, [x10, x8, lsl #2] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -102,14 +102,14 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG -; CHECK-NEXT: cnth x8 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 -; CHECK-NEXT: sub x8, x8, #1 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: cnth x9 +; CHECK-NEXT: sub x9, x9, #1 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: cmp x8, x9 ; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl] -; CHECK-NEXT: csel x8, x9, x8, lo +; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] @@ -151,15 +151,15 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov w9, #128 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: mov w9, #128 ; CHECK-NEXT: cmp x8, #128 +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1] +; CHECK-NEXT: ldrh w0, [x10, x8, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -178,15 +178,15 @@ ; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov w9, #34464 ; CHECK-NEXT: movk w9, #1, lsl #16 +; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: cmp x8, x9 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] -; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: cmp x8, x9 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ldr w0, [x9, x8, lsl #2] ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -204,15 +204,15 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: cntw x8 -; CHECK-NEXT: mov w9, #10 -; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sub x8, x8, #1 +; CHECK-NEXT: mov w9, #10 ; CHECK-NEXT: cmp x8, #10 +; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: ldr x0, [x9, x8, lsl #3] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: ldr x0, [x10, x8, lsl #3] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/sve-split-fcvt.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-fcvt.ll +++ llvm/test/CodeGen/AArch64/sve-split-fcvt.ll @@ -38,14 +38,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: uunpklo z2.d, z1.s +; CHECK-NEXT: uunpkhi z2.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s -; CHECK-NEXT: uunpklo z3.d, z0.s +; CHECK-NEXT: fcvt z0.d, p0/m, z0.h ; CHECK-NEXT: fcvt z1.d, p0/m, z1.h -; CHECK-NEXT: uunpkhi z4.d, z0.s -; CHECK-NEXT: movprfx z0, z2 -; CHECK-NEXT: fcvt z0.d, p0/m, z2.h +; CHECK-NEXT: uunpklo z3.d, z2.s +; CHECK-NEXT: uunpkhi z4.d, z2.s ; CHECK-NEXT: movprfx z2, z3 ; CHECK-NEXT: fcvt z2.d, p0/m, z3.h ; CHECK-NEXT: movprfx z3, z4 @@ -74,18 +73,18 @@ ; CHECK-LABEL: fcvtd_nxv8f32: ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z2.d, z0.s -; CHECK-NEXT: uunpkhi z3.d, z0.s ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z4.d, z1.s -; CHECK-NEXT: uunpkhi z5.d, z1.s +; CHECK-NEXT: uunpkhi z3.d, z0.s ; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvt z0.d, p0/m, z2.s -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: fcvt z1.d, p0/m, z3.s -; CHECK-NEXT: movprfx z2, z4 -; CHECK-NEXT: fcvt z2.d, p0/m, z4.s -; CHECK-NEXT: movprfx z3, z5 -; CHECK-NEXT: fcvt z3.d, p0/m, z5.s +; CHECK-NEXT: uunpklo z2.d, z1.s +; CHECK-NEXT: uunpkhi z1.d, z1.s +; CHECK-NEXT: movprfx z4, z3 +; CHECK-NEXT: fcvt z4.d, p0/m, z3.s +; CHECK-NEXT: fcvt z2.d, p0/m, z2.s +; CHECK-NEXT: movprfx z3, z1 +; CHECK-NEXT: fcvt z3.d, p0/m, z1.s +; CHECK-NEXT: mov z1.d, z4.d ; CHECK-NEXT: ret %res = fpext %a to ret %res @@ -149,9 +148,9 @@ ; CHECK-LABEL: fcvts_nxv8f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: fcvt z3.s, p0/m, z3.d ; CHECK-NEXT: fcvt z1.s, p0/m, z1.d ; CHECK-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NEXT: fcvt z3.s, p0/m, z3.d ; CHECK-NEXT: fcvt z2.s, p0/m, z2.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s ; CHECK-NEXT: uzp1 z1.s, z2.s, z3.s @@ -211,18 +210,18 @@ ; CHECK-LABEL: fcvtzs_s_nxv16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: uunpklo z2.s, z0.h -; CHECK-NEXT: uunpkhi z3.s, z0.h ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: uunpklo z4.s, z1.h -; CHECK-NEXT: uunpkhi z5.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z0.h ; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.h -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: fcvtzs z1.s, p0/m, z3.h -; CHECK-NEXT: movprfx z2, z4 -; CHECK-NEXT: fcvtzs z2.s, p0/m, z4.h -; CHECK-NEXT: movprfx z3, z5 -; CHECK-NEXT: fcvtzs z3.s, p0/m, z5.h +; CHECK-NEXT: uunpklo z2.s, z1.h +; CHECK-NEXT: uunpkhi z1.s, z1.h +; CHECK-NEXT: movprfx z4, z3 +; CHECK-NEXT: fcvtzs z4.s, p0/m, z3.h +; CHECK-NEXT: fcvtzs z2.s, p0/m, z2.h +; CHECK-NEXT: movprfx z3, z1 +; CHECK-NEXT: fcvtzs z3.s, p0/m, z1.h +; CHECK-NEXT: mov z1.d, z4.d ; CHECK-NEXT: ret %res = fptosi %a to ret %res @@ -296,14 +295,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sunpklo z1.h, z0.b ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sunpkhi z0.h, z0.b -; CHECK-NEXT: sunpklo z2.s, z1.h +; CHECK-NEXT: sunpkhi z2.h, z0.b +; CHECK-NEXT: sunpklo z0.s, z1.h ; CHECK-NEXT: sunpkhi z1.s, z1.h -; CHECK-NEXT: sunpklo z3.s, z0.h +; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: scvtf z1.s, p0/m, z1.s -; CHECK-NEXT: sunpkhi z4.s, z0.h -; CHECK-NEXT: movprfx z0, z2 -; CHECK-NEXT: scvtf z0.s, p0/m, z2.s +; CHECK-NEXT: sunpklo z3.s, z2.h +; CHECK-NEXT: sunpkhi z4.s, z2.h ; CHECK-NEXT: movprfx z2, z3 ; CHECK-NEXT: scvtf z2.s, p0/m, z3.s ; CHECK-NEXT: movprfx z3, z4 Index: llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll +++ llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll @@ -23,8 +23,8 @@ define float @faddv_nxv8f32(float %init, %a) { ; CHECK-LABEL: faddv_nxv8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: fadd z1.s, z1.s, z2.s +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: faddv s1, p0, z1.s ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll +++ llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll @@ -6,10 +6,10 @@ define @promote_insert_8i8( %a, i8 %elt, i64 %idx) { ; CHECK-LABEL: promote_insert_8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: index z1.h, #0, #1 -; CHECK-NEXT: mov z2.h, w1 +; CHECK-NEXT: mov z1.h, w1 +; CHECK-NEXT: index z2.h, #0, #1 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h +; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z1.h ; CHECK-NEXT: mov z0.h, p0/m, w0 ; CHECK-NEXT: ret %ins = insertelement %a, i8 %elt, i64 %idx @@ -26,12 +26,12 @@ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: addvl x8, x8, #2 +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmp x1, x8 +; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] ; CHECK-NEXT: csel x8, x1, x8, lo +; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: strb w0, [x9, x8] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [sp, #1, mul vl] @@ -105,8 +105,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #5 ; CHECK-NEXT: index z2.s, #0, #1 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z1.s ; CHECK-NEXT: mov z0.s, p0/m, w0 ; CHECK-NEXT: ret @@ -122,8 +122,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #3 ; CHECK-NEXT: index z3.b, #0, #1 -; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mov z2.b, w8 +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmpeq p0.b, p0/z, z3.b, z2.b ; CHECK-NEXT: mov z0.b, p0/m, w0 ; CHECK-NEXT: ret @@ -141,15 +141,15 @@ ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov w9, #128 +; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: st1h { z3.h }, p0, [sp, #3, mul vl] +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1h { z2.h }, p0, [sp, #2, mul vl] -; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: strh w0, [x9, x8, lsl #1] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #1, mul vl] @@ -175,12 +175,12 @@ ; CHECK-NEXT: movk w9, #15, lsl #16 ; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: cmp x8, x9 +; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: str w0, [x9, x8, lsl #2] +; CHECK-NEXT: str w0, [x10, x8, lsl #2] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #2 Index: llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll +++ llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll @@ -6,10 +6,10 @@ define i1 @andv_nxv32i1( %a) { ; CHECK-LABEL: andv_nxv32i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b -; CHECK-NEXT: not p0.b, p2/z, p0.b -; CHECK-NEXT: ptest p2, p0.b +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: not p0.b, p1/z, p0.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.and.nxv32i1( %a) @@ -19,22 +19,13 @@ define i1 @andv_nxv64i1( %a) { ; CHECK-LABEL: andv_nxv64i1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: and p1.b, p1/z, p1.b, p3.b ; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: ptrue p4.b ; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b -; CHECK-NEXT: not p0.b, p4/z, p0.b -; CHECK-NEXT: ptest p4, p0.b -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: not p0.b, p1/z, p0.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.and.nxv64i1( %a) ret i1 %res @@ -72,10 +63,10 @@ define i1 @smaxv_nxv32i1( %a) { ; CHECK-LABEL: smaxv_nxv32i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b -; CHECK-NEXT: not p0.b, p2/z, p0.b -; CHECK-NEXT: ptest p2, p0.b +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: not p0.b, p1/z, p0.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.smax.nxv32i1( %a) @@ -113,10 +104,10 @@ define i1 @uminv_nxv32i1( %a) { ; CHECK-LABEL: uminv_nxv32i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b -; CHECK-NEXT: not p0.b, p2/z, p0.b -; CHECK-NEXT: ptest p2, p0.b +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: not p0.b, p1/z, p0.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.umin.nxv32i1( %a) Index: llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll +++ llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll @@ -17,8 +17,8 @@ define i32 @andv_nxv8i32( %a) { ; CHECK-LABEL: andv_nxv8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -45,8 +45,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: orr z1.d, z1.d, z3.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: orv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -71,8 +71,8 @@ define i32 @xorv_nxv8i32( %a) { ; CHECK-LABEL: xorv_nxv8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: eor z0.d, z0.d, z1.d +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -97,8 +97,8 @@ define i16 @uaddv_nxv16i16( %a) { ; CHECK-LABEL: uaddv_nxv16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: uaddv d0, p0, z0.h ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -112,8 +112,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: add z1.s, z1.s, z3.s ; CHECK-NEXT: add z0.s, z0.s, z2.s -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uaddv d0, p0, z0.s ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 Index: llvm/test/CodeGen/AArch64/sve-split-load.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-load.ll +++ llvm/test/CodeGen/AArch64/sve-split-load.ll @@ -92,11 +92,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: punpklo p2.h, p0.b ; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: punpklo p3.h, p1.b -; CHECK-NEXT: punpkhi p1.h, p1.b ; CHECK-NEXT: ld1h { z0.h }, p2/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ld1h { z2.h }, p3/z, [x0, #2, mul vl] +; CHECK-NEXT: punpklo p0.h, p1.b +; CHECK-NEXT: punpkhi p1.h, p1.b +; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0, #2, mul vl] ; CHECK-NEXT: ld1h { z3.h }, p1/z, [x0, #3, mul vl] ; CHECK-NEXT: ret %load = call @llvm.masked.load.nxv32i16( *%a, i32 1, %pg, undef) @@ -123,10 +123,10 @@ ; CHECK-NEXT: punpklo p2.h, p1.b ; CHECK-NEXT: punpkhi p1.h, p1.b ; CHECK-NEXT: ld1d { z0.d }, p2/z, [x0] -; CHECK-NEXT: punpklo p2.h, p0.b -; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: ld1d { z1.d }, p1/z, [x0, #1, mul vl] -; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #2, mul vl] +; CHECK-NEXT: punpklo p1.h, p0.b +; CHECK-NEXT: punpkhi p0.h, p0.b +; CHECK-NEXT: ld1d { z2.d }, p1/z, [x0, #2, mul vl] ; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #3, mul vl] ; CHECK-NEXT: ret %load = call @llvm.masked.load.nxv8i64( *%a, i32 1, %pg, undef) Index: llvm/test/CodeGen/AArch64/sve-srem-combine-loop.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-srem-combine-loop.ll +++ llvm/test/CodeGen/AArch64/sve-srem-combine-loop.ll @@ -6,8 +6,8 @@ define @srem_combine_loop( %a) #0 { ; CHECK-LABEL: srem_combine_loop: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: asrd z1.s, p0/m, z1.s, #1 ; CHECK-NEXT: mov z2.s, #2 // =0x2 ; CHECK-NEXT: mls z0.s, p0/m, z1.s, z2.s Index: llvm/test/CodeGen/AArch64/sve-stepvector.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-stepvector.ll +++ llvm/test/CodeGen/AArch64/sve-stepvector.ll @@ -424,13 +424,13 @@ define @split_sub_stepvector_nxv16i32() { ; CHECK-LABEL: split_sub_stepvector_nxv16i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cntw x8 -; CHECK-NEXT: cnth x9 -; CHECK-NEXT: neg x8, x8 +; CHECK-NEXT: cntw x9 +; CHECK-NEXT: cnth x8 ; CHECK-NEXT: neg x9, x9 ; CHECK-NEXT: index z0.s, #0, #-1 -; CHECK-NEXT: mov z1.s, w8 -; CHECK-NEXT: mov z3.s, w9 +; CHECK-NEXT: neg x8, x8 +; CHECK-NEXT: mov z1.s, w9 +; CHECK-NEXT: mov z3.s, w8 ; CHECK-NEXT: add z1.s, z0.s, z1.s ; CHECK-NEXT: add z2.s, z0.s, z3.s ; CHECK-NEXT: add z3.s, z1.s, z3.s Index: llvm/test/CodeGen/AArch64/sve-trunc.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-trunc.ll +++ llvm/test/CodeGen/AArch64/sve-trunc.ll @@ -73,8 +73,8 @@ define @trunc_i64toi1_split( %in) { ; CHECK-LABEL: trunc_i64toi1_split: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z0.d, z0.d, #0x1 ; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 @@ -88,8 +88,8 @@ define @trunc_i64toi1_split2( %in) { ; CHECK-LABEL: trunc_i64toi1_split2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z3.d, z3.d, #0x1 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z2.d, z2.d, #0x1 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: and z0.d, z0.d, #0x1 Index: llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll +++ llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll @@ -6,9 +6,9 @@ define @umulo_nxv2i8( %x, %y) { ; CHECK-LABEL: umulo_nxv2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z1.d, z1.d, #0xff ; CHECK-NEXT: and z0.d, z0.d, #0xff +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d ; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d @@ -31,9 +31,9 @@ define @umulo_nxv4i8( %x, %y) { ; CHECK-LABEL: umulo_nxv4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: and z1.s, z1.s, #0xff ; CHECK-NEXT: and z0.s, z0.s, #0xff +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s ; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s @@ -56,9 +56,9 @@ define @umulo_nxv8i8( %x, %y) { ; CHECK-LABEL: umulo_nxv8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: and z0.h, z0.h, #0xff +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h ; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h @@ -104,11 +104,11 @@ ; CHECK-NEXT: movprfx z4, z1 ; CHECK-NEXT: mul z4.b, p0/m, z4.b, z3.b ; CHECK-NEXT: umulh z1.b, p0/m, z1.b, z3.b -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: umulh z3.b, p0/m, z3.b, z2.b ; CHECK-NEXT: cmpne p1.b, p0/z, z1.b, #0 +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: umulh z1.b, p0/m, z1.b, z2.b ; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b -; CHECK-NEXT: cmpne p0.b, p0/z, z3.b, #0 +; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; CHECK-NEXT: mov z4.b, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z0.b, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, z4.d @@ -161,9 +161,9 @@ define @umulo_nxv2i16( %x, %y) { ; CHECK-LABEL: umulo_nxv2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z1.d, z1.d, #0xffff ; CHECK-NEXT: and z0.d, z0.d, #0xffff +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d ; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d @@ -186,9 +186,9 @@ define @umulo_nxv4i16( %x, %y) { ; CHECK-LABEL: umulo_nxv4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: and z1.s, z1.s, #0xffff ; CHECK-NEXT: and z0.s, z0.s, #0xffff +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: mul z2.s, p0/m, z2.s, z1.s ; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s @@ -234,11 +234,11 @@ ; CHECK-NEXT: movprfx z4, z1 ; CHECK-NEXT: mul z4.h, p0/m, z4.h, z3.h ; CHECK-NEXT: umulh z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: umulh z3.h, p0/m, z3.h, z2.h ; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0 +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: umulh z1.h, p0/m, z1.h, z2.h ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: cmpne p0.h, p0/z, z3.h, #0 +; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0 ; CHECK-NEXT: mov z4.h, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, z4.d @@ -291,9 +291,9 @@ define @umulo_nxv2i32( %x, %y) { ; CHECK-LABEL: umulo_nxv2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z1.d, z1.d, #0xffffffff ; CHECK-NEXT: and z0.d, z0.d, #0xffffffff +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: mul z2.d, p0/m, z2.d, z1.d ; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d @@ -339,11 +339,11 @@ ; CHECK-NEXT: movprfx z4, z1 ; CHECK-NEXT: mul z4.s, p0/m, z4.s, z3.s ; CHECK-NEXT: umulh z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: umulh z3.s, p0/m, z3.s, z2.s ; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0 +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: umulh z1.s, p0/m, z1.s, z2.s ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: cmpne p0.s, p0/z, z3.s, #0 +; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 ; CHECK-NEXT: mov z4.s, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, z4.d @@ -419,11 +419,11 @@ ; CHECK-NEXT: movprfx z4, z1 ; CHECK-NEXT: mul z4.d, p0/m, z4.d, z3.d ; CHECK-NEXT: umulh z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: umulh z3.d, p0/m, z3.d, z2.d ; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0 +; CHECK-NEXT: movprfx z1, z0 +; CHECK-NEXT: umulh z1.d, p0/m, z1.d, z2.d ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: cmpne p0.d, p0/z, z3.d, #0 +; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 ; CHECK-NEXT: mov z4.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, z4.d Index: llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll +++ llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll @@ -78,14 +78,14 @@ define i1 @reduce_and_insert_subvec_into_var( %in, %vec) { ; CHECK-LABEL: reduce_and_insert_subvec_into_var: ; CHECK: // %bb.0: -; CHECK-NEXT: punpklo p3.h, p1.b +; CHECK-NEXT: punpklo p2.h, p1.b ; CHECK-NEXT: punpkhi p1.h, p1.b -; CHECK-NEXT: punpkhi p3.h, p3.b -; CHECK-NEXT: ptrue p2.b -; CHECK-NEXT: uzp1 p0.h, p0.h, p3.h +; CHECK-NEXT: punpkhi p2.h, p2.b +; CHECK-NEXT: uzp1 p0.h, p0.h, p2.h ; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b -; CHECK-NEXT: not p0.b, p2/z, p0.b -; CHECK-NEXT: ptest p2, p0.b +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: not p0.b, p1/z, p0.b +; CHECK-NEXT: ptest p1, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t = call @llvm.vector.insert.nxv16i1.nxv4i1( %vec, %in, i64 0) Index: llvm/test/CodeGen/AArch64/sve-vector-splat.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -577,9 +577,9 @@ define @splat_nxv2f64_imm_out_of_range() { ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: adrp x8, .LCPI55_0 ; CHECK-NEXT: add x8, x8, :lo12:.LCPI55_0 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %1 = insertelement undef, double 3.33, i32 0 Index: llvm/test/CodeGen/AArch64/sve2-fcopysign.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve2-fcopysign.ll +++ llvm/test/CodeGen/AArch64/sve2-fcopysign.ll @@ -19,8 +19,8 @@ define @test_copysign_v2f32_v2f64( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v2f32_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2147483647 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #2147483647 ; CHECK-NEXT: fcvt z1.s, p0/m, z1.d ; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d @@ -49,8 +49,8 @@ define @test_copysign_v4f32_v4f64( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v4f32_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2147483647 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #2147483647 ; CHECK-NEXT: fcvt z2.s, p0/m, z2.d ; CHECK-NEXT: fcvt z1.s, p0/m, z1.d ; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s @@ -97,8 +97,8 @@ define @test_copysign_v4f64_v4f32( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v4f64_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpkhi z3.d, z2.s +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpklo z2.d, z2.s ; CHECK-NEXT: fcvt z3.d, p0/m, z3.s ; CHECK-NEXT: fcvt z2.d, p0/m, z2.s @@ -141,8 +141,8 @@ define @test_copysign_v4f16_v4f32( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v4f16_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: fcvt z1.h, p0/m, z1.s ; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d @@ -155,8 +155,8 @@ define @test_copysign_v4f16_v4f64( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v4f16_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: fcvt z2.h, p0/m, z2.d ; CHECK-NEXT: fcvt z1.h, p0/m, z1.d ; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s @@ -186,8 +186,8 @@ define @test_copysign_v8f16_v8f32( %a, %b) #0 { ; CHECK-LABEL: test_copysign_v8f16_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: fcvt z2.h, p0/m, z2.s ; CHECK-NEXT: fcvt z1.h, p0/m, z1.s ; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h Index: llvm/test/CodeGen/AArch64/swifterror.ll =================================================================== --- llvm/test/CodeGen/AArch64/swifterror.ll +++ llvm/test/CodeGen/AArch64/swifterror.ll @@ -21,8 +21,8 @@ ; CHECK-APPLE-NEXT: .cfi_offset w29, -16 ; CHECK-APPLE-NEXT: mov w0, #16 ; CHECK-APPLE-NEXT: bl _malloc -; CHECK-APPLE-NEXT: mov w8, #1 ; CHECK-APPLE-NEXT: fmov s0, #1.00000000 +; CHECK-APPLE-NEXT: mov w8, #1 ; CHECK-APPLE-NEXT: mov x21, x0 ; CHECK-APPLE-NEXT: strb w8, [x0, #8] ; CHECK-APPLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload @@ -239,8 +239,8 @@ ; CHECK-APPLE-AARCH64-NEXT: .cfi_offset w22, -48 ; CHECK-APPLE-AARCH64-NEXT: .cfi_offset b8, -56 ; CHECK-APPLE-AARCH64-NEXT: .cfi_offset b9, -64 -; CHECK-APPLE-AARCH64-NEXT: mov x19, x0 ; CHECK-APPLE-AARCH64-NEXT: fmov s8, #1.00000000 +; CHECK-APPLE-AARCH64-NEXT: mov x19, x0 ; CHECK-APPLE-AARCH64-NEXT: LBB2_1: ; %bb_loop ; CHECK-APPLE-AARCH64-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-APPLE-AARCH64-NEXT: mov x21, xzr @@ -323,8 +323,8 @@ ; CHECK-APPLE-ARM64_32-NEXT: .cfi_offset w22, -48 ; CHECK-APPLE-ARM64_32-NEXT: .cfi_offset b8, -56 ; CHECK-APPLE-ARM64_32-NEXT: .cfi_offset b9, -64 -; CHECK-APPLE-ARM64_32-NEXT: mov x19, x0 ; CHECK-APPLE-ARM64_32-NEXT: fmov s8, #1.00000000 +; CHECK-APPLE-ARM64_32-NEXT: mov x19, x0 ; CHECK-APPLE-ARM64_32-NEXT: LBB2_1: ; %bb_loop ; CHECK-APPLE-ARM64_32-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-APPLE-ARM64_32-NEXT: mov x21, xzr @@ -429,8 +429,8 @@ ; CHECK-APPLE-NEXT: mov w0, #16 ; CHECK-APPLE-NEXT: bl _malloc ; CHECK-APPLE-NEXT: mov x21, x0 -; CHECK-APPLE-NEXT: mov w8, #1 ; CHECK-APPLE-NEXT: fmov s0, #1.00000000 +; CHECK-APPLE-NEXT: mov w8, #1 ; CHECK-APPLE-NEXT: strb w8, [x0, #8] ; CHECK-APPLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: ret @@ -532,8 +532,8 @@ ; CHECK-APPLE-NEXT: fmov s8, s0 ; CHECK-APPLE-NEXT: mov w19, w0 ; CHECK-APPLE-NEXT: mov x0, x21 -; CHECK-APPLE-NEXT: mov w20, #1 ; CHECK-APPLE-NEXT: fmov s9, #1.00000000 +; CHECK-APPLE-NEXT: mov w20, #1 ; CHECK-APPLE-NEXT: b LBB4_2 ; CHECK-APPLE-NEXT: LBB4_1: ; %bb_cont ; CHECK-APPLE-NEXT: ; in Loop: Header=BB4_2 Depth=1 @@ -692,8 +692,8 @@ ; CHECK-APPLE-NEXT: mov w0, #16 ; CHECK-APPLE-NEXT: bl _malloc ; CHECK-APPLE-NEXT: mov w8, #1 -; CHECK-APPLE-NEXT: mov x21, x0 ; CHECK-APPLE-NEXT: strb w8, [x0, #8] +; CHECK-APPLE-NEXT: mov x21, x0 ; CHECK-APPLE-NEXT: str w19, [x20, #4] ; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload @@ -935,18 +935,18 @@ ; CHECK-APPLE-AARCH64-NEXT: .cfi_offset w29, -16 ; CHECK-APPLE-AARCH64-NEXT: mov w0, #16 ; CHECK-APPLE-AARCH64-NEXT: bl _malloc -; CHECK-APPLE-AARCH64-NEXT: mov w8, #1 -; CHECK-APPLE-AARCH64-NEXT: add x9, x29, #16 -; CHECK-APPLE-AARCH64-NEXT: ldr w10, [x29, #16] -; CHECK-APPLE-AARCH64-NEXT: orr x9, x9, #0x8 -; CHECK-APPLE-AARCH64-NEXT: strb w8, [x0, #8] -; CHECK-APPLE-AARCH64-NEXT: stur w10, [x29, #-12] +; CHECK-APPLE-AARCH64-NEXT: ldr w8, [x29, #16] +; CHECK-APPLE-AARCH64-NEXT: mov w9, #1 +; CHECK-APPLE-AARCH64-NEXT: add x10, x29, #16 +; CHECK-APPLE-AARCH64-NEXT: strb w9, [x0, #8] +; CHECK-APPLE-AARCH64-NEXT: orr x9, x10, #0x8 +; CHECK-APPLE-AARCH64-NEXT: stur w8, [x29, #-12] ; CHECK-APPLE-AARCH64-NEXT: ldr w8, [x9], #8 ; CHECK-APPLE-AARCH64-NEXT: str w8, [sp, #16] ; CHECK-APPLE-AARCH64-NEXT: ldr w8, [x9], #8 ; CHECK-APPLE-AARCH64-NEXT: fmov s0, #1.00000000 -; CHECK-APPLE-AARCH64-NEXT: mov x21, x0 ; CHECK-APPLE-AARCH64-NEXT: stur x9, [x29, #-8] +; CHECK-APPLE-AARCH64-NEXT: mov x21, x0 ; CHECK-APPLE-AARCH64-NEXT: str w8, [sp, #12] ; CHECK-APPLE-AARCH64-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-APPLE-AARCH64-NEXT: add sp, sp, #48 @@ -1002,18 +1002,18 @@ ; CHECK-APPLE-ARM64_32-NEXT: bl _malloc ; CHECK-APPLE-ARM64_32-NEXT: mov w8, #1 ; CHECK-APPLE-ARM64_32-NEXT: add x9, x29, #16 -; CHECK-APPLE-ARM64_32-NEXT: orr w10, w9, #0x4 -; CHECK-APPLE-ARM64_32-NEXT: and x11, x9, #0xfffffff0 ; CHECK-APPLE-ARM64_32-NEXT: strb w8, [x0, #8] -; CHECK-APPLE-ARM64_32-NEXT: stur w10, [x29, #-8] -; CHECK-APPLE-ARM64_32-NEXT: ldr w8, [x11] -; CHECK-APPLE-ARM64_32-NEXT: orr w11, w9, #0x8 -; CHECK-APPLE-ARM64_32-NEXT: stp w8, w11, [x29, #-12] -; CHECK-APPLE-ARM64_32-NEXT: orr w8, w9, #0xc -; CHECK-APPLE-ARM64_32-NEXT: ldr w9, [x10] +; CHECK-APPLE-ARM64_32-NEXT: orr w8, w9, #0x4 ; CHECK-APPLE-ARM64_32-NEXT: stur w8, [x29, #-8] -; CHECK-APPLE-ARM64_32-NEXT: str w9, [sp, #16] -; CHECK-APPLE-ARM64_32-NEXT: ldr w8, [x11] +; CHECK-APPLE-ARM64_32-NEXT: and x10, x9, #0xfffffff0 +; CHECK-APPLE-ARM64_32-NEXT: ldr w11, [x10] +; CHECK-APPLE-ARM64_32-NEXT: orr w10, w9, #0x8 +; CHECK-APPLE-ARM64_32-NEXT: stp w11, w10, [x29, #-12] +; CHECK-APPLE-ARM64_32-NEXT: ldr w8, [x8] +; CHECK-APPLE-ARM64_32-NEXT: orr w9, w9, #0xc +; CHECK-APPLE-ARM64_32-NEXT: str w8, [sp, #16] +; CHECK-APPLE-ARM64_32-NEXT: stur w9, [x29, #-8] +; CHECK-APPLE-ARM64_32-NEXT: ldr w8, [x10] ; CHECK-APPLE-ARM64_32-NEXT: fmov s0, #1.00000000 ; CHECK-APPLE-ARM64_32-NEXT: mov x21, x0 ; CHECK-APPLE-ARM64_32-NEXT: str w8, [sp, #12] @@ -1111,8 +1111,8 @@ ; CHECK-APPLE-AARCH64-NEXT: mov x19, x0 ; CHECK-APPLE-AARCH64-NEXT: mov w8, #10 ; CHECK-APPLE-AARCH64-NEXT: mov w9, #11 -; CHECK-APPLE-AARCH64-NEXT: mov w10, #12 ; CHECK-APPLE-AARCH64-NEXT: stp w9, w8, [sp, #32] +; CHECK-APPLE-AARCH64-NEXT: mov w10, #12 ; CHECK-APPLE-AARCH64-NEXT: str w10, [sp, #28] ; CHECK-APPLE-AARCH64-NEXT: mov x21, xzr ; CHECK-APPLE-AARCH64-NEXT: stp x9, x10, [sp, #8] @@ -1198,9 +1198,9 @@ ; CHECK-APPLE-ARM64_32-NEXT: mov x19, x0 ; CHECK-APPLE-ARM64_32-NEXT: mov w8, #10 ; CHECK-APPLE-ARM64_32-NEXT: mov w9, #11 -; CHECK-APPLE-ARM64_32-NEXT: mov w10, #12 ; CHECK-APPLE-ARM64_32-NEXT: stp w9, w8, [sp, #20] -; CHECK-APPLE-ARM64_32-NEXT: str w10, [sp, #16] +; CHECK-APPLE-ARM64_32-NEXT: mov w9, #12 +; CHECK-APPLE-ARM64_32-NEXT: str w9, [sp, #16] ; CHECK-APPLE-ARM64_32-NEXT: mov x21, xzr ; CHECK-APPLE-ARM64_32-NEXT: mov x9, #11 ; CHECK-APPLE-ARM64_32-NEXT: movk x9, #12, lsl #32 Index: llvm/test/CodeGen/AArch64/tbl-loops.ll =================================================================== --- llvm/test/CodeGen/AArch64/tbl-loops.ll +++ llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -16,14 +16,14 @@ ; CHECK-NEXT: b .LBB0_6 ; CHECK-NEXT: .LBB0_3: // %vector.ph ; CHECK-NEXT: add x11, x8, #1 -; CHECK-NEXT: mov w15, #1132396544 +; CHECK-NEXT: mov w13, #1132396544 ; CHECK-NEXT: and x10, x11, #0x1fffffff8 -; CHECK-NEXT: add x12, x0, #4 +; CHECK-NEXT: dup v0.4s, w13 +; CHECK-NEXT: add x8, x1, x10, lsl #2 ; CHECK-NEXT: add x9, x0, x10 +; CHECK-NEXT: add x12, x0, #4 ; CHECK-NEXT: add x13, x1, #16 -; CHECK-NEXT: add x8, x1, x10, lsl #2 ; CHECK-NEXT: mov x14, x10 -; CHECK-NEXT: dup v0.4s, w15 ; CHECK-NEXT: .LBB0_4: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp q1, q2, [x13, #-16] @@ -170,11 +170,11 @@ ; CHECK-NEXT: mov w11, #1132396544 ; CHECK-NEXT: .LBB1_6: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp s2, s3, [x8], #8 -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fmin s4, s2, s1 -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: fmin s1, s3, s1 +; CHECK-NEXT: ldp s1, s3, [x8], #8 +; CHECK-NEXT: fmov s2, w11 +; CHECK-NEXT: fmin s4, s1, s2 +; CHECK-NEXT: fcmp s1, #0.0 +; CHECK-NEXT: fmin s1, s3, s2 ; CHECK-NEXT: fcsel s2, s0, s4, mi ; CHECK-NEXT: fcmp s3, #0.0 ; CHECK-NEXT: fcsel s1, s0, s1, mi @@ -189,12 +189,12 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_8: // %vector.ph ; CHECK-NEXT: add x11, x8, #1 -; CHECK-NEXT: mov w13, #1132396544 +; CHECK-NEXT: mov w12, #1132396544 ; CHECK-NEXT: and x10, x11, #0x1fffffffc -; CHECK-NEXT: mov x12, x10 +; CHECK-NEXT: dup v0.4s, w12 ; CHECK-NEXT: add x8, x1, x10, lsl #3 ; CHECK-NEXT: add x9, x0, x10, lsl #1 -; CHECK-NEXT: dup v0.4s, w13 +; CHECK-NEXT: mov x12, x10 ; CHECK-NEXT: .LBB1_9: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld2 { v1.4s, v2.4s }, [x1], #32 @@ -345,20 +345,20 @@ ; CHECK-NEXT: mov w11, #1132396544 ; CHECK-NEXT: .LBB2_6: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp s2, s3, [x8] -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fmin s4, s2, s1 -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: ldr s2, [x8, #8] -; CHECK-NEXT: fmin s5, s3, s1 +; CHECK-NEXT: ldp s1, s3, [x8] +; CHECK-NEXT: fmov s2, w11 +; CHECK-NEXT: fmin s4, s1, s2 +; CHECK-NEXT: fcmp s1, #0.0 +; CHECK-NEXT: ldr s1, [x8, #8] +; CHECK-NEXT: fmin s5, s3, s2 ; CHECK-NEXT: add x8, x8, #12 ; CHECK-NEXT: fcsel s4, s0, s4, mi ; CHECK-NEXT: fcmp s3, #0.0 -; CHECK-NEXT: fmin s1, s2, s1 +; CHECK-NEXT: fmin s2, s1, s2 ; CHECK-NEXT: fcsel s3, s0, s5, mi -; CHECK-NEXT: fcmp s2, #0.0 +; CHECK-NEXT: fcmp s1, #0.0 ; CHECK-NEXT: fcvtzs w12, s4 -; CHECK-NEXT: fcsel s1, s0, s1, mi +; CHECK-NEXT: fcsel s1, s0, s2, mi ; CHECK-NEXT: subs w10, w10, #1 ; CHECK-NEXT: fcvtzs w13, s3 ; CHECK-NEXT: strb w12, [x9] @@ -371,15 +371,15 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_8: // %vector.ph ; CHECK-NEXT: add x11, x8, #1 -; CHECK-NEXT: adrp x12, .LCPI2_0 +; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: and x10, x11, #0x1fffffffc -; CHECK-NEXT: mov w13, #1132396544 -; CHECK-NEXT: add x8, x10, x10, lsl #1 -; CHECK-NEXT: ldr q0, [x12, :lo12:.LCPI2_0] -; CHECK-NEXT: add x9, x0, x8 +; CHECK-NEXT: mov w12, #1132396544 +; CHECK-NEXT: add x9, x10, x10, lsl #1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] +; CHECK-NEXT: dup v1.4s, w12 ; CHECK-NEXT: mov x12, x10 -; CHECK-NEXT: add x8, x1, x8, lsl #2 -; CHECK-NEXT: dup v1.4s, w13 +; CHECK-NEXT: add x8, x1, x9, lsl #2 +; CHECK-NEXT: add x9, x0, x9 ; CHECK-NEXT: .LBB2_9: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld3 { v2.4s, v3.4s, v4.4s }, [x1], #48 @@ -401,8 +401,8 @@ ; CHECK-NEXT: xtn v5.4h, v3.4s ; CHECK-NEXT: xtn v6.4h, v2.4s ; CHECK-NEXT: tbl v2.16b, { v4.16b, v5.16b, v6.16b }, v0.16b -; CHECK-NEXT: str d2, [x0], #12 ; CHECK-NEXT: st1 { v2.s }[2], [x13] +; CHECK-NEXT: str d2, [x0], #12 ; CHECK-NEXT: b.ne .LBB2_9 ; CHECK-NEXT: // %bb.10: // %middle.block ; CHECK-NEXT: cmp x11, x10 @@ -554,29 +554,29 @@ ; CHECK-NEXT: mov w11, #1132396544 ; CHECK-NEXT: .LBB3_6: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp s2, s3, [x8] -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fmin s4, s2, s1 -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: fmin s2, s3, s1 +; CHECK-NEXT: ldp s1, s3, [x8] +; CHECK-NEXT: fmov s2, w11 +; CHECK-NEXT: fmin s4, s1, s2 +; CHECK-NEXT: fcmp s1, #0.0 +; CHECK-NEXT: fmin s1, s3, s2 ; CHECK-NEXT: fcsel s4, s0, s4, mi ; CHECK-NEXT: fcmp s3, #0.0 ; CHECK-NEXT: ldp s5, s3, [x8, #8] ; CHECK-NEXT: add x8, x8, #16 -; CHECK-NEXT: fcsel s2, s0, s2, mi +; CHECK-NEXT: fcsel s1, s0, s1, mi ; CHECK-NEXT: fcvtzs w12, s4 -; CHECK-NEXT: fmin s6, s5, s1 +; CHECK-NEXT: fmin s6, s5, s2 ; CHECK-NEXT: fcmp s5, #0.0 -; CHECK-NEXT: fmin s1, s3, s1 -; CHECK-NEXT: fcvtzs w13, s2 +; CHECK-NEXT: fmin s2, s3, s2 +; CHECK-NEXT: fcvtzs w13, s1 ; CHECK-NEXT: strb w12, [x9] ; CHECK-NEXT: fcsel s5, s0, s6, mi ; CHECK-NEXT: fcmp s3, #0.0 ; CHECK-NEXT: strb w13, [x9, #1] -; CHECK-NEXT: fcsel s1, s0, s1, mi +; CHECK-NEXT: fcsel s2, s0, s2, mi ; CHECK-NEXT: subs w10, w10, #1 ; CHECK-NEXT: fcvtzs w14, s5 -; CHECK-NEXT: fcvtzs w12, s1 +; CHECK-NEXT: fcvtzs w12, s2 ; CHECK-NEXT: strb w14, [x9, #2] ; CHECK-NEXT: strb w12, [x9, #3] ; CHECK-NEXT: add x9, x9, #4 @@ -585,14 +585,14 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_8: // %vector.ph ; CHECK-NEXT: add x11, x8, #1 -; CHECK-NEXT: adrp x12, .LCPI3_0 +; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: mov w12, #1132396544 ; CHECK-NEXT: and x10, x11, #0x1fffffffc -; CHECK-NEXT: mov w13, #1132396544 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: add x8, x1, x10, lsl #4 ; CHECK-NEXT: add x9, x0, x10, lsl #2 -; CHECK-NEXT: ldr q0, [x12, :lo12:.LCPI3_0] +; CHECK-NEXT: dup v1.4s, w12 ; CHECK-NEXT: mov x12, x10 -; CHECK-NEXT: dup v1.4s, w13 ; CHECK-NEXT: .LBB3_9: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld4 { v2.4s, v3.4s, v4.4s, v5.4s }, [x1], #64 Index: llvm/test/CodeGen/AArch64/typepromotion-overflow.ll =================================================================== --- llvm/test/CodeGen/AArch64/typepromotion-overflow.ll +++ llvm/test/CodeGen/AArch64/typepromotion-overflow.ll @@ -40,12 +40,12 @@ define zeroext i16 @overflow_mul(i16 zeroext %a, i16 zeroext %b) { ; CHECK-LABEL: overflow_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w0 -; CHECK-NEXT: mov w8, #5 -; CHECK-NEXT: orr w9, w9, #0x1 -; CHECK-NEXT: and w9, w9, #0xffff -; CHECK-NEXT: cmp w9, #1024 +; CHECK-NEXT: mul w8, w1, w0 ; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: orr w8, w8, #0x1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, #1024 +; CHECK-NEXT: mov w8, #5 ; CHECK-NEXT: csel w0, w9, w8, hi ; CHECK-NEXT: ret %add = mul i16 %b, %a @@ -58,12 +58,12 @@ define zeroext i16 @overflow_shl(i16 zeroext %a, i16 zeroext %b) { ; CHECK-LABEL: overflow_shl: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w9, w0, w1 -; CHECK-NEXT: mov w8, #5 -; CHECK-NEXT: orr w9, w9, #0x1 -; CHECK-NEXT: and w9, w9, #0xffff -; CHECK-NEXT: cmp w9, #1024 +; CHECK-NEXT: lsl w8, w0, w1 ; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: orr w8, w8, #0x1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: cmp w8, #1024 +; CHECK-NEXT: mov w8, #5 ; CHECK-NEXT: csel w0, w9, w8, hi ; CHECK-NEXT: ret %add = shl i16 %a, %b Index: llvm/test/CodeGen/AArch64/typepromotion-phisret.ll =================================================================== --- llvm/test/CodeGen/AArch64/typepromotion-phisret.ll +++ llvm/test/CodeGen/AArch64/typepromotion-phisret.ll @@ -10,10 +10,10 @@ ; CHECK-NEXT: csel w8, w8, w9, hi ; CHECK-NEXT: .LBB0_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub w9, w8, #2 -; CHECK-NEXT: lsl w10, w8, #1 +; CHECK-NEXT: lsl w9, w8, #1 +; CHECK-NEXT: sub w10, w8, #2 ; CHECK-NEXT: cmp w8, #254 -; CHECK-NEXT: csel w8, w9, w10, lo +; CHECK-NEXT: csel w8, w10, w9, lo ; CHECK-NEXT: cmp w8, #255 ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %exit @@ -58,10 +58,10 @@ ; CHECK-NEXT: csel w8, w0, w1, hi ; CHECK-NEXT: .LBB1_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub w9, w8, #2 -; CHECK-NEXT: lsl w10, w8, #1 +; CHECK-NEXT: lsl w9, w8, #1 +; CHECK-NEXT: sub w10, w8, #2 ; CHECK-NEXT: cmp w8, #254 -; CHECK-NEXT: csel w8, w9, w10, lo +; CHECK-NEXT: csel w8, w10, w9, lo ; CHECK-NEXT: cmp w8, #255 ; CHECK-NEXT: b.ne .LBB1_1 ; CHECK-NEXT: // %bb.2: // %exit Index: llvm/test/CodeGen/AArch64/typepromotion-signed.ll =================================================================== --- llvm/test/CodeGen/AArch64/typepromotion-signed.ll +++ llvm/test/CodeGen/AArch64/typepromotion-signed.ll @@ -56,12 +56,12 @@ define i32 @test_signext_b(i8* nocapture readonly %ptr, i8 signext %arg) { ; CHECK-LABEL: test_signext_b: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: mov w8, #20894 -; CHECK-NEXT: add w9, w9, w1 -; CHECK-NEXT: sxtb w9, w9 -; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: mov w9, #42 +; CHECK-NEXT: add w8, w8, w1 +; CHECK-NEXT: sxtb w8, w8 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: mov w8, #20894 ; CHECK-NEXT: csel w0, w9, w8, ge ; CHECK-NEXT: ret entry: @@ -96,12 +96,12 @@ define i32 @test_signext_h(i16* nocapture readonly %ptr, i16 signext %arg) { ; CHECK-LABEL: test_signext_h: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: mov w8, #20894 -; CHECK-NEXT: add w9, w9, w1 -; CHECK-NEXT: sxth w9, w9 -; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: mov w9, #42 +; CHECK-NEXT: add w8, w8, w1 +; CHECK-NEXT: sxth w8, w8 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: mov w8, #20894 ; CHECK-NEXT: csel w0, w9, w8, ge ; CHECK-NEXT: ret entry: Index: llvm/test/CodeGen/AArch64/uadd_sat_plus.ll =================================================================== --- llvm/test/CodeGen/AArch64/uadd_sat_plus.ll +++ llvm/test/CodeGen/AArch64/uadd_sat_plus.ll @@ -33,12 +33,12 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind { ; CHECK-LABEL: func16: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: and w10, w0, #0xffff -; CHECK-NEXT: mov w8, #65535 -; CHECK-NEXT: add w9, w10, w9, uxth -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w0, w9, w8, lo +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: add w8, w9, w8, uxth +; CHECK-NEXT: mov w9, #65535 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %a = mul i16 %y, %z %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %a) @@ -48,12 +48,12 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { ; CHECK-LABEL: func8: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: and w10, w0, #0xff -; CHECK-NEXT: mov w8, #255 -; CHECK-NEXT: add w9, w10, w9, uxtb -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: csel w0, w9, w8, lo +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: add w8, w9, w8, uxtb +; CHECK-NEXT: mov w9, #255 +; CHECK-NEXT: cmp w8, #255 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %a = mul i8 %y, %z %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %a) @@ -63,13 +63,13 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { ; CHECK-LABEL: func4: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: and w10, w0, #0xf -; CHECK-NEXT: mov w8, #15 -; CHECK-NEXT: and w9, w9, #0xf -; CHECK-NEXT: add w9, w10, w9 -; CHECK-NEXT: cmp w9, #15 -; CHECK-NEXT: csel w0, w9, w8, lo +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: and w9, w0, #0xf +; CHECK-NEXT: and w8, w8, #0xf +; CHECK-NEXT: add w8, w9, w8 +; CHECK-NEXT: mov w9, #15 +; CHECK-NEXT: cmp w8, #15 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %a = mul i4 %y, %z %tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %a) Index: llvm/test/CodeGen/AArch64/uadd_sat_vec.ll =================================================================== --- llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -54,9 +54,9 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.16b, v2.16b, v6.16b ; CHECK-NEXT: uqadd v0.16b, v0.16b, v4.16b ; CHECK-NEXT: uqadd v1.16b, v1.16b, v5.16b +; CHECK-NEXT: uqadd v2.16b, v2.16b, v6.16b ; CHECK-NEXT: uqadd v3.16b, v3.16b, v7.16b ; CHECK-NEXT: ret %z = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y) @@ -85,9 +85,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { ; CHECK-LABEL: v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.8h, v2.8h, v6.8h ; CHECK-NEXT: uqadd v0.8h, v0.8h, v4.8h ; CHECK-NEXT: uqadd v1.8h, v1.8h, v5.8h +; CHECK-NEXT: uqadd v2.8h, v2.8h, v6.8h ; CHECK-NEXT: uqadd v3.8h, v3.8h, v7.8h ; CHECK-NEXT: ret %z = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y) @@ -97,9 +97,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -112,13 +112,13 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: movi d0, #0xff00ff00ff00ff -; CHECK-NEXT: ldr s2, [x1] +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: movi d2, #0xff00ff00ff00ff +; CHECK-NEXT: ldr s1, [x1] +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll v2.8h, v2.8b, #0 -; CHECK-NEXT: add v1.4h, v1.4h, v2.4h -; CHECK-NEXT: umin v0.4h, v1.4h, v0.4h +; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret @@ -133,16 +133,16 @@ ; CHECK-LABEL: v2i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x1] -; CHECK-NEXT: movi d0, #0x0000ff000000ff +; CHECK-NEXT: movi d2, #0x0000ff000000ff ; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: ldrb w10, [x1, #1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: mov v2.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: add v1.2s, v1.2s, v2.2s -; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s +; CHECK-NEXT: ldrb w10, [x0, #1] +; CHECK-NEXT: ldrb w11, [x1, #1] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strb w9, [x2] @@ -158,9 +158,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: uqadd v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -174,16 +174,16 @@ ; CHECK-LABEL: v2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrh w8, [x1] -; CHECK-NEXT: movi d0, #0x00ffff0000ffff +; CHECK-NEXT: movi d2, #0x00ffff0000ffff ; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: ldrh w10, [x1, #2] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldrh w9, [x0, #2] -; CHECK-NEXT: mov v2.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: add v1.2s, v1.2s, v2.2s -; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s +; CHECK-NEXT: ldrh w10, [x0, #2] +; CHECK-NEXT: ldrh w11, [x1, #2] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strh w9, [x2] @@ -225,9 +225,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x1] -; CHECK-NEXT: ldr b1, [x0] -; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: ldr b1, [x1] +; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -240,9 +240,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x1] -; CHECK-NEXT: ldr h1, [x0] -; CHECK-NEXT: uqadd v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: ldr h1, [x1] +; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -305,9 +305,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { ; CHECK-LABEL: v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.4s, v2.4s, v6.4s ; CHECK-NEXT: uqadd v0.4s, v0.4s, v4.4s ; CHECK-NEXT: uqadd v1.4s, v1.4s, v5.4s +; CHECK-NEXT: uqadd v2.4s, v2.4s, v6.4s ; CHECK-NEXT: uqadd v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %z = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y) @@ -336,9 +336,9 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.2d, v2.2d, v6.2d ; CHECK-NEXT: uqadd v0.2d, v0.2d, v4.2d ; CHECK-NEXT: uqadd v1.2d, v1.2d, v5.2d +; CHECK-NEXT: uqadd v2.2d, v2.2d, v6.2d ; CHECK-NEXT: uqadd v3.2d, v3.2d, v7.2d ; CHECK-NEXT: ret %z = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y) Index: llvm/test/CodeGen/AArch64/udivmodei5.ll =================================================================== --- llvm/test/CodeGen/AArch64/udivmodei5.ll +++ llvm/test/CodeGen/AArch64/udivmodei5.ll @@ -6,25 +6,25 @@ ; CHECK-LABEL: udiv129: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: ldp x10, x11, [x0] +; CHECK-NEXT: ldp x9, x10, [x0] ; CHECK-NEXT: stp x30, x19, [sp, #96] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x1 -; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: ldrb w9, [x0, #16] +; CHECK-NEXT: ldrb w8, [x0, #16] ; CHECK-NEXT: add x0, sp, #64 ; CHECK-NEXT: add x1, sp, #32 ; CHECK-NEXT: mov x2, sp +; CHECK-NEXT: stp xzr, x9, [sp, #24] +; CHECK-NEXT: mov w9, #3 ; CHECK-NEXT: mov w3, #256 -; CHECK-NEXT: stp x9, xzr, [sp, #48] +; CHECK-NEXT: stp x8, xzr, [sp, #48] ; CHECK-NEXT: stp xzr, xzr, [sp, #8] -; CHECK-NEXT: stp xzr, x10, [sp, #24] -; CHECK-NEXT: str x11, [sp, #40] -; CHECK-NEXT: str x8, [sp] +; CHECK-NEXT: str x10, [sp, #40] +; CHECK-NEXT: str x9, [sp] ; CHECK-NEXT: bl __udivei4 -; CHECK-NEXT: ldr w8, [sp, #80] -; CHECK-NEXT: ldp x9, x10, [sp, #64] -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: stp x9, x10, [x19] +; CHECK-NEXT: ldp x9, x8, [sp, #64] +; CHECK-NEXT: ldr w10, [sp, #80] +; CHECK-NEXT: stp x9, x8, [x19] +; CHECK-NEXT: and w8, w10, #0x1 ; CHECK-NEXT: strb w8, [x19, #16] ; CHECK-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #112 @@ -33,31 +33,31 @@ ; CHECK-BE-LABEL: udiv129: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: sub sp, sp, #112 -; CHECK-BE-NEXT: ldp x11, x10, [x0] -; CHECK-BE-NEXT: mov w8, #3 +; CHECK-BE-NEXT: ldp x10, x9, [x0] +; CHECK-BE-NEXT: mov w11, #3 ; CHECK-BE-NEXT: stp x30, x19, [sp, #96] // 16-byte Folded Spill -; CHECK-BE-NEXT: ldrb w9, [x0, #16] +; CHECK-BE-NEXT: ldrb w8, [x0, #16] +; CHECK-BE-NEXT: stp x11, xzr, [sp, #24] ; CHECK-BE-NEXT: mov x19, x1 ; CHECK-BE-NEXT: add x0, sp, #64 ; CHECK-BE-NEXT: add x1, sp, #32 -; CHECK-BE-NEXT: stp x8, xzr, [sp, #24] ; CHECK-BE-NEXT: mov x2, sp -; CHECK-BE-NEXT: extr x8, x11, x10, #56 -; CHECK-BE-NEXT: lsr x11, x11, #56 -; CHECK-BE-NEXT: bfi x9, x10, #8, #56 +; CHECK-BE-NEXT: extr x11, x10, x9, #56 +; CHECK-BE-NEXT: lsr x10, x10, #56 +; CHECK-BE-NEXT: bfi x8, x9, #8, #56 ; CHECK-BE-NEXT: mov w3, #256 ; CHECK-BE-NEXT: stp xzr, xzr, [sp, #8] ; CHECK-BE-NEXT: str xzr, [sp] -; CHECK-BE-NEXT: stp x11, x8, [sp, #40] -; CHECK-BE-NEXT: str x9, [sp, #56] +; CHECK-BE-NEXT: stp x10, x11, [sp, #40] +; CHECK-BE-NEXT: str x8, [sp, #56] ; CHECK-BE-NEXT: bl __udivei4 -; CHECK-BE-NEXT: ldp x9, x8, [sp, #72] -; CHECK-BE-NEXT: ldr x10, [sp, #88] -; CHECK-BE-NEXT: extr x9, x9, x8, #8 -; CHECK-BE-NEXT: extr x8, x8, x10, #8 -; CHECK-BE-NEXT: strb w10, [x19, #16] -; CHECK-BE-NEXT: and x9, x9, #0x1ffffffffffffff -; CHECK-BE-NEXT: stp x9, x8, [x19] +; CHECK-BE-NEXT: ldp x10, x8, [sp, #80] +; CHECK-BE-NEXT: ldr x9, [sp, #72] +; CHECK-BE-NEXT: extr x9, x9, x10, #8 +; CHECK-BE-NEXT: extr x11, x10, x8, #8 +; CHECK-BE-NEXT: strb w8, [x19, #16] +; CHECK-BE-NEXT: and x8, x9, #0x1ffffffffffffff +; CHECK-BE-NEXT: stp x8, x11, [x19] ; CHECK-BE-NEXT: ldp x30, x19, [sp, #96] // 16-byte Folded Reload ; CHECK-BE-NEXT: add sp, sp, #112 ; CHECK-BE-NEXT: ret @@ -102,9 +102,9 @@ ; CHECK-BE-NEXT: mov w3, #256 ; CHECK-BE-NEXT: str x30, [sp, #96] // 8-byte Folded Spill ; CHECK-BE-NEXT: stp x6, xzr, [sp, #24] -; CHECK-BE-NEXT: stp x9, x5, [sp, #8] ; CHECK-BE-NEXT: str xzr, [sp] ; CHECK-BE-NEXT: str x8, [sp, #40] +; CHECK-BE-NEXT: stp x9, x5, [sp, #8] ; CHECK-BE-NEXT: bl __umodei4 ; CHECK-BE-NEXT: ldp x8, x1, [sp, #72] ; CHECK-BE-NEXT: ldp x2, x30, [sp, #88] // 8-byte Folded Reload @@ -120,8 +120,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #112 ; CHECK-NEXT: sbfx x8, x2, #0, #1 -; CHECK-NEXT: stp x0, x1, [sp, #32] ; CHECK-NEXT: sbfx x9, x6, #0, #1 +; CHECK-NEXT: stp x0, x1, [sp, #32] ; CHECK-NEXT: add x0, sp, #64 ; CHECK-NEXT: add x1, sp, #32 ; CHECK-NEXT: mov x2, sp @@ -142,8 +142,8 @@ ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: sub sp, sp, #112 ; CHECK-BE-NEXT: sbfx x8, x0, #0, #1 -; CHECK-BE-NEXT: stp x1, x2, [sp, #48] ; CHECK-BE-NEXT: sbfx x9, x4, #0, #1 +; CHECK-BE-NEXT: stp x1, x2, [sp, #48] ; CHECK-BE-NEXT: add x0, sp, #64 ; CHECK-BE-NEXT: add x1, sp, #32 ; CHECK-BE-NEXT: mov x2, sp @@ -167,8 +167,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #112 ; CHECK-NEXT: sbfx x8, x2, #0, #1 -; CHECK-NEXT: stp x0, x1, [sp, #32] ; CHECK-NEXT: sbfx x9, x6, #0, #1 +; CHECK-NEXT: stp x0, x1, [sp, #32] ; CHECK-NEXT: add x0, sp, #64 ; CHECK-NEXT: add x1, sp, #32 ; CHECK-NEXT: mov x2, sp @@ -189,8 +189,8 @@ ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: sub sp, sp, #112 ; CHECK-BE-NEXT: sbfx x8, x0, #0, #1 -; CHECK-BE-NEXT: stp x1, x2, [sp, #48] ; CHECK-BE-NEXT: sbfx x9, x4, #0, #1 +; CHECK-BE-NEXT: stp x1, x2, [sp, #48] ; CHECK-BE-NEXT: add x0, sp, #64 ; CHECK-BE-NEXT: add x1, sp, #32 ; CHECK-BE-NEXT: mov x2, sp @@ -245,11 +245,11 @@ ; CHECK-BE-NEXT: sub sp, sp, #208 ; CHECK-BE-NEXT: add x8, sp, #208 ; CHECK-BE-NEXT: str x30, [sp, #192] // 8-byte Folded Spill -; CHECK-BE-NEXT: sbfx x9, x0, #0, #1 -; CHECK-BE-NEXT: add x0, sp, #128 ; CHECK-BE-NEXT: ld1 { v0.2d }, [x8] ; CHECK-BE-NEXT: mov x8, sp ; CHECK-BE-NEXT: add x8, x8, #40 +; CHECK-BE-NEXT: sbfx x9, x0, #0, #1 +; CHECK-BE-NEXT: add x0, sp, #128 ; CHECK-BE-NEXT: st1 { v0.2d }, [x8] ; CHECK-BE-NEXT: ldr x8, [sp, #224] ; CHECK-BE-NEXT: stp x3, x4, [sp, #112] Index: llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll +++ llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll @@ -9,16 +9,16 @@ ; AARCH-NEXT: madd x8, x1, x2, x8 ; AARCH-NEXT: umulh x10, x1, x2 ; AARCH-NEXT: adds x8, x9, x8 -; AARCH-NEXT: cset w9, hs +; AARCH-NEXT: umulh x9, x3, x0 +; AARCH-NEXT: cset w11, hs ; AARCH-NEXT: cmp x1, #0 ; AARCH-NEXT: ccmp x3, #0, #4, ne -; AARCH-NEXT: mov x1, x8 -; AARCH-NEXT: ccmp xzr, x10, #0, eq -; AARCH-NEXT: umulh x10, x3, x0 ; AARCH-NEXT: mul x0, x0, x2 ; AARCH-NEXT: ccmp xzr, x10, #0, eq -; AARCH-NEXT: cset w10, ne -; AARCH-NEXT: orr w2, w10, w9 +; AARCH-NEXT: mov x1, x8 +; AARCH-NEXT: ccmp xzr, x9, #0, eq +; AARCH-NEXT: cset w9, ne +; AARCH-NEXT: orr w2, w9, w11 ; AARCH-NEXT: ret start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 Index: llvm/test/CodeGen/AArch64/urem-lkk.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-lkk.ll +++ llvm/test/CodeGen/AArch64/urem-lkk.ll @@ -88,12 +88,12 @@ define i64 @dont_fold_urem_i64(i64 %x) { ; CHECK-LABEL: dont_fold_urem_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #58849 -; CHECK-NEXT: lsr x9, x0, #1 -; CHECK-NEXT: movk x8, #48148, lsl #16 -; CHECK-NEXT: movk x8, #33436, lsl #32 -; CHECK-NEXT: movk x8, #21399, lsl #48 -; CHECK-NEXT: umulh x8, x9, x8 +; CHECK-NEXT: mov x9, #58849 +; CHECK-NEXT: lsr x8, x0, #1 +; CHECK-NEXT: movk x9, #48148, lsl #16 +; CHECK-NEXT: movk x9, #33436, lsl #32 +; CHECK-NEXT: movk x9, #21399, lsl #48 +; CHECK-NEXT: umulh x8, x8, x9 ; CHECK-NEXT: mov w9, #98 ; CHECK-NEXT: lsr x8, x8, #4 ; CHECK-NEXT: msub x0, x8, x9, x0 Index: llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll +++ llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll @@ -67,11 +67,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: adrp x9, .LCPI4_1 -; CHECK-NEXT: mov v0.h[1], w1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI4_1] +; CHECK-NEXT: adrp x8, .LCPI4_1 +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_1] ; CHECK-NEXT: adrp x8, .LCPI4_2 +; CHECK-NEXT: mov v0.h[1], w1 ; CHECK-NEXT: mov v0.h[2], w2 ; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h ; CHECK-NEXT: movi d1, #0x0000000000ffff Index: llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll +++ llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll @@ -6,13 +6,13 @@ ; CHECK-LABEL: test_urem_odd_even: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: adrp x9, .LCPI0_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: adrp x8, .LCPI0_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_1] +; CHECK-NEXT: adrp x8, .LCPI0_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_2] ; CHECK-NEXT: adrp x8, .LCPI0_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -34,9 +34,9 @@ ; CHECK-LABEL: test_urem_odd_allones_eq: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] ; CHECK-NEXT: adrp x8, .LCPI1_1 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_1] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s @@ -51,9 +51,9 @@ ; CHECK-LABEL: test_urem_odd_allones_ne: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI2_0 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: adrp x8, .LCPI2_1 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_1] ; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s @@ -70,13 +70,13 @@ ; CHECK-LABEL: test_urem_even_allones_eq: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: adrp x9, .LCPI3_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: adrp x8, .LCPI3_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_1] +; CHECK-NEXT: adrp x8, .LCPI3_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_2] ; CHECK-NEXT: adrp x8, .LCPI3_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -94,13 +94,13 @@ ; CHECK-LABEL: test_urem_even_allones_ne: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: adrp x9, .LCPI4_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: adrp x8, .LCPI4_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI4_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_1] +; CHECK-NEXT: adrp x8, .LCPI4_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_2] ; CHECK-NEXT: adrp x8, .LCPI4_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -120,13 +120,13 @@ ; CHECK-LABEL: test_urem_odd_even_allones_eq: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: adrp x9, .LCPI5_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] ; CHECK-NEXT: adrp x8, .LCPI5_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI5_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_1] +; CHECK-NEXT: adrp x8, .LCPI5_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_2] ; CHECK-NEXT: adrp x8, .LCPI5_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -144,13 +144,13 @@ ; CHECK-LABEL: test_urem_odd_even_allones_ne: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI6_0 -; CHECK-NEXT: adrp x9, .LCPI6_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0] ; CHECK-NEXT: adrp x8, .LCPI6_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI6_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_1] +; CHECK-NEXT: adrp x8, .LCPI6_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_2] ; CHECK-NEXT: adrp x8, .LCPI6_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -172,13 +172,13 @@ ; CHECK-LABEL: test_urem_odd_poweroftwo: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI7_0 -; CHECK-NEXT: adrp x9, .LCPI7_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0] ; CHECK-NEXT: adrp x8, .LCPI7_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI7_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_1] +; CHECK-NEXT: adrp x8, .LCPI7_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_2] ; CHECK-NEXT: adrp x8, .LCPI7_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -198,13 +198,13 @@ ; CHECK-LABEL: test_urem_even_poweroftwo: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: adrp x9, .LCPI8_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0] ; CHECK-NEXT: adrp x8, .LCPI8_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI8_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_1] +; CHECK-NEXT: adrp x8, .LCPI8_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_2] ; CHECK-NEXT: adrp x8, .LCPI8_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -224,13 +224,13 @@ ; CHECK-LABEL: test_urem_odd_even_poweroftwo: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI9_0 -; CHECK-NEXT: adrp x9, .LCPI9_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0] ; CHECK-NEXT: adrp x8, .LCPI9_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI9_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_1] +; CHECK-NEXT: adrp x8, .LCPI9_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_2] ; CHECK-NEXT: adrp x8, .LCPI9_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -253,9 +253,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #52429 ; CHECK-NEXT: movk w8, #52428, lsl #16 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI10_0 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s @@ -273,11 +273,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #28087 ; CHECK-NEXT: movk w8, #46811, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI11_0 -; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] +; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b @@ -295,13 +295,13 @@ ; CHECK-LABEL: test_urem_odd_even_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI12_0 -; CHECK-NEXT: adrp x9, .LCPI12_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] ; CHECK-NEXT: adrp x8, .LCPI12_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI12_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_1] +; CHECK-NEXT: adrp x8, .LCPI12_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_2] ; CHECK-NEXT: adrp x8, .LCPI12_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -323,13 +323,13 @@ ; CHECK-LABEL: test_urem_odd_INT_MIN: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI13_0 -; CHECK-NEXT: adrp x9, .LCPI13_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0] ; CHECK-NEXT: adrp x8, .LCPI13_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI13_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_1] +; CHECK-NEXT: adrp x8, .LCPI13_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_2] ; CHECK-NEXT: adrp x8, .LCPI13_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -349,13 +349,13 @@ ; CHECK-LABEL: test_urem_even_INT_MIN: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: adrp x9, .LCPI14_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] ; CHECK-NEXT: adrp x8, .LCPI14_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_1] +; CHECK-NEXT: adrp x8, .LCPI14_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_2] ; CHECK-NEXT: adrp x8, .LCPI14_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -375,13 +375,13 @@ ; CHECK-LABEL: test_urem_odd_even_INT_MIN: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI15_0 -; CHECK-NEXT: adrp x9, .LCPI15_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: adrp x8, .LCPI15_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI15_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] +; CHECK-NEXT: adrp x8, .LCPI15_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_2] ; CHECK-NEXT: adrp x8, .LCPI15_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -403,13 +403,13 @@ ; CHECK-LABEL: test_urem_odd_allones_and_poweroftwo: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: adrp x9, .LCPI16_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: adrp x8, .LCPI16_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI16_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_1] +; CHECK-NEXT: adrp x8, .LCPI16_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_2] ; CHECK-NEXT: adrp x8, .LCPI16_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -429,13 +429,13 @@ ; CHECK-LABEL: test_urem_even_allones_and_poweroftwo: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: adrp x9, .LCPI17_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-NEXT: adrp x8, .LCPI17_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI17_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_1] +; CHECK-NEXT: adrp x8, .LCPI17_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_2] ; CHECK-NEXT: adrp x8, .LCPI17_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -455,13 +455,13 @@ ; CHECK-LABEL: test_urem_odd_even_allones_and_poweroftwo: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: adrp x9, .LCPI18_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: adrp x8, .LCPI18_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_1] +; CHECK-NEXT: adrp x8, .LCPI18_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_2] ; CHECK-NEXT: adrp x8, .LCPI18_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -483,9 +483,9 @@ ; CHECK-LABEL: test_urem_odd_allones_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0] ; CHECK-NEXT: adrp x8, .LCPI19_1 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_1] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s @@ -502,13 +502,13 @@ ; CHECK-LABEL: test_urem_even_allones_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI20_0 -; CHECK-NEXT: adrp x9, .LCPI20_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0] ; CHECK-NEXT: adrp x8, .LCPI20_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI20_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_1] +; CHECK-NEXT: adrp x8, .LCPI20_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_2] ; CHECK-NEXT: adrp x8, .LCPI20_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -528,13 +528,13 @@ ; CHECK-LABEL: test_urem_odd_even_allones_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI21_0 -; CHECK-NEXT: adrp x9, .LCPI21_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0] ; CHECK-NEXT: adrp x8, .LCPI21_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI21_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_1] +; CHECK-NEXT: adrp x8, .LCPI21_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_2] ; CHECK-NEXT: adrp x8, .LCPI21_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -556,13 +556,13 @@ ; CHECK-LABEL: test_urem_odd_poweroftwo_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI22_0 -; CHECK-NEXT: adrp x9, .LCPI22_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0] ; CHECK-NEXT: adrp x8, .LCPI22_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI22_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_1] +; CHECK-NEXT: adrp x8, .LCPI22_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_2] ; CHECK-NEXT: adrp x8, .LCPI22_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -582,13 +582,13 @@ ; CHECK-LABEL: test_urem_even_poweroftwo_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI23_0 -; CHECK-NEXT: adrp x9, .LCPI23_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0] ; CHECK-NEXT: adrp x8, .LCPI23_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI23_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_1] +; CHECK-NEXT: adrp x8, .LCPI23_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_2] ; CHECK-NEXT: adrp x8, .LCPI23_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -608,13 +608,13 @@ ; CHECK-LABEL: test_urem_odd_even_poweroftwo_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI24_0 -; CHECK-NEXT: adrp x9, .LCPI24_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0] ; CHECK-NEXT: adrp x8, .LCPI24_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI24_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_1] +; CHECK-NEXT: adrp x8, .LCPI24_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_2] ; CHECK-NEXT: adrp x8, .LCPI24_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -635,13 +635,13 @@ ; CHECK-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI25_0 -; CHECK-NEXT: adrp x9, .LCPI25_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_0] ; CHECK-NEXT: adrp x8, .LCPI25_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI25_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_1] +; CHECK-NEXT: adrp x8, .LCPI25_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_2] ; CHECK-NEXT: adrp x8, .LCPI25_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s @@ -660,13 +660,13 @@ ; CHECK-LABEL: test_urem_even_allones_and_poweroftwo_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI26_0 -; CHECK-NEXT: adrp x9, .LCPI26_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_0] ; CHECK-NEXT: adrp x8, .LCPI26_1 -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI26_2] +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_1] +; CHECK-NEXT: adrp x8, .LCPI26_2 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_2] ; CHECK-NEXT: adrp x8, .LCPI26_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s Index: llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll +++ llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll @@ -31,8 +31,8 @@ ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: mov w8, #13106 -; CHECK-NEXT: movk w8, #13107, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: movk w8, #13107, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s @@ -49,11 +49,11 @@ ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: mov w8, #43691 ; CHECK-NEXT: movk w8, #43690, lsl #16 -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: mov w8, #43690 +; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movk w8, #10922, lsl #16 -; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s ; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 @@ -70,11 +70,11 @@ ; CHECK-LABEL: t32_6_part1: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: mov w9, #43691 -; CHECK-NEXT: movk w9, #43690, lsl #16 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: movk w8, #43690, lsl #16 +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI3_1 -; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1] Index: llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll +++ llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll @@ -7,12 +7,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #23593 ; CHECK-NEXT: movk w8, #49807, lsl #16 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: mov w8, #28835 ; CHECK-NEXT: movk w8, #2621, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret @@ -28,12 +28,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #23593 ; CHECK-NEXT: movk w8, #49807, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: mov w8, #23592 ; CHECK-NEXT: movk w8, #655, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: dup v2.4s, w8 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: shl v1.4s, v0.4s, #30 ; CHECK-NEXT: ushr v0.4s, v0.4s, #2 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b @@ -53,9 +53,9 @@ ; CHECK-LABEL: test_urem_odd_neg25: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI2_0 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: adrp x8, .LCPI2_1 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_1] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s @@ -72,11 +72,11 @@ ; CHECK-LABEL: test_urem_even_neg100: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: adrp x8, .LCPI3_1 -; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1] +; CHECK-NEXT: movi v3.4s, #1 +; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v1.4s, v0.4s, #30 ; CHECK-NEXT: ushr v0.4s, v0.4s, #2 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b Index: llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll +++ llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll @@ -5,9 +5,9 @@ ; CHECK-LABEL: t0_all_tautological: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: adrp x9, .LCPI0_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] +; CHECK-NEXT: adrp x8, .LCPI0_1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_1] ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s ; CHECK-NEXT: xtn v0.4h, v0.4s @@ -76,16 +76,16 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind { ; CHECK-LABEL: t3_wide: ; CHECK: // %bb.0: +; CHECK-NEXT: fmov x10, d0 ; CHECK-NEXT: mov x8, #-6148914691236517206 -; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: movk x8, #43691 -; CHECK-NEXT: mov x10, v0.d[1] -; CHECK-NEXT: mul x9, x9, x8 -; CHECK-NEXT: mul x8, x10, x8 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: adrp x9, .LCPI4_0 +; CHECK-NEXT: mov x9, v0.d[1] +; CHECK-NEXT: mul x10, x10, x8 +; CHECK-NEXT: mul x8, x9, x8 +; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: mov v0.d[1], x8 -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI4_0] +; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: cmhs v0.2d, v1.2d, v0.2d ; CHECK-NEXT: movi d1, #0xffffffff00000000 ; CHECK-NEXT: xtn v0.2s, v0.2d Index: llvm/test/CodeGen/AArch64/urem-vector-lkk.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-vector-lkk.ll +++ llvm/test/CodeGen/AArch64/urem-vector-lkk.ll @@ -5,41 +5,41 @@ ; CHECK-LABEL: fold_urem_vec_1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: mov w9, #8969 -; CHECK-NEXT: movk w9, #22765, lsl #16 +; CHECK-NEXT: umov w9, v0.h[0] +; CHECK-NEXT: mov w8, #8969 +; CHECK-NEXT: movk w8, #22765, lsl #16 ; CHECK-NEXT: umov w10, v0.h[1] -; CHECK-NEXT: mov w12, #16913 -; CHECK-NEXT: mov w13, #95 -; CHECK-NEXT: movk w12, #8456, lsl #16 -; CHECK-NEXT: umull x9, w8, w9 -; CHECK-NEXT: ubfx w14, w10, #2, #14 -; CHECK-NEXT: lsr x9, x9, #32 -; CHECK-NEXT: sub w11, w8, w9 -; CHECK-NEXT: umull x12, w14, w12 -; CHECK-NEXT: add w9, w9, w11, lsr #1 -; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: lsr w9, w9, #6 -; CHECK-NEXT: lsr x12, x12, #34 -; CHECK-NEXT: msub w8, w9, w13, w8 +; CHECK-NEXT: umov w13, v0.h[2] +; CHECK-NEXT: umull x8, w9, w8 +; CHECK-NEXT: ubfx w12, w10, #2, #14 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: sub w11, w9, w8 +; CHECK-NEXT: add w8, w8, w11, lsr #1 +; CHECK-NEXT: mov w11, #16913 +; CHECK-NEXT: movk w11, #8456, lsl #16 +; CHECK-NEXT: umull x11, w12, w11 +; CHECK-NEXT: lsr w8, w8, #6 +; CHECK-NEXT: mov w12, #95 +; CHECK-NEXT: msub w8, w8, w12, w9 ; CHECK-NEXT: mov w9, #33437 +; CHECK-NEXT: lsr x11, x11, #34 ; CHECK-NEXT: movk w9, #21399, lsl #16 -; CHECK-NEXT: mov w13, #124 -; CHECK-NEXT: umull x9, w11, w9 -; CHECK-NEXT: msub w10, w12, w13, w10 -; CHECK-NEXT: umov w12, v0.h[3] +; CHECK-NEXT: mov w12, #124 +; CHECK-NEXT: umull x9, w13, w9 +; CHECK-NEXT: msub w10, w11, w12, w10 +; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov w13, #2287 -; CHECK-NEXT: lsr x8, x9, #37 -; CHECK-NEXT: mov w9, #98 -; CHECK-NEXT: movk w13, #16727, lsl #16 -; CHECK-NEXT: msub w8, w8, w9, w11 +; CHECK-NEXT: mov w8, #2287 +; CHECK-NEXT: movk w8, #16727, lsl #16 +; CHECK-NEXT: lsr x9, x9, #37 ; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: umull x9, w12, w13 -; CHECK-NEXT: mov w10, #1003 -; CHECK-NEXT: lsr x9, x9, #40 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: msub w8, w9, w10, w12 +; CHECK-NEXT: mov w10, #98 +; CHECK-NEXT: umull x8, w11, w8 +; CHECK-NEXT: msub w9, w9, w10, w13 +; CHECK-NEXT: lsr x8, x8, #40 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: mov w9, #1003 +; CHECK-NEXT: msub w8, w8, w9, w11 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -51,40 +51,40 @@ ; CHECK-LABEL: fold_urem_vec_2: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: mov w8, #8969 -; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: umov w12, v0.h[2] ; CHECK-NEXT: movk w8, #22765, lsl #16 -; CHECK-NEXT: umov w15, v0.h[2] -; CHECK-NEXT: umov w16, v0.h[3] -; CHECK-NEXT: umull x12, w10, w8 +; CHECK-NEXT: umov w14, v0.h[3] ; CHECK-NEXT: umull x11, w9, w8 -; CHECK-NEXT: lsr x12, x12, #32 +; CHECK-NEXT: umull x13, w10, w8 +; CHECK-NEXT: umull x15, w12, w8 ; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: sub w14, w10, w12 -; CHECK-NEXT: sub w13, w9, w11 -; CHECK-NEXT: add w12, w12, w14, lsr #1 -; CHECK-NEXT: umull x14, w15, w8 -; CHECK-NEXT: add w11, w11, w13, lsr #1 -; CHECK-NEXT: mov w13, #95 -; CHECK-NEXT: lsr w12, w12, #6 +; CHECK-NEXT: umull x8, w14, w8 +; CHECK-NEXT: lsr x13, x13, #32 +; CHECK-NEXT: sub w16, w9, w11 +; CHECK-NEXT: lsr x15, x15, #32 +; CHECK-NEXT: add w11, w11, w16, lsr #1 +; CHECK-NEXT: sub w16, w10, w13 +; CHECK-NEXT: add w13, w13, w16, lsr #1 +; CHECK-NEXT: mov w16, #95 ; CHECK-NEXT: lsr w11, w11, #6 -; CHECK-NEXT: umull x8, w16, w8 -; CHECK-NEXT: msub w10, w12, w13, w10 -; CHECK-NEXT: lsr x12, x14, #32 -; CHECK-NEXT: msub w9, w11, w13, w9 -; CHECK-NEXT: sub w11, w15, w12 ; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: lsr w13, w13, #6 +; CHECK-NEXT: msub w9, w11, w16, w9 +; CHECK-NEXT: msub w10, w13, w16, w10 +; CHECK-NEXT: sub w13, w12, w15 +; CHECK-NEXT: add w11, w15, w13, lsr #1 ; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: add w10, w12, w11, lsr #1 -; CHECK-NEXT: lsr w10, w10, #6 -; CHECK-NEXT: sub w11, w16, w8 +; CHECK-NEXT: lsr w10, w11, #6 +; CHECK-NEXT: msub w10, w10, w16, w12 ; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: msub w9, w10, w13, w15 -; CHECK-NEXT: add w8, w8, w11, lsr #1 +; CHECK-NEXT: sub w9, w14, w8 +; CHECK-NEXT: add w8, w8, w9, lsr #1 ; CHECK-NEXT: lsr w8, w8, #6 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: msub w8, w8, w13, w16 +; CHECK-NEXT: mov v0.h[2], w10 +; CHECK-NEXT: msub w8, w8, w16, w14 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -103,41 +103,41 @@ ; CHECK-NEXT: movk w8, #22765, lsl #16 ; CHECK-NEXT: umov w10, v0.h[1] ; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: mov w15, #95 -; CHECK-NEXT: umov w13, v0.h[3] -; CHECK-NEXT: umull x12, w9, w8 +; CHECK-NEXT: umov w12, v0.h[3] +; CHECK-NEXT: mov w16, #95 +; CHECK-NEXT: umull x13, w9, w8 ; CHECK-NEXT: umull x14, w10, w8 -; CHECK-NEXT: lsr x12, x12, #32 -; CHECK-NEXT: umull x17, w11, w8 -; CHECK-NEXT: sub w16, w9, w12 +; CHECK-NEXT: umull x15, w11, w8 +; CHECK-NEXT: lsr x13, x13, #32 +; CHECK-NEXT: umull x8, w12, w8 ; CHECK-NEXT: lsr x14, x14, #32 -; CHECK-NEXT: lsr x17, x17, #32 -; CHECK-NEXT: umull x8, w13, w8 -; CHECK-NEXT: add w12, w12, w16, lsr #1 -; CHECK-NEXT: sub w16, w10, w14 -; CHECK-NEXT: lsr w12, w12, #6 -; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: add w14, w14, w16, lsr #1 -; CHECK-NEXT: sub w16, w11, w17 -; CHECK-NEXT: msub w9, w12, w15, w9 +; CHECK-NEXT: sub w17, w9, w13 +; CHECK-NEXT: lsr x15, x15, #32 +; CHECK-NEXT: add w13, w13, w17, lsr #1 +; CHECK-NEXT: sub w17, w10, w14 +; CHECK-NEXT: add w14, w14, w17, lsr #1 +; CHECK-NEXT: sub w17, w11, w15 +; CHECK-NEXT: lsr w13, w13, #6 +; CHECK-NEXT: add w15, w15, w17, lsr #1 ; CHECK-NEXT: lsr w14, w14, #6 -; CHECK-NEXT: add w16, w17, w16, lsr #1 -; CHECK-NEXT: fmov s1, w12 -; CHECK-NEXT: msub w10, w14, w15, w10 -; CHECK-NEXT: sub w17, w13, w8 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: lsr w9, w16, #6 -; CHECK-NEXT: mov v1.h[1], w14 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: msub w9, w13, w16, w9 +; CHECK-NEXT: fmov s0, w13 +; CHECK-NEXT: msub w10, w14, w16, w10 +; CHECK-NEXT: lsr w15, w15, #6 +; CHECK-NEXT: sub w17, w12, w8 +; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: add w8, w8, w17, lsr #1 -; CHECK-NEXT: msub w11, w9, w15, w11 +; CHECK-NEXT: mov v0.h[1], w14 +; CHECK-NEXT: msub w9, w15, w16, w11 ; CHECK-NEXT: lsr w8, w8, #6 -; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: msub w10, w8, w15, w13 +; CHECK-NEXT: mov v1.h[1], w10 +; CHECK-NEXT: msub w10, w8, w16, w12 +; CHECK-NEXT: mov v0.h[2], w15 ; CHECK-NEXT: mov v1.h[2], w9 -; CHECK-NEXT: mov v0.h[2], w11 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: mov v0.h[3], w10 -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: mov v1.h[3], w10 +; CHECK-NEXT: add v0.4h, v1.4h, v0.4h ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, %2 = udiv <4 x i16> %x, @@ -151,25 +151,25 @@ ; CHECK-LABEL: dont_fold_urem_power_of_two: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: umov w9, v0.h[3] +; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: mov w8, #8969 ; CHECK-NEXT: umov w11, v0.h[1] ; CHECK-NEXT: movk w8, #22765, lsl #16 -; CHECK-NEXT: and w10, w10, #0x3f ; CHECK-NEXT: umull x8, w9, w8 -; CHECK-NEXT: and w11, w11, #0x1f -; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: and w10, w10, #0x3f ; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: sub w12, w9, w8 -; CHECK-NEXT: mov v1.h[1], w11 -; CHECK-NEXT: add w8, w8, w12, lsr #1 -; CHECK-NEXT: and w10, w10, #0x7 +; CHECK-NEXT: and w10, w11, #0x1f +; CHECK-NEXT: umov w11, v0.h[2] +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: mov v1.h[1], w10 +; CHECK-NEXT: sub w10, w9, w8 +; CHECK-NEXT: add w8, w8, w10, lsr #1 +; CHECK-NEXT: and w10, w11, #0x7 ; CHECK-NEXT: lsr w8, w8, #6 -; CHECK-NEXT: mov w11, #95 -; CHECK-NEXT: msub w8, w8, w11, w9 ; CHECK-NEXT: mov v1.h[2], w10 +; CHECK-NEXT: mov w10, #95 +; CHECK-NEXT: msub w8, w8, w10, w9 ; CHECK-NEXT: mov v1.h[3], w8 ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret @@ -188,25 +188,25 @@ ; CHECK-NEXT: umov w11, v0.h[2] ; CHECK-NEXT: mov w12, #654 ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: mov w13, #47143 ; CHECK-NEXT: ubfx w10, w9, #1, #15 -; CHECK-NEXT: movk w13, #24749, lsl #16 ; CHECK-NEXT: umull x8, w10, w8 ; CHECK-NEXT: mov w10, #17097 ; CHECK-NEXT: movk w10, #45590, lsl #16 -; CHECK-NEXT: lsr x8, x8, #40 ; CHECK-NEXT: umull x10, w11, w10 +; CHECK-NEXT: lsr x8, x8, #40 ; CHECK-NEXT: msub w8, w8, w12, w9 ; CHECK-NEXT: umov w9, v0.h[3] +; CHECK-NEXT: mov w12, #47143 ; CHECK-NEXT: lsr x10, x10, #36 -; CHECK-NEXT: mov w12, #23 -; CHECK-NEXT: msub w10, w10, w12, w11 -; CHECK-NEXT: mov w11, #5423 +; CHECK-NEXT: movk w12, #24749, lsl #16 ; CHECK-NEXT: mov v1.h[1], w8 -; CHECK-NEXT: umull x8, w9, w13 -; CHECK-NEXT: lsr x8, x8, #43 -; CHECK-NEXT: mov v1.h[2], w10 -; CHECK-NEXT: msub w8, w8, w11, w9 +; CHECK-NEXT: mov w8, #23 +; CHECK-NEXT: umull x12, w9, w12 +; CHECK-NEXT: msub w8, w10, w8, w11 +; CHECK-NEXT: lsr x10, x12, #43 +; CHECK-NEXT: mov v1.h[2], w8 +; CHECK-NEXT: mov w8, #5423 +; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: mov v1.h[3], w8 ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret @@ -230,35 +230,35 @@ ; CHECK-NEXT: mov x8, #17097 ; CHECK-NEXT: fmov x9, d1 ; CHECK-NEXT: movk x8, #45590, lsl #16 -; CHECK-NEXT: mov x13, #21445 +; CHECK-NEXT: mov x12, #12109 ; CHECK-NEXT: movk x8, #34192, lsl #32 -; CHECK-NEXT: movk x13, #1603, lsl #16 +; CHECK-NEXT: movk x12, #52170, lsl #16 ; CHECK-NEXT: movk x8, #25644, lsl #48 -; CHECK-NEXT: movk x13, #15432, lsl #32 -; CHECK-NEXT: mov x10, v0.d[1] -; CHECK-NEXT: movk x13, #25653, lsl #48 +; CHECK-NEXT: movk x12, #28749, lsl #32 ; CHECK-NEXT: umulh x8, x9, x8 +; CHECK-NEXT: movk x12, #49499, lsl #48 +; CHECK-NEXT: mov x10, v0.d[1] ; CHECK-NEXT: mov x11, v1.d[1] -; CHECK-NEXT: sub x12, x9, x8 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: sub x13, x9, x8 +; CHECK-NEXT: add x8, x8, x13, lsr #1 +; CHECK-NEXT: mov x13, #21445 +; CHECK-NEXT: movk x13, #1603, lsl #16 ; CHECK-NEXT: lsr x14, x10, #1 -; CHECK-NEXT: add x8, x8, x12, lsr #1 -; CHECK-NEXT: mov x12, #12109 -; CHECK-NEXT: movk x12, #52170, lsl #16 +; CHECK-NEXT: movk x13, #15432, lsl #32 +; CHECK-NEXT: umulh x12, x11, x12 +; CHECK-NEXT: movk x13, #25653, lsl #48 +; CHECK-NEXT: lsr x8, x8, #4 ; CHECK-NEXT: umulh x13, x14, x13 -; CHECK-NEXT: movk x12, #28749, lsl #32 ; CHECK-NEXT: mov w14, #23 -; CHECK-NEXT: movk x12, #49499, lsl #48 -; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: lsr x13, x13, #7 -; CHECK-NEXT: umulh x12, x11, x12 ; CHECK-NEXT: msub x8, x8, x14, x9 -; CHECK-NEXT: mov w9, #5423 -; CHECK-NEXT: lsr x12, x12, #12 -; CHECK-NEXT: mov w14, #654 -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: msub x9, x12, x9, x11 -; CHECK-NEXT: msub x10, x13, x14, x10 +; CHECK-NEXT: lsr x9, x12, #12 +; CHECK-NEXT: mov w12, #5423 +; CHECK-NEXT: lsr x13, x13, #7 +; CHECK-NEXT: msub x9, x9, x12, x11 +; CHECK-NEXT: mov w11, #654 ; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: msub x10, x13, x11, x10 ; CHECK-NEXT: mov v1.d[1], x9 ; CHECK-NEXT: mov v0.d[1], x10 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/usub_sat_vec.ll =================================================================== --- llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -55,9 +55,9 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.16b, v2.16b, v6.16b ; CHECK-NEXT: uqsub v0.16b, v0.16b, v4.16b ; CHECK-NEXT: uqsub v1.16b, v1.16b, v5.16b +; CHECK-NEXT: uqsub v2.16b, v2.16b, v6.16b ; CHECK-NEXT: uqsub v3.16b, v3.16b, v7.16b ; CHECK-NEXT: ret %z = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %x, <64 x i8> %y) @@ -86,9 +86,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { ; CHECK-LABEL: v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.8h, v2.8h, v6.8h ; CHECK-NEXT: uqsub v0.8h, v0.8h, v4.8h ; CHECK-NEXT: uqsub v1.8h, v1.8h, v5.8h +; CHECK-NEXT: uqsub v2.8h, v2.8h, v6.8h ; CHECK-NEXT: uqsub v3.8h, v3.8h, v7.8h ; CHECK-NEXT: ret %z = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %x, <32 x i16> %y) @@ -98,9 +98,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: uqsub v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -133,12 +133,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x1] ; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: ldrb w10, [x1, #1] +; CHECK-NEXT: ldrb w10, [x0, #1] +; CHECK-NEXT: ldrb w11, [x1, #1] ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: mov v0.s[1], w10 ; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 @@ -155,9 +155,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: uqsub v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -172,12 +172,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldrh w8, [x1] ; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: ldrh w10, [x1, #2] +; CHECK-NEXT: ldrh w10, [x0, #2] +; CHECK-NEXT: ldrh w11, [x1, #2] ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: ldrh w9, [x0, #2] -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: mov v0.s[1], w10 ; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 @@ -220,9 +220,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x1] -; CHECK-NEXT: ldr b1, [x0] -; CHECK-NEXT: uqsub v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: ldr b1, [x1] +; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -235,9 +235,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x1] -; CHECK-NEXT: ldr h1, [x0] -; CHECK-NEXT: uqsub v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: ldr h1, [x1] +; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -301,9 +301,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { ; CHECK-LABEL: v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.4s, v2.4s, v6.4s ; CHECK-NEXT: uqsub v0.4s, v0.4s, v4.4s ; CHECK-NEXT: uqsub v1.4s, v1.4s, v5.4s +; CHECK-NEXT: uqsub v2.4s, v2.4s, v6.4s ; CHECK-NEXT: uqsub v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %z = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %x, <16 x i32> %y) @@ -332,9 +332,9 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.2d, v2.2d, v6.2d ; CHECK-NEXT: uqsub v0.2d, v0.2d, v4.2d ; CHECK-NEXT: uqsub v1.2d, v1.2d, v5.2d +; CHECK-NEXT: uqsub v2.2d, v2.2d, v6.2d ; CHECK-NEXT: uqsub v3.2d, v3.2d, v7.2d ; CHECK-NEXT: ret %z = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %x, <8 x i64> %y) Index: llvm/test/CodeGen/AArch64/vec_uaddo.ll =================================================================== --- llvm/test/CodeGen/AArch64/vec_uaddo.ll +++ llvm/test/CodeGen/AArch64/vec_uaddo.ll @@ -52,8 +52,8 @@ ; CHECK-NEXT: add v1.4s, v0.4s, v1.4s ; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s -; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: st1 { v1.s }[2], [x8] +; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: ret %t = call {<3 x i32>, <3 x i1>} @llvm.uadd.with.overflow.v3i32(<3 x i32> %a0, <3 x i32> %a1) %val = extractvalue {<3 x i32>, <3 x i1>} %t, 0 @@ -85,25 +85,25 @@ ; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ldr s2, [sp, #16] -; CHECK-NEXT: add x9, sp, #24 -; CHECK-NEXT: add x10, sp, #8 -; CHECK-NEXT: mov v0.s[1], w7 ; CHECK-NEXT: fmov s3, w4 +; CHECK-NEXT: add x9, sp, #8 +; CHECK-NEXT: mov v0.s[1], w7 ; CHECK-NEXT: mov v1.s[1], w1 -; CHECK-NEXT: ld1 { v2.s }[1], [x9] ; CHECK-NEXT: mov v3.s[1], w5 ; CHECK-NEXT: ld1 { v0.s }[2], [x8] +; CHECK-NEXT: add x8, sp, #24 ; CHECK-NEXT: mov v1.s[2], w2 +; CHECK-NEXT: ld1 { v2.s }[1], [x8] ; CHECK-NEXT: ldr x8, [sp, #32] -; CHECK-NEXT: add v2.4s, v3.4s, v2.4s -; CHECK-NEXT: ld1 { v0.s }[3], [x10] +; CHECK-NEXT: ld1 { v0.s }[3], [x9] ; CHECK-NEXT: mov v1.s[3], w3 -; CHECK-NEXT: str d2, [x8, #16] +; CHECK-NEXT: add v2.4s, v3.4s, v2.4s ; CHECK-NEXT: cmhi v3.4s, v3.4s, v2.4s -; CHECK-NEXT: mov w5, v3.s[1] -; CHECK-NEXT: fmov w4, s3 +; CHECK-NEXT: str d2, [x8, #16] ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: mov w5, v3.s[1] ; CHECK-NEXT: cmhi v1.4s, v1.4s, v0.4s +; CHECK-NEXT: fmov w4, s3 ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] @@ -121,10 +121,10 @@ define <8 x i32> @uaddo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) nounwind { ; CHECK-LABEL: uaddo_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add v3.4s, v1.4s, v3.4s ; CHECK-NEXT: add v2.4s, v0.4s, v2.4s -; CHECK-NEXT: cmhi v1.4s, v1.4s, v3.4s +; CHECK-NEXT: add v3.4s, v1.4s, v3.4s ; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s +; CHECK-NEXT: cmhi v1.4s, v1.4s, v3.4s ; CHECK-NEXT: stp q2, q3, [x0] ; CHECK-NEXT: ret %t = call {<8 x i32>, <8 x i1>} @llvm.uadd.with.overflow.v8i32(<8 x i32> %a0, <8 x i32> %a1) @@ -250,19 +250,19 @@ ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: umov w9, v0.h[2] -; CHECK-NEXT: umov w10, v0.h[0] -; CHECK-NEXT: umov w11, v0.h[3] +; CHECK-NEXT: umov w9, v0.h[0] +; CHECK-NEXT: umov w10, v0.h[2] ; CHECK-NEXT: and v1.8b, v0.8b, v2.8b -; CHECK-NEXT: cmeq v0.4h, v1.4h, v0.4h ; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: and w9, w9, #0x1 +; CHECK-NEXT: bfi w9, w8, #1, #1 +; CHECK-NEXT: umov w8, v0.h[3] +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: cmeq v0.4h, v1.4h, v0.4h +; CHECK-NEXT: bfi w9, w10, #2, #1 ; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: bfi w9, w8, #3, #29 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: bfi w10, w8, #1, #1 -; CHECK-NEXT: bfi w10, w9, #2, #1 -; CHECK-NEXT: bfi w10, w11, #3, #29 -; CHECK-NEXT: and w8, w10, #0xf +; CHECK-NEXT: and w8, w9, #0xf ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.uadd.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) Index: llvm/test/CodeGen/AArch64/vec_umulo.ll =================================================================== --- llvm/test/CodeGen/AArch64/vec_umulo.ll +++ llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -60,8 +60,7 @@ ; CHECK-NEXT: uzp2 v2.4s, v3.4s, v2.4s ; CHECK-NEXT: st1 { v1.s }[2], [x8] ; CHECK-NEXT: str d1, [x0] -; CHECK-NEXT: cmtst v2.4s, v2.4s, v2.4s -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: cmtst v0.4s, v2.4s, v2.4s ; CHECK-NEXT: ret %t = call {<3 x i32>, <3 x i1>} @llvm.umul.with.overflow.v3i32(<3 x i32> %a0, <3 x i32> %a1) %val = extractvalue {<3 x i32>, <3 x i1>} %t, 0 @@ -97,35 +96,35 @@ ; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ldr s2, [sp, #16] -; CHECK-NEXT: add x9, sp, #24 -; CHECK-NEXT: add x10, sp, #8 -; CHECK-NEXT: mov v0.s[1], w7 ; CHECK-NEXT: fmov s3, w4 +; CHECK-NEXT: mov v0.s[1], w7 ; CHECK-NEXT: mov v1.s[1], w1 -; CHECK-NEXT: ld1 { v2.s }[1], [x9] ; CHECK-NEXT: mov v3.s[1], w5 ; CHECK-NEXT: ld1 { v0.s }[2], [x8] +; CHECK-NEXT: add x8, sp, #8 ; CHECK-NEXT: mov v1.s[2], w2 -; CHECK-NEXT: ldr x8, [sp, #32] -; CHECK-NEXT: umull2 v4.2d, v3.4s, v2.4s -; CHECK-NEXT: ld1 { v0.s }[3], [x10] +; CHECK-NEXT: ld1 { v0.s }[3], [x8] +; CHECK-NEXT: add x8, sp, #24 ; CHECK-NEXT: mov v1.s[3], w3 +; CHECK-NEXT: ld1 { v2.s }[1], [x8] +; CHECK-NEXT: ldr x8, [sp, #32] +; CHECK-NEXT: umull2 v4.2d, v1.4s, v0.4s +; CHECK-NEXT: umull v5.2d, v1.2s, v0.2s +; CHECK-NEXT: umull2 v6.2d, v3.4s, v2.4s ; CHECK-NEXT: umull v7.2d, v3.2s, v2.2s ; CHECK-NEXT: mul v2.4s, v3.4s, v2.4s -; CHECK-NEXT: umull2 v5.2d, v1.4s, v0.4s -; CHECK-NEXT: umull v6.2d, v1.2s, v0.2s -; CHECK-NEXT: uzp2 v4.4s, v7.4s, v4.4s -; CHECK-NEXT: str d2, [x8, #16] +; CHECK-NEXT: uzp2 v4.4s, v5.4s, v4.4s ; CHECK-NEXT: mul v0.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp2 v5.4s, v6.4s, v5.4s -; CHECK-NEXT: cmtst v4.4s, v4.4s, v4.4s +; CHECK-NEXT: uzp2 v5.4s, v7.4s, v6.4s +; CHECK-NEXT: str d2, [x8, #16] +; CHECK-NEXT: cmtst v3.4s, v4.4s, v4.4s ; CHECK-NEXT: str q0, [x8] -; CHECK-NEXT: cmtst v3.4s, v5.4s, v5.4s -; CHECK-NEXT: mov w5, v4.s[1] -; CHECK-NEXT: fmov w4, s4 +; CHECK-NEXT: cmtst v4.4s, v5.4s, v5.4s ; CHECK-NEXT: mov w1, v3.s[1] ; CHECK-NEXT: mov w2, v3.s[2] ; CHECK-NEXT: mov w3, v3.s[3] +; CHECK-NEXT: mov w5, v4.s[1] +; CHECK-NEXT: fmov w4, s4 ; CHECK-NEXT: fmov w0, s3 ; CHECK-NEXT: ret %t = call {<6 x i32>, <6 x i1>} @llvm.umul.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1) @@ -139,19 +138,18 @@ define <8 x i32> @umulo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) nounwind { ; CHECK-LABEL: umulo_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: umull2 v4.2d, v1.4s, v3.4s -; CHECK-NEXT: umull2 v5.2d, v0.4s, v2.4s -; CHECK-NEXT: umull v6.2d, v0.2s, v2.2s +; CHECK-NEXT: umull2 v4.2d, v0.4s, v2.4s +; CHECK-NEXT: umull v5.2d, v0.2s, v2.2s +; CHECK-NEXT: umull2 v6.2d, v1.4s, v3.4s ; CHECK-NEXT: umull v7.2d, v1.2s, v3.2s -; CHECK-NEXT: mul v3.4s, v1.4s, v3.4s ; CHECK-NEXT: mul v2.4s, v0.4s, v2.4s -; CHECK-NEXT: uzp2 v5.4s, v6.4s, v5.4s -; CHECK-NEXT: uzp2 v6.4s, v7.4s, v4.4s -; CHECK-NEXT: stp q2, q3, [x0] -; CHECK-NEXT: cmtst v4.4s, v5.4s, v5.4s -; CHECK-NEXT: cmtst v5.4s, v6.4s, v6.4s -; CHECK-NEXT: mov v0.16b, v4.16b -; CHECK-NEXT: mov v1.16b, v5.16b +; CHECK-NEXT: uzp2 v4.4s, v5.4s, v4.4s +; CHECK-NEXT: uzp2 v5.4s, v7.4s, v6.4s +; CHECK-NEXT: mul v6.4s, v1.4s, v3.4s +; CHECK-NEXT: cmtst v3.4s, v4.4s, v4.4s +; CHECK-NEXT: cmtst v1.4s, v5.4s, v5.4s +; CHECK-NEXT: mov v0.16b, v3.16b +; CHECK-NEXT: stp q2, q6, [x0] ; CHECK-NEXT: ret %t = call {<8 x i32>, <8 x i1>} @llvm.umul.with.overflow.v8i32(<8 x i32> %a0, <8 x i32> %a1) %val = extractvalue {<8 x i32>, <8 x i1>} %t, 0 @@ -201,21 +199,19 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h ; CHECK-NEXT: umull v3.4s, v0.4h, v1.4h +; CHECK-NEXT: mul v4.8h, v0.8h, v1.8h ; CHECK-NEXT: uzp2 v2.8h, v3.8h, v2.8h +; CHECK-NEXT: str q4, [x0] ; CHECK-NEXT: cmtst v2.8h, v2.8h, v2.8h ; CHECK-NEXT: xtn v2.8b, v2.8h ; CHECK-NEXT: zip1 v3.8b, v2.8b, v0.8b ; CHECK-NEXT: zip2 v2.8b, v2.8b, v0.8b -; CHECK-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: shl v3.4s, v3.4s, #31 -; CHECK-NEXT: shl v4.4s, v2.4s, #31 -; CHECK-NEXT: cmlt v2.4s, v3.4s, #0 -; CHECK-NEXT: cmlt v3.4s, v4.4s, #0 -; CHECK-NEXT: mul v4.8h, v0.8h, v1.8h -; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: mov v1.16b, v3.16b -; CHECK-NEXT: str q4, [x0] +; CHECK-NEXT: ushll v0.4s, v3.4h, #0 +; CHECK-NEXT: ushll v1.4s, v2.4h, #0 +; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: shl v1.4s, v1.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 +; CHECK-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-NEXT: ret %t = call {<8 x i16>, <8 x i1>} @llvm.umul.with.overflow.v8i16(<8 x i16> %a0, <8 x i16> %a1) %val = extractvalue {<8 x i16>, <8 x i1>} %t, 0 @@ -234,12 +230,12 @@ ; CHECK-NEXT: fmov x11, d0 ; CHECK-NEXT: umulh x12, x9, x8 ; CHECK-NEXT: umulh x13, x11, x10 -; CHECK-NEXT: cmp xzr, x12 ; CHECK-NEXT: mul x10, x11, x10 +; CHECK-NEXT: mul x8, x9, x8 +; CHECK-NEXT: cmp xzr, x12 ; CHECK-NEXT: csetm x12, ne ; CHECK-NEXT: cmp xzr, x13 ; CHECK-NEXT: csetm x13, ne -; CHECK-NEXT: mul x8, x9, x8 ; CHECK-NEXT: fmov d1, x10 ; CHECK-NEXT: fmov d0, x13 ; CHECK-NEXT: mov v1.d[1], x8 @@ -260,29 +256,29 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: bic v0.4s, #255, lsl #24 -; CHECK-NEXT: mul v2.4s, v0.4s, v1.4s -; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s -; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v2.s[3] -; CHECK-NEXT: mov w10, v2.s[2] -; CHECK-NEXT: mov w11, v2.s[1] -; CHECK-NEXT: ushr v1.4s, v2.4s, #24 -; CHECK-NEXT: uzp2 v0.4s, v0.4s, v3.4s -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: cmtst v1.4s, v1.4s, v1.4s +; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v3.2d, v0.2s, v1.2s +; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp2 v1.4s, v3.4s, v2.4s +; CHECK-NEXT: mov w8, v0.s[3] +; CHECK-NEXT: ushr v2.4s, v0.4s, #24 +; CHECK-NEXT: mov w9, v0.s[2] +; CHECK-NEXT: mov w10, v0.s[1] +; CHECK-NEXT: fmov w11, s0 +; CHECK-NEXT: cmtst v0.4s, v2.4s, v2.4s ; CHECK-NEXT: sturh w8, [x0, #9] ; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: strh w10, [x0, #6] +; CHECK-NEXT: cmeq v1.4s, v1.4s, #0 +; CHECK-NEXT: strh w9, [x0, #6] +; CHECK-NEXT: sturh w10, [x0, #3] +; CHECK-NEXT: lsr w9, w9, #16 ; CHECK-NEXT: lsr w10, w10, #16 -; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: sturh w11, [x0, #3] -; CHECK-NEXT: lsr w11, w11, #16 ; CHECK-NEXT: strb w8, [x0, #11] -; CHECK-NEXT: lsr w8, w9, #16 -; CHECK-NEXT: strh w9, [x0] -; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b -; CHECK-NEXT: strb w10, [x0, #8] -; CHECK-NEXT: strb w11, [x0, #5] +; CHECK-NEXT: lsr w8, w11, #16 +; CHECK-NEXT: strh w11, [x0] +; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b +; CHECK-NEXT: strb w9, [x0, #8] +; CHECK-NEXT: strb w10, [x0, #5] ; CHECK-NEXT: strb w8, [x0, #2] ; CHECK-NEXT: ret %t = call {<4 x i24>, <4 x i1>} @llvm.umul.with.overflow.v4i24(<4 x i24> %a0, <4 x i24> %a1) @@ -296,19 +292,18 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind { ; CHECK-LABEL: umulo_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d2, d0 -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: and v1.8b, v2.8b, v1.8b -; CHECK-NEXT: umov w8, v1.h[1] -; CHECK-NEXT: umov w9, v1.h[2] -; CHECK-NEXT: umov w10, v1.h[0] -; CHECK-NEXT: umov w11, v1.h[3] +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: umov w9, v0.h[0] +; CHECK-NEXT: umov w10, v0.h[2] ; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: bfi w10, w8, #1, #1 -; CHECK-NEXT: bfi w10, w9, #2, #1 -; CHECK-NEXT: bfi w10, w11, #3, #29 -; CHECK-NEXT: and w8, w10, #0xf +; CHECK-NEXT: bfi w9, w8, #1, #1 +; CHECK-NEXT: umov w8, v0.h[3] +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: bfi w9, w10, #2, #1 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: bfi w9, w8, #3, #29 +; CHECK-NEXT: and w8, w9, #0xf ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) @@ -325,37 +320,37 @@ ; CHECK-NEXT: mul x8, x7, x2 ; CHECK-NEXT: umulh x9, x2, x6 ; CHECK-NEXT: madd x8, x3, x6, x8 -; CHECK-NEXT: umulh x10, x3, x6 +; CHECK-NEXT: umulh x11, x3, x6 +; CHECK-NEXT: umulh x10, x7, x2 +; CHECK-NEXT: umulh x13, x0, x4 ; CHECK-NEXT: adds x8, x9, x8 -; CHECK-NEXT: umulh x11, x7, x2 -; CHECK-NEXT: cset w9, hs +; CHECK-NEXT: mul x9, x5, x0 +; CHECK-NEXT: cset w12, hs ; CHECK-NEXT: cmp x3, #0 +; CHECK-NEXT: madd x9, x1, x4, x9 ; CHECK-NEXT: ccmp x7, #0, #4, ne -; CHECK-NEXT: umulh x13, x1, x4 -; CHECK-NEXT: ccmp xzr, x10, #0, eq -; CHECK-NEXT: mul x10, x5, x0 -; CHECK-NEXT: madd x10, x1, x4, x10 ; CHECK-NEXT: ccmp xzr, x11, #0, eq -; CHECK-NEXT: umulh x11, x0, x4 -; CHECK-NEXT: cset w12, ne -; CHECK-NEXT: adds x10, x11, x10 -; CHECK-NEXT: cset w11, hs +; CHECK-NEXT: ccmp xzr, x10, #0, eq +; CHECK-NEXT: umulh x10, x1, x4 +; CHECK-NEXT: cset w11, ne +; CHECK-NEXT: adds x9, x13, x9 +; CHECK-NEXT: umulh x13, x5, x0 +; CHECK-NEXT: cset w14, hs ; CHECK-NEXT: cmp x1, #0 ; CHECK-NEXT: ccmp x5, #0, #4, ne -; CHECK-NEXT: orr w9, w12, w9 +; CHECK-NEXT: orr w11, w11, w12 +; CHECK-NEXT: ccmp xzr, x10, #0, eq ; CHECK-NEXT: mul x12, x0, x4 ; CHECK-NEXT: ccmp xzr, x13, #0, eq -; CHECK-NEXT: umulh x13, x5, x0 -; CHECK-NEXT: ccmp xzr, x13, #0, eq -; CHECK-NEXT: cset w13, ne -; CHECK-NEXT: orr w11, w13, w11 -; CHECK-NEXT: fmov s0, w11 +; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: orr w10, w10, w14 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: mul x10, x2, x6 +; CHECK-NEXT: mov v0.s[1], w11 ; CHECK-NEXT: ldr x11, [sp] -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: mul x9, x2, x6 -; CHECK-NEXT: stp x12, x10, [x11] +; CHECK-NEXT: stp x10, x8, [x11, #16] ; CHECK-NEXT: shl v0.2s, v0.2s, #31 -; CHECK-NEXT: stp x9, x8, [x11, #16] +; CHECK-NEXT: stp x12, x9, [x11] ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 ; CHECK-NEXT: ret %t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) Index: llvm/test/CodeGen/AArch64/vecreduce-add.ll =================================================================== --- llvm/test/CodeGen/AArch64/vecreduce-add.ll +++ llvm/test/CodeGen/AArch64/vecreduce-add.ll @@ -1772,23 +1772,23 @@ ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: uaddl2 v5.2d, v4.4s, v2.4s ; CHECK-NEXT: uaddl2 v6.2d, v0.4s, v3.4s -; CHECK-NEXT: ushll2 v7.8h, v1.16b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: uaddl v2.2d, v4.2s, v2.2s -; CHECK-NEXT: add v4.2d, v6.2d, v5.2d +; CHECK-NEXT: ushll2 v4.8h, v1.16b, #0 +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: add v5.2d, v6.2d, v5.2d ; CHECK-NEXT: uaddl v0.2d, v0.2s, v3.2s -; CHECK-NEXT: ushll v3.4s, v7.4h, #0 -; CHECK-NEXT: ushll2 v5.4s, v7.8h, #0 +; CHECK-NEXT: ushll v3.4s, v4.4h, #0 +; CHECK-NEXT: ushll2 v4.4s, v4.8h, #0 ; CHECK-NEXT: ushll2 v6.4s, v1.8h, #0 ; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: uaddl2 v7.2d, v6.4s, v5.4s -; CHECK-NEXT: uaddl v5.2d, v6.2s, v5.2s +; CHECK-NEXT: uaddl2 v7.2d, v6.4s, v4.4s +; CHECK-NEXT: uaddl v4.2d, v6.2s, v4.2s ; CHECK-NEXT: uaddl2 v6.2d, v1.4s, v3.4s ; CHECK-NEXT: uaddl v1.2d, v1.2s, v3.2s ; CHECK-NEXT: add v0.2d, v0.2d, v2.2d ; CHECK-NEXT: add v2.2d, v6.2d, v7.2d -; CHECK-NEXT: add v1.2d, v1.2d, v5.2d -; CHECK-NEXT: add v0.2d, v0.2d, v4.2d +; CHECK-NEXT: add v1.2d, v1.2d, v4.2d +; CHECK-NEXT: add v0.2d, v0.2d, v5.2d ; CHECK-NEXT: add v1.2d, v1.2d, v2.2d ; CHECK-NEXT: add v0.2d, v0.2d, v1.2d ; CHECK-NEXT: addp d0, v0.2d @@ -1814,23 +1814,23 @@ ; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: saddl2 v5.2d, v4.4s, v2.4s ; CHECK-NEXT: saddl2 v6.2d, v0.4s, v3.4s -; CHECK-NEXT: sshll2 v7.8h, v1.16b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 ; CHECK-NEXT: saddl v2.2d, v4.2s, v2.2s -; CHECK-NEXT: add v4.2d, v6.2d, v5.2d +; CHECK-NEXT: sshll2 v4.8h, v1.16b, #0 +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: add v5.2d, v6.2d, v5.2d ; CHECK-NEXT: saddl v0.2d, v0.2s, v3.2s -; CHECK-NEXT: sshll v3.4s, v7.4h, #0 -; CHECK-NEXT: sshll2 v5.4s, v7.8h, #0 +; CHECK-NEXT: sshll v3.4s, v4.4h, #0 +; CHECK-NEXT: sshll2 v4.4s, v4.8h, #0 ; CHECK-NEXT: sshll2 v6.4s, v1.8h, #0 ; CHECK-NEXT: sshll v1.4s, v1.4h, #0 -; CHECK-NEXT: saddl2 v7.2d, v6.4s, v5.4s -; CHECK-NEXT: saddl v5.2d, v6.2s, v5.2s +; CHECK-NEXT: saddl2 v7.2d, v6.4s, v4.4s +; CHECK-NEXT: saddl v4.2d, v6.2s, v4.2s ; CHECK-NEXT: saddl2 v6.2d, v1.4s, v3.4s ; CHECK-NEXT: saddl v1.2d, v1.2s, v3.2s ; CHECK-NEXT: add v0.2d, v0.2d, v2.2d ; CHECK-NEXT: add v2.2d, v6.2d, v7.2d -; CHECK-NEXT: add v1.2d, v1.2d, v5.2d -; CHECK-NEXT: add v0.2d, v0.2d, v4.2d +; CHECK-NEXT: add v1.2d, v1.2d, v4.2d +; CHECK-NEXT: add v0.2d, v0.2d, v5.2d ; CHECK-NEXT: add v1.2d, v1.2d, v2.2d ; CHECK-NEXT: add v0.2d, v0.2d, v1.2d ; CHECK-NEXT: addp d0, v0.2d Index: llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll =================================================================== --- llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll +++ llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll @@ -166,34 +166,34 @@ define float @test_v16f32(<16 x float> %a, float %s) nounwind { ; CHECK-LABEL: test_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov s6, v0.s[1] +; CHECK-NEXT: mov s5, v0.s[1] ; CHECK-NEXT: fadd s4, s4, s0 -; CHECK-NEXT: mov s7, v0.s[2] +; CHECK-NEXT: mov s6, v0.s[2] ; CHECK-NEXT: mov s0, v0.s[3] -; CHECK-NEXT: mov s5, v3.s[1] +; CHECK-NEXT: fadd s4, s4, s5 +; CHECK-NEXT: mov s5, v1.s[2] ; CHECK-NEXT: fadd s4, s4, s6 -; CHECK-NEXT: mov s6, v1.s[2] -; CHECK-NEXT: fadd s4, s4, s7 ; CHECK-NEXT: fadd s0, s4, s0 ; CHECK-NEXT: mov s4, v1.s[1] ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v1.s[3] ; CHECK-NEXT: fadd s0, s0, s4 ; CHECK-NEXT: mov s4, v2.s[2] -; CHECK-NEXT: fadd s0, s0, s6 +; CHECK-NEXT: fadd s0, s0, s5 ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v2.s[1] ; CHECK-NEXT: fadd s0, s0, s2 ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v2.s[3] -; CHECK-NEXT: mov s2, v3.s[3] +; CHECK-NEXT: mov s2, v3.s[2] ; CHECK-NEXT: fadd s0, s0, s4 ; CHECK-NEXT: fadd s0, s0, s1 -; CHECK-NEXT: mov s1, v3.s[2] +; CHECK-NEXT: mov s1, v3.s[1] ; CHECK-NEXT: fadd s0, s0, s3 -; CHECK-NEXT: fadd s0, s0, s5 ; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mov s1, v3.s[3] ; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float %s, <16 x float> %a) ret float %b @@ -202,27 +202,27 @@ define float @test_v16f32_neutral(<16 x float> %a) nounwind { ; CHECK-LABEL: test_v16f32_neutral: ; CHECK: // %bb.0: -; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: faddp s6, v0.2s +; CHECK-NEXT: mov s4, v0.s[2] +; CHECK-NEXT: faddp s5, v0.2s ; CHECK-NEXT: mov s0, v0.s[3] -; CHECK-NEXT: mov s4, v2.s[1] -; CHECK-NEXT: fadd s5, s6, s5 -; CHECK-NEXT: mov s6, v1.s[2] -; CHECK-NEXT: fadd s0, s5, s0 -; CHECK-NEXT: mov s5, v1.s[1] +; CHECK-NEXT: fadd s4, s5, s4 +; CHECK-NEXT: mov s5, v1.s[2] +; CHECK-NEXT: fadd s0, s4, s0 +; CHECK-NEXT: mov s4, v1.s[1] ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v1.s[3] +; CHECK-NEXT: fadd s0, s0, s4 +; CHECK-NEXT: mov s4, v2.s[2] ; CHECK-NEXT: fadd s0, s0, s5 -; CHECK-NEXT: fadd s0, s0, s6 ; CHECK-NEXT: fadd s0, s0, s1 -; CHECK-NEXT: mov s1, v2.s[2] +; CHECK-NEXT: mov s1, v2.s[1] ; CHECK-NEXT: fadd s0, s0, s2 -; CHECK-NEXT: mov s2, v2.s[3] +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mov s1, v2.s[3] +; CHECK-NEXT: mov s2, v3.s[2] ; CHECK-NEXT: fadd s0, s0, s4 ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v3.s[1] -; CHECK-NEXT: fadd s0, s0, s2 -; CHECK-NEXT: mov s2, v3.s[2] ; CHECK-NEXT: fadd s0, s0, s3 ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v3.s[3] Index: llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll =================================================================== --- llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll +++ llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll @@ -63,14 +63,14 @@ ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1 ; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2 -; CHECK-NEXT: movi v5.4s, #128, lsl #24 ; CHECK-NEXT: // kill: def $s4 killed $s4 def $q4 ; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: mov v5.s[0], v4.s[0] +; CHECK-NEXT: movi v1.4s, #128, lsl #24 ; CHECK-NEXT: mov v0.s[2], v2.s[0] +; CHECK-NEXT: mov v1.s[0], v4.s[0] ; CHECK-NEXT: mov v0.s[3], v3.s[0] -; CHECK-NEXT: fadd v0.4s, v0.4s, v5.4s +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s ; CHECK-NEXT: faddp v0.4s, v0.4s, v0.4s ; CHECK-NEXT: faddp s0, v0.2s ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/vecreduce-fadd.ll =================================================================== --- llvm/test/CodeGen/AArch64/vecreduce-fadd.ll +++ llvm/test/CodeGen/AArch64/vecreduce-fadd.ll @@ -295,9 +295,9 @@ ; ; CHECKNOFP16-LABEL: fadd_reduction_v4f16_in_loop: ; CHECKNOFP16: // %bb.0: // %entry -; CHECKNOFP16-NEXT: adrp x9, .LCPI10_0 +; CHECKNOFP16-NEXT: adrp x8, .LCPI10_0 +; CHECKNOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI10_0] ; CHECKNOFP16-NEXT: mov x8, xzr -; CHECKNOFP16-NEXT: ldr h0, [x9, :lo12:.LCPI10_0] ; CHECKNOFP16-NEXT: .LBB10_1: // %loop ; CHECKNOFP16-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECKNOFP16-NEXT: ldr d1, [x0, x8] @@ -367,9 +367,9 @@ ; ; CHECKNOFP16-LABEL: fadd_reduction_v8f16_in_loop: ; CHECKNOFP16: // %bb.0: // %entry -; CHECKNOFP16-NEXT: adrp x9, .LCPI11_0 +; CHECKNOFP16-NEXT: adrp x8, .LCPI11_0 +; CHECKNOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI11_0] ; CHECKNOFP16-NEXT: mov x8, xzr -; CHECKNOFP16-NEXT: ldr h0, [x9, :lo12:.LCPI11_0] ; CHECKNOFP16-NEXT: .LBB11_1: // %loop ; CHECKNOFP16-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECKNOFP16-NEXT: ldr q1, [x0, x8] Index: llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll =================================================================== --- llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll +++ llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll @@ -132,10 +132,10 @@ ; CHECK-NOFP-NEXT: fcsel s1, s2, s16, gt ; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI6_0] ; CHECK-NOFP-NEXT: mov w8, #-8388608 +; CHECK-NOFP-NEXT: fmov s16, w8 ; CHECK-NOFP-NEXT: fcvt h0, s0 ; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmov s16, w8 ; CHECK-NOFP-NEXT: fcvt s0, h0 ; CHECK-NOFP-NEXT: fcvt s1, h1 ; CHECK-NOFP-NEXT: fcmp s3, s2 @@ -240,10 +240,10 @@ ; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI7_0] ; CHECK-NOFP-NEXT: mov w8, #57344 ; CHECK-NOFP-NEXT: movk w8, #51071, lsl #16 +; CHECK-NOFP-NEXT: fmov s16, w8 ; CHECK-NOFP-NEXT: fcvt h0, s0 ; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmov s16, w8 ; CHECK-NOFP-NEXT: fcvt s0, h0 ; CHECK-NOFP-NEXT: fcvt s1, h1 ; CHECK-NOFP-NEXT: fcmp s3, s2 Index: llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll =================================================================== --- llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll +++ llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll @@ -132,10 +132,10 @@ ; CHECK-NOFP-NEXT: fcsel s1, s2, s16, lt ; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI6_0] ; CHECK-NOFP-NEXT: mov w8, #2139095040 +; CHECK-NOFP-NEXT: fmov s16, w8 ; CHECK-NOFP-NEXT: fcvt h0, s0 ; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmov s16, w8 ; CHECK-NOFP-NEXT: fcvt s0, h0 ; CHECK-NOFP-NEXT: fcvt s1, h1 ; CHECK-NOFP-NEXT: fcmp s3, s2 @@ -240,10 +240,10 @@ ; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI7_0] ; CHECK-NOFP-NEXT: mov w8, #57344 ; CHECK-NOFP-NEXT: movk w8, #18303, lsl #16 +; CHECK-NOFP-NEXT: fmov s16, w8 ; CHECK-NOFP-NEXT: fcvt h0, s0 ; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmov s16, w8 ; CHECK-NOFP-NEXT: fcvt s0, h0 ; CHECK-NOFP-NEXT: fcvt s1, h1 ; CHECK-NOFP-NEXT: fcmp s3, s2 Index: llvm/test/CodeGen/AArch64/vector-fcopysign.ll =================================================================== --- llvm/test/CodeGen/AArch64/vector-fcopysign.ll +++ llvm/test/CodeGen/AArch64/vector-fcopysign.ll @@ -155,12 +155,12 @@ define <4 x double> @test_copysign_v4f64_v4f32(<4 x double> %a, <4 x float> %b) #0 { ; CHECK-LABEL: test_copysign_v4f64_v4f32: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.2d v3, #0xffffffffffffffff -; CHECK-NEXT: fcvtl2 v4.2d, v2.4s +; CHECK-NEXT: movi.2d v4, #0xffffffffffffffff +; CHECK-NEXT: fcvtl2 v3.2d, v2.4s ; CHECK-NEXT: fcvtl v2.2d, v2.2s -; CHECK-NEXT: fneg.2d v3, v3 -; CHECK-NEXT: bif.16b v1, v4, v3 -; CHECK-NEXT: bif.16b v0, v2, v3 +; CHECK-NEXT: fneg.2d v4, v4 +; CHECK-NEXT: bif.16b v1, v3, v4 +; CHECK-NEXT: bif.16b v0, v2, v4 ; CHECK-NEXT: ret %tmp0 = fpext <4 x float> %b to <4 x double> %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0) @@ -276,30 +276,30 @@ ; NOFP16-LABEL: test_copysign_v4f16_v4f64: ; NOFP16: ; %bb.0: ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0 -; NOFP16-NEXT: mov d4, v1[1] -; NOFP16-NEXT: mov h5, v0[1] +; NOFP16-NEXT: mov d3, v1[1] +; NOFP16-NEXT: mov h4, v0[1] ; NOFP16-NEXT: fcvt s1, d1 -; NOFP16-NEXT: fcvt s6, h0 +; NOFP16-NEXT: fcvt s5, h0 ; NOFP16-NEXT: mov h7, v0[2] -; NOFP16-NEXT: mvni.4s v3, #128, lsl #24 -; NOFP16-NEXT: fcvt s4, d4 -; NOFP16-NEXT: fcvt s5, h5 -; NOFP16-NEXT: bit.16b v1, v6, v3 -; NOFP16-NEXT: fcvt s6, d2 +; NOFP16-NEXT: mvni.4s v6, #128, lsl #24 +; NOFP16-NEXT: fcvt s3, d3 +; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: bit.16b v1, v5, v6 +; NOFP16-NEXT: fcvt s5, d2 ; NOFP16-NEXT: fcvt s7, h7 ; NOFP16-NEXT: mov d2, v2[1] -; NOFP16-NEXT: bit.16b v4, v5, v3 -; NOFP16-NEXT: mov h5, v0[3] +; NOFP16-NEXT: bit.16b v3, v4, v6 +; NOFP16-NEXT: mov h4, v0[3] ; NOFP16-NEXT: fcvt h0, s1 -; NOFP16-NEXT: bit.16b v6, v7, v3 +; NOFP16-NEXT: bit.16b v5, v7, v6 ; NOFP16-NEXT: fcvt s2, d2 -; NOFP16-NEXT: fcvt h1, s4 -; NOFP16-NEXT: fcvt s4, h5 -; NOFP16-NEXT: fcvt h5, s6 +; NOFP16-NEXT: fcvt h1, s3 +; NOFP16-NEXT: fcvt s3, h4 +; NOFP16-NEXT: fcvt h4, s5 ; NOFP16-NEXT: mov.h v0[1], v1[0] -; NOFP16-NEXT: mov.16b v1, v3 -; NOFP16-NEXT: bsl.16b v1, v4, v2 -; NOFP16-NEXT: mov.h v0[2], v5[0] +; NOFP16-NEXT: mov.16b v1, v6 +; NOFP16-NEXT: bsl.16b v1, v3, v2 +; NOFP16-NEXT: mov.h v0[2], v4[0] ; NOFP16-NEXT: fcvt h1, s1 ; NOFP16-NEXT: mov.h v0[3], v1[0] ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0 Index: llvm/test/CodeGen/AArch64/vector-gep.ll =================================================================== --- llvm/test/CodeGen/AArch64/vector-gep.ll +++ llvm/test/CodeGen/AArch64/vector-gep.ll @@ -13,9 +13,9 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh0: ; CHECK-NEXT: adrp x8, lCPI0_0@PAGE -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: Lloh1: ; CHECK-NEXT: ldr q1, [x8, lCPI0_0@PAGEOFF] +; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: add v0.2d, v0.2d, v1.2d ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll =================================================================== --- llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll +++ llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll @@ -4,8 +4,8 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_1_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -17,8 +17,8 @@ define <16 x i8> @ult_2_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_2_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -30,8 +30,8 @@ define <16 x i8> @ugt_2_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_2_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -43,8 +43,8 @@ define <16 x i8> @ult_3_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_3_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -56,8 +56,8 @@ define <16 x i8> @ugt_3_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_3_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -69,8 +69,8 @@ define <16 x i8> @ult_4_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_4_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -82,8 +82,8 @@ define <16 x i8> @ugt_4_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_4_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -95,8 +95,8 @@ define <16 x i8> @ult_5_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_5_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -108,8 +108,8 @@ define <16 x i8> @ugt_5_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_5_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -121,8 +121,8 @@ define <16 x i8> @ult_6_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_6_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -134,8 +134,8 @@ define <16 x i8> @ugt_6_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_6_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -147,8 +147,8 @@ define <16 x i8> @ult_7_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_7_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #7 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #7 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) Index: llvm/test/CodeGen/AArch64/vselect-constants.ll =================================================================== --- llvm/test/CodeGen/AArch64/vselect-constants.ll +++ llvm/test/CodeGen/AArch64/vselect-constants.ll @@ -10,12 +10,12 @@ define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_C1_or_C2_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: adrp x9, .LCPI0_1 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] +; CHECK-NEXT: adrp x8, .LCPI0_1 +; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_1] ; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ret @@ -27,10 +27,10 @@ ; CHECK-LABEL: cmp_sel_C1_or_C2_vec: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: adrp x9, .LCPI1_1 -; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI1_1] +; CHECK-NEXT: adrp x8, .LCPI1_1 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI1_1] +; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y @@ -41,12 +41,12 @@ define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_Cplus1_or_C_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI2_0 -; CHECK-NEXT: adrp x9, .LCPI2_1 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI2_1] +; CHECK-NEXT: adrp x8, .LCPI2_1 +; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI2_1] ; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ret @@ -58,10 +58,10 @@ ; CHECK-LABEL: cmp_sel_Cplus1_or_C_vec: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: adrp x9, .LCPI3_1 -; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI3_1] +; CHECK-NEXT: adrp x8, .LCPI3_1 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI3_1] +; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y @@ -72,12 +72,12 @@ define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_Cminus1_or_C_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: adrp x9, .LCPI4_1 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI4_1] +; CHECK-NEXT: adrp x8, .LCPI4_1 +; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_1] ; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ret @@ -89,10 +89,10 @@ ; CHECK-LABEL: cmp_sel_Cminus1_or_C_vec: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: adrp x9, .LCPI5_1 -; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI5_1] +; CHECK-NEXT: adrp x8, .LCPI5_1 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI5_1] +; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y Index: llvm/test/CodeGen/AArch64/vselect-ext.ll =================================================================== --- llvm/test/CodeGen/AArch64/vselect-ext.ll +++ llvm/test/CodeGen/AArch64/vselect-ext.ll @@ -31,18 +31,18 @@ ; CHECK-NEXT: ushll.8h v2, v0, #0 ; CHECK-NEXT: ushll2.8h v3, v0, #0 ; CHECK-NEXT: cmgt.16b v0, v0, v1 -; CHECK-NEXT: ushll.4s v4, v2, #0 ; CHECK-NEXT: ushll.4s v5, v3, #0 -; CHECK-NEXT: ushll2.4s v1, v2, #0 -; CHECK-NEXT: ushll2.4s v2, v3, #0 +; CHECK-NEXT: ushll2.4s v1, v3, #0 ; CHECK-NEXT: sshll.8h v3, v0, #0 ; CHECK-NEXT: sshll2.8h v0, v0, #0 +; CHECK-NEXT: ushll.4s v4, v2, #0 +; CHECK-NEXT: ushll2.4s v2, v2, #0 ; CHECK-NEXT: sshll.4s v6, v3, #0 ; CHECK-NEXT: sshll.4s v7, v0, #0 ; CHECK-NEXT: sshll2.4s v0, v0, #0 ; CHECK-NEXT: sshll2.4s v16, v3, #0 -; CHECK-NEXT: and.16b v3, v2, v0 -; CHECK-NEXT: and.16b v1, v1, v16 +; CHECK-NEXT: and.16b v3, v1, v0 +; CHECK-NEXT: and.16b v1, v2, v16 ; CHECK-NEXT: and.16b v2, v5, v7 ; CHECK-NEXT: and.16b v0, v4, v6 ; CHECK-NEXT: ret @@ -116,14 +116,13 @@ define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_other_use(<16 x i8> %a, <16 x i64> %v, <16 x i64>* %ptr) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_other_use: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov.16b v16, v2 -; CHECK-NEXT: movi.16b v2, #10 -; CHECK-NEXT: ushll.8h v18, v0, #0 +; CHECK-NEXT: movi.16b v18, #10 +; CHECK-NEXT: ldr q16, [sp] +; CHECK-NEXT: ushll.8h v17, v0, #0 ; CHECK-NEXT: ushll2.8h v20, v0, #0 -; CHECK-NEXT: mov.16b v17, v1 -; CHECK-NEXT: ldr q1, [sp] -; CHECK-NEXT: cmhi.16b v0, v0, v2 -; CHECK-NEXT: ushll.4s v19, v18, #0 +; CHECK-NEXT: ushll.4s v19, v17, #0 +; CHECK-NEXT: cmhi.16b v0, v0, v18 +; CHECK-NEXT: ushll.4s v18, v20, #0 ; CHECK-NEXT: sshll2.8h v21, v0, #0 ; CHECK-NEXT: sshll.8h v0, v0, #0 ; CHECK-NEXT: sshll2.4s v22, v21, #0 @@ -133,30 +132,31 @@ ; CHECK-NEXT: sshll2.4s v25, v0, #0 ; CHECK-NEXT: sshll2.2d v26, v21, #0 ; CHECK-NEXT: sshll.2d v28, v21, #0 -; CHECK-NEXT: sshll2.2d v27, v25, #0 ; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: and.16b v1, v1, v23 -; CHECK-NEXT: and.16b v7, v7, v24 +; CHECK-NEXT: sshll2.2d v27, v25, #0 ; CHECK-NEXT: sshll.2d v29, v25, #0 -; CHECK-NEXT: stp q7, q1, [x0, #96] -; CHECK-NEXT: and.16b v1, v6, v26 +; CHECK-NEXT: and.16b v16, v16, v23 +; CHECK-NEXT: and.16b v7, v7, v24 +; CHECK-NEXT: and.16b v6, v6, v26 +; CHECK-NEXT: stp q7, q16, [x0, #96] ; CHECK-NEXT: and.16b v5, v5, v28 -; CHECK-NEXT: ushll.4s v2, v20, #0 -; CHECK-NEXT: stp q5, q1, [x0, #64] -; CHECK-NEXT: ushll2.4s v18, v18, #0 +; CHECK-NEXT: ushll2.4s v17, v17, #0 +; CHECK-NEXT: stp q5, q6, [x0, #64] +; CHECK-NEXT: sshll2.2d v6, v0, #0 +; CHECK-NEXT: sshll.2d v7, v0, #0 ; CHECK-NEXT: ushll2.4s v20, v20, #0 -; CHECK-NEXT: and.16b v1, v4, v27 -; CHECK-NEXT: sshll2.2d v4, v0, #0 -; CHECK-NEXT: sshll.2d v5, v0, #0 +; CHECK-NEXT: and.16b v4, v4, v27 ; CHECK-NEXT: and.16b v3, v3, v29 -; CHECK-NEXT: stp q3, q1, [x0, #32] +; CHECK-NEXT: stp q3, q4, [x0, #32] +; CHECK-NEXT: and.16b v4, v17, v25 +; CHECK-NEXT: and.16b v5, v18, v21 +; CHECK-NEXT: and.16b v2, v2, v6 +; CHECK-NEXT: and.16b v6, v1, v7 ; CHECK-NEXT: and.16b v3, v20, v22 -; CHECK-NEXT: and.16b v1, v18, v25 -; CHECK-NEXT: and.16b v2, v2, v21 +; CHECK-NEXT: stp q6, q2, [x0] ; CHECK-NEXT: and.16b v0, v19, v0 -; CHECK-NEXT: and.16b v4, v16, v4 -; CHECK-NEXT: and.16b v5, v17, v5 -; CHECK-NEXT: stp q5, q4, [x0] +; CHECK-NEXT: mov.16b v1, v4 +; CHECK-NEXT: mov.16b v2, v5 ; CHECK-NEXT: ret entry: %ext = zext <16 x i8> %a to <16 x i32> @@ -173,19 +173,19 @@ ; CHECK-NEXT: movi.16b v1, #10 ; CHECK-NEXT: sshll.8h v3, v0, #0 ; CHECK-NEXT: sshll2.8h v2, v0, #0 +; CHECK-NEXT: ext.16b v4, v3, v3, #8 ; CHECK-NEXT: cmgt.16b v0, v0, v1 -; CHECK-NEXT: ext.16b v1, v3, v3, #8 +; CHECK-NEXT: ext.16b v1, v2, v2, #8 ; CHECK-NEXT: sshll.8h v5, v0, #0 ; CHECK-NEXT: sshll2.8h v0, v0, #0 -; CHECK-NEXT: ext.16b v4, v2, v2, #8 ; CHECK-NEXT: ext.16b v6, v5, v5, #8 ; CHECK-NEXT: ext.16b v7, v0, v0, #8 ; CHECK-NEXT: and.8b v0, v2, v0 ; CHECK-NEXT: sshll.4s v2, v0, #0 ; CHECK-NEXT: and.8b v0, v3, v5 -; CHECK-NEXT: and.8b v1, v1, v6 -; CHECK-NEXT: and.8b v3, v4, v7 ; CHECK-NEXT: sshll.4s v0, v0, #0 +; CHECK-NEXT: and.8b v3, v1, v7 +; CHECK-NEXT: and.8b v1, v4, v6 ; CHECK-NEXT: sshll.4s v1, v1, #0 ; CHECK-NEXT: sshll.4s v3, v3, #0 ; CHECK-NEXT: ret @@ -202,19 +202,19 @@ ; CHECK-NEXT: movi.16b v1, #10 ; CHECK-NEXT: sshll.8h v3, v0, #0 ; CHECK-NEXT: sshll2.8h v2, v0, #0 +; CHECK-NEXT: ext.16b v4, v3, v3, #8 ; CHECK-NEXT: cmhi.16b v0, v0, v1 -; CHECK-NEXT: ext.16b v1, v3, v3, #8 +; CHECK-NEXT: ext.16b v1, v2, v2, #8 ; CHECK-NEXT: sshll.8h v5, v0, #0 ; CHECK-NEXT: sshll2.8h v0, v0, #0 -; CHECK-NEXT: ext.16b v4, v2, v2, #8 ; CHECK-NEXT: ext.16b v6, v5, v5, #8 ; CHECK-NEXT: ext.16b v7, v0, v0, #8 ; CHECK-NEXT: and.8b v0, v2, v0 ; CHECK-NEXT: sshll.4s v2, v0, #0 ; CHECK-NEXT: and.8b v0, v3, v5 -; CHECK-NEXT: and.8b v1, v1, v6 -; CHECK-NEXT: and.8b v3, v4, v7 ; CHECK-NEXT: sshll.4s v0, v0, #0 +; CHECK-NEXT: and.8b v3, v1, v7 +; CHECK-NEXT: and.8b v1, v4, v6 ; CHECK-NEXT: sshll.4s v1, v1, #0 ; CHECK-NEXT: sshll.4s v3, v3, #0 ; CHECK-NEXT: ret @@ -275,12 +275,12 @@ ; CHECK-NEXT: sshll.8h v2, v2, #0 ; CHECK-NEXT: and.16b v5, v5, v6 ; CHECK-NEXT: and.16b v3, v3, v4 +; CHECK-NEXT: sshll2.4s v7, v2, #0 ; CHECK-NEXT: stp q3, q5, [x1, #32] -; CHECK-NEXT: sshll2.4s v4, v2, #0 ; CHECK-NEXT: sshll.4s v2, v2, #0 ; CHECK-NEXT: ushll2.4s v3, v1, #0 ; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: and.16b v3, v3, v4 +; CHECK-NEXT: and.16b v3, v3, v7 ; CHECK-NEXT: and.16b v1, v1, v2 ; CHECK-NEXT: stp q1, q3, [x1], #64 ; CHECK-NEXT: b.ne LBB8_1 @@ -312,31 +312,31 @@ ; CHECK-LABEL: extension_in_loop_as_shuffle_v16i8_to_v16i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh0: -; CHECK-NEXT: adrp x9, lCPI9_0@PAGE +; CHECK-NEXT: adrp x8, lCPI9_0@PAGE ; CHECK-NEXT: Lloh1: -; CHECK-NEXT: adrp x10, lCPI9_1@PAGE +; CHECK-NEXT: ldr q1, [x8, lCPI9_0@PAGEOFF] ; CHECK-NEXT: Lloh2: -; CHECK-NEXT: adrp x11, lCPI9_2@PAGE +; CHECK-NEXT: adrp x8, lCPI9_1@PAGE ; CHECK-NEXT: Lloh3: -; CHECK-NEXT: adrp x12, lCPI9_3@PAGE -; CHECK-NEXT: movi.2d v2, #0xffffffffffffffff -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: ldr q2, [x8, lCPI9_1@PAGEOFF] ; CHECK-NEXT: Lloh4: -; CHECK-NEXT: ldr q0, [x9, lCPI9_0@PAGEOFF] +; CHECK-NEXT: adrp x8, lCPI9_2@PAGE +; CHECK-NEXT: movi.2d v0, #0xffffffffffffffff ; CHECK-NEXT: Lloh5: -; CHECK-NEXT: ldr q1, [x10, lCPI9_1@PAGEOFF] +; CHECK-NEXT: ldr q3, [x8, lCPI9_2@PAGEOFF] ; CHECK-NEXT: Lloh6: -; CHECK-NEXT: ldr q3, [x11, lCPI9_2@PAGEOFF] +; CHECK-NEXT: adrp x8, lCPI9_3@PAGE ; CHECK-NEXT: Lloh7: -; CHECK-NEXT: ldr q4, [x12, lCPI9_3@PAGEOFF] +; CHECK-NEXT: ldr q4, [x8, lCPI9_3@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB9_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q5, [x0, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #128 -; CHECK-NEXT: cmgt.16b v6, v5, v2 -; CHECK-NEXT: tbl.16b v7, { v5 }, v0 -; CHECK-NEXT: tbl.16b v16, { v5 }, v1 +; CHECK-NEXT: cmgt.16b v6, v5, v0 +; CHECK-NEXT: tbl.16b v7, { v5 }, v1 +; CHECK-NEXT: tbl.16b v16, { v5 }, v2 ; CHECK-NEXT: sshll2.8h v18, v6, #0 ; CHECK-NEXT: tbl.16b v17, { v5 }, v3 ; CHECK-NEXT: sshll2.4s v19, v18, #0 @@ -354,10 +354,13 @@ ; CHECK-NEXT: b.ne LBB9_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret -; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh7 -; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh6 -; CHECK-NEXT: .loh AdrpLdr Lloh1, Lloh5 -; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh4 +; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7 +; CHECK-NEXT: .loh AdrpAdrp Lloh4, Lloh6 +; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5 +; CHECK-NEXT: .loh AdrpAdrp Lloh2, Lloh4 +; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3 +; CHECK-NEXT: .loh AdrpAdrp Lloh0, Lloh2 +; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 entry: br label %loop @@ -385,31 +388,31 @@ ; CHECK-LABEL: shuffle_in_loop_is_no_extend_v16i8_to_v16i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh8: -; CHECK-NEXT: adrp x9, lCPI10_0@PAGE +; CHECK-NEXT: adrp x8, lCPI10_0@PAGE ; CHECK-NEXT: Lloh9: -; CHECK-NEXT: adrp x10, lCPI10_1@PAGE +; CHECK-NEXT: ldr q1, [x8, lCPI10_0@PAGEOFF] ; CHECK-NEXT: Lloh10: -; CHECK-NEXT: adrp x11, lCPI10_2@PAGE +; CHECK-NEXT: adrp x8, lCPI10_1@PAGE ; CHECK-NEXT: Lloh11: -; CHECK-NEXT: adrp x12, lCPI10_3@PAGE -; CHECK-NEXT: movi.2d v2, #0xffffffffffffffff -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: ldr q2, [x8, lCPI10_1@PAGEOFF] ; CHECK-NEXT: Lloh12: -; CHECK-NEXT: ldr q0, [x9, lCPI10_0@PAGEOFF] +; CHECK-NEXT: adrp x8, lCPI10_2@PAGE +; CHECK-NEXT: movi.2d v0, #0xffffffffffffffff ; CHECK-NEXT: Lloh13: -; CHECK-NEXT: ldr q1, [x10, lCPI10_1@PAGEOFF] +; CHECK-NEXT: ldr q3, [x8, lCPI10_2@PAGEOFF] ; CHECK-NEXT: Lloh14: -; CHECK-NEXT: ldr q3, [x11, lCPI10_2@PAGEOFF] +; CHECK-NEXT: adrp x8, lCPI10_3@PAGE ; CHECK-NEXT: Lloh15: -; CHECK-NEXT: ldr q4, [x12, lCPI10_3@PAGEOFF] +; CHECK-NEXT: ldr q4, [x8, lCPI10_3@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB10_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q5, [x0, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #128 -; CHECK-NEXT: cmgt.16b v6, v5, v2 -; CHECK-NEXT: tbl.16b v7, { v5 }, v0 -; CHECK-NEXT: tbl.16b v16, { v5 }, v1 +; CHECK-NEXT: cmgt.16b v6, v5, v0 +; CHECK-NEXT: tbl.16b v7, { v5 }, v1 +; CHECK-NEXT: tbl.16b v16, { v5 }, v2 ; CHECK-NEXT: sshll2.8h v18, v6, #0 ; CHECK-NEXT: tbl.16b v17, { v5 }, v3 ; CHECK-NEXT: sshll2.4s v19, v18, #0 @@ -427,10 +430,13 @@ ; CHECK-NEXT: b.ne LBB10_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret -; CHECK-NEXT: .loh AdrpLdr Lloh11, Lloh15 -; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh14 -; CHECK-NEXT: .loh AdrpLdr Lloh9, Lloh13 -; CHECK-NEXT: .loh AdrpLdr Lloh8, Lloh12 +; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh15 +; CHECK-NEXT: .loh AdrpAdrp Lloh12, Lloh14 +; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh13 +; CHECK-NEXT: .loh AdrpAdrp Lloh10, Lloh12 +; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh11 +; CHECK-NEXT: .loh AdrpAdrp Lloh8, Lloh10 +; CHECK-NEXT: .loh AdrpLdr Lloh8, Lloh9 entry: br label %loop Index: llvm/test/CodeGen/AArch64/win64_vararg.ll =================================================================== --- llvm/test/CodeGen/AArch64/win64_vararg.ll +++ llvm/test/CodeGen/AArch64/win64_vararg.ll @@ -7,8 +7,8 @@ ; CHECK-NEXT: str x30, [sp, #-80]! // 8-byte Folded Spill ; CHECK-NEXT: add x8, sp, #24 ; CHECK-NEXT: add x0, sp, #24 -; CHECK-NEXT: stp x3, x4, [sp, #40] ; CHECK-NEXT: stp x1, x2, [sp, #24] +; CHECK-NEXT: stp x3, x4, [sp, #40] ; CHECK-NEXT: stp x5, x6, [sp, #56] ; CHECK-NEXT: str x7, [sp, #72] ; CHECK-NEXT: str x8, [sp, #8] @@ -86,8 +86,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub sp, sp, #80 ; CHECK-NEXT: add x8, sp, #24 -; CHECK-NEXT: stp x3, x4, [sp, #40] ; CHECK-NEXT: stp x1, x2, [sp, #24] +; CHECK-NEXT: stp x3, x4, [sp, #40] ; CHECK-NEXT: stp x5, x6, [sp, #56] ; CHECK-NEXT: str x7, [sp, #72] ; CHECK-NEXT: stp x8, x8, [sp], #80 @@ -187,15 +187,15 @@ ; CHECK-NEXT: .seh_endprologue ; CHECK-NEXT: add x8, x29, #24 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: mov x19, x1 +; CHECK-NEXT: str x8, [x29, #16] +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: add x8, x8, #15 ; CHECK-NEXT: mov x23, sp -; CHECK-NEXT: stp x3, x4, [x29, #32] -; CHECK-NEXT: stp x8, x2, [x29, #16] -; CHECK-NEXT: add x8, x9, #15 ; CHECK-NEXT: lsr x15, x8, #4 -; CHECK-NEXT: stp x5, x6, [x29, #48] -; CHECK-NEXT: str x7, [x29, #64] +; CHECK-NEXT: stp x2, x3, [x29, #24] +; CHECK-NEXT: stp x4, x5, [x29, #40] +; CHECK-NEXT: stp x6, x7, [x29, #56] ; CHECK-NEXT: bl __chkstk ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: sub x20, x8, x15, lsl #4 Index: llvm/test/CodeGen/AArch64/win64_vararg_float.ll =================================================================== --- llvm/test/CodeGen/AArch64/win64_vararg_float.ll +++ llvm/test/CodeGen/AArch64/win64_vararg_float.ll @@ -7,13 +7,13 @@ ; DAGISEL-LABEL: float_va_fn: ; DAGISEL: // %bb.0: // %entry ; DAGISEL-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill -; DAGISEL-NEXT: add x8, sp, #16 ; DAGISEL-NEXT: fmov s0, w0 +; DAGISEL-NEXT: add x8, sp, #16 ; DAGISEL-NEXT: add x0, sp, #16 -; DAGISEL-NEXT: stp x3, x4, [sp, #24] -; DAGISEL-NEXT: stp x5, x6, [sp, #40] -; DAGISEL-NEXT: stp x8, x2, [sp, #8] -; DAGISEL-NEXT: str x7, [sp, #56] +; DAGISEL-NEXT: stp x2, x3, [sp, #16] +; DAGISEL-NEXT: stp x4, x5, [sp, #32] +; DAGISEL-NEXT: stp x6, x7, [sp, #48] +; DAGISEL-NEXT: str x8, [sp, #8] ; DAGISEL-NEXT: bl f_va_list ; DAGISEL-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload ; DAGISEL-NEXT: ret @@ -58,13 +58,13 @@ ; DAGISEL-LABEL: double_va_fn: ; DAGISEL: // %bb.0: // %entry ; DAGISEL-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill -; DAGISEL-NEXT: add x8, sp, #16 ; DAGISEL-NEXT: fmov d0, x0 +; DAGISEL-NEXT: add x8, sp, #16 ; DAGISEL-NEXT: add x0, sp, #16 -; DAGISEL-NEXT: stp x3, x4, [sp, #24] -; DAGISEL-NEXT: stp x5, x6, [sp, #40] -; DAGISEL-NEXT: stp x8, x2, [sp, #8] -; DAGISEL-NEXT: str x7, [sp, #56] +; DAGISEL-NEXT: stp x2, x3, [sp, #16] +; DAGISEL-NEXT: stp x4, x5, [sp, #32] +; DAGISEL-NEXT: stp x6, x7, [sp, #48] +; DAGISEL-NEXT: str x8, [sp, #8] ; DAGISEL-NEXT: bl d_va_list ; DAGISEL-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload ; DAGISEL-NEXT: ret Index: llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll =================================================================== --- llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll +++ llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll @@ -7,13 +7,13 @@ ; DAGISEL-LABEL: float_va_fn: ; DAGISEL: // %bb.0: // %entry ; DAGISEL-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill -; DAGISEL-NEXT: add x8, sp, #16 ; DAGISEL-NEXT: fmov s0, w0 +; DAGISEL-NEXT: add x8, sp, #16 ; DAGISEL-NEXT: add x0, sp, #16 -; DAGISEL-NEXT: stp x3, x4, [sp, #24] -; DAGISEL-NEXT: stp x5, x6, [sp, #40] -; DAGISEL-NEXT: stp x8, x2, [sp, #8] -; DAGISEL-NEXT: str x7, [sp, #56] +; DAGISEL-NEXT: stp x2, x3, [sp, #16] +; DAGISEL-NEXT: stp x4, x5, [sp, #32] +; DAGISEL-NEXT: stp x6, x7, [sp, #48] +; DAGISEL-NEXT: str x8, [sp, #8] ; DAGISEL-NEXT: bl f_va_list ; DAGISEL-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload ; DAGISEL-NEXT: ret @@ -58,13 +58,13 @@ ; DAGISEL-LABEL: double_va_fn: ; DAGISEL: // %bb.0: // %entry ; DAGISEL-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill -; DAGISEL-NEXT: add x8, sp, #16 ; DAGISEL-NEXT: fmov d0, x0 +; DAGISEL-NEXT: add x8, sp, #16 ; DAGISEL-NEXT: add x0, sp, #16 -; DAGISEL-NEXT: stp x3, x4, [sp, #24] -; DAGISEL-NEXT: stp x5, x6, [sp, #40] -; DAGISEL-NEXT: stp x8, x2, [sp, #8] -; DAGISEL-NEXT: str x7, [sp, #56] +; DAGISEL-NEXT: stp x2, x3, [sp, #16] +; DAGISEL-NEXT: stp x4, x5, [sp, #32] +; DAGISEL-NEXT: stp x6, x7, [sp, #48] +; DAGISEL-NEXT: str x8, [sp, #8] ; DAGISEL-NEXT: bl d_va_list ; DAGISEL-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload ; DAGISEL-NEXT: ret Index: llvm/test/CodeGen/AArch64/zero-call-used-regs.ll =================================================================== --- llvm/test/CodeGen/AArch64/zero-call-used-regs.ll +++ llvm/test/CodeGen/AArch64/zero-call-used-regs.ll @@ -86,10 +86,10 @@ ; CHECK-NEXT: mov x1, #0 ; CHECK-NEXT: mov x3, #0 ; CHECK-NEXT: mov x4, #0 -; CHECK-NEXT: orr w0, w8, w2 -; CHECK-NEXT: mov x2, #0 ; CHECK-NEXT: mov x5, #0 ; CHECK-NEXT: mov x6, #0 +; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: mov x2, #0 ; CHECK-NEXT: mov x7, #0 ; CHECK-NEXT: mov x8, #0 ; CHECK-NEXT: mov x18, #0 @@ -108,10 +108,10 @@ ; CHECK-NEXT: mov x1, #0 ; CHECK-NEXT: mov x3, #0 ; CHECK-NEXT: mov x4, #0 -; CHECK-NEXT: orr w0, w8, w2 -; CHECK-NEXT: mov x2, #0 ; CHECK-NEXT: mov x5, #0 ; CHECK-NEXT: mov x6, #0 +; CHECK-NEXT: orr w0, w8, w2 +; CHECK-NEXT: mov x2, #0 ; CHECK-NEXT: mov x7, #0 ; CHECK-NEXT: mov x8, #0 ; CHECK-NEXT: mov x9, #0 @@ -139,10 +139,10 @@ ; DEFAULT-NEXT: mov x1, #0 ; DEFAULT-NEXT: mov x3, #0 ; DEFAULT-NEXT: mov x4, #0 -; DEFAULT-NEXT: orr w0, w8, w2 -; DEFAULT-NEXT: mov x2, #0 ; DEFAULT-NEXT: mov x5, #0 ; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: orr w0, w8, w2 +; DEFAULT-NEXT: mov x2, #0 ; DEFAULT-NEXT: mov x7, #0 ; DEFAULT-NEXT: mov x8, #0 ; DEFAULT-NEXT: mov x18, #0 @@ -162,10 +162,10 @@ ; SVE-NEXT: mov x1, #0 ; SVE-NEXT: mov x3, #0 ; SVE-NEXT: mov x4, #0 -; SVE-NEXT: orr w0, w8, w2 -; SVE-NEXT: mov x2, #0 ; SVE-NEXT: mov x5, #0 ; SVE-NEXT: mov x6, #0 +; SVE-NEXT: orr w0, w8, w2 +; SVE-NEXT: mov x2, #0 ; SVE-NEXT: mov x7, #0 ; SVE-NEXT: mov x8, #0 ; SVE-NEXT: mov x18, #0 @@ -196,10 +196,10 @@ ; DEFAULT-NEXT: mov x1, #0 ; DEFAULT-NEXT: mov x3, #0 ; DEFAULT-NEXT: mov x4, #0 -; DEFAULT-NEXT: orr w0, w8, w2 -; DEFAULT-NEXT: mov x2, #0 ; DEFAULT-NEXT: mov x5, #0 ; DEFAULT-NEXT: mov x6, #0 +; DEFAULT-NEXT: orr w0, w8, w2 +; DEFAULT-NEXT: mov x2, #0 ; DEFAULT-NEXT: mov x7, #0 ; DEFAULT-NEXT: mov x8, #0 ; DEFAULT-NEXT: mov x9, #0 @@ -252,10 +252,10 @@ ; SVE-NEXT: mov x1, #0 ; SVE-NEXT: mov x3, #0 ; SVE-NEXT: mov x4, #0 -; SVE-NEXT: orr w0, w8, w2 -; SVE-NEXT: mov x2, #0 ; SVE-NEXT: mov x5, #0 ; SVE-NEXT: mov x6, #0 +; SVE-NEXT: orr w0, w8, w2 +; SVE-NEXT: mov x2, #0 ; SVE-NEXT: mov x7, #0 ; SVE-NEXT: mov x8, #0 ; SVE-NEXT: mov x9, #0 Index: llvm/test/MC/AArch64/elf-globaladdress.ll =================================================================== --- llvm/test/MC/AArch64/elf-globaladdress.ll +++ llvm/test/MC/AArch64/elf-globaladdress.ll @@ -42,12 +42,12 @@ ; OBJ: Relocations [ ; OBJ: Section {{.*}} .rela.text { ; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var8 -; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var16 ; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST8_ABS_LO12_NC var8 -; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var32 +; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var16 ; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST16_ABS_LO12_NC var16 -; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var64 +; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var32 ; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST32_ABS_LO12_NC var32 +; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var64 ; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST64_ABS_LO12_NC var64 ; This is on the store, so not really important, but it stops the next Index: llvm/test/Transforms/CanonicalizeFreezeInLoops/aarch64.ll =================================================================== --- llvm/test/Transforms/CanonicalizeFreezeInLoops/aarch64.ll +++ llvm/test/Transforms/CanonicalizeFreezeInLoops/aarch64.ll @@ -10,9 +10,9 @@ ; CHECK-NEXT: add w8, w2, #1 ; CHECK-NEXT: .LBB0_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subs w1, w1, #1 ; CHECK-NEXT: strb wzr, [x0, w8, sxtw] ; CHECK-NEXT: add w8, w8, #1 -; CHECK-NEXT: subs w1, w1, #1 ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %exit ; CHECK-NEXT: ret @@ -37,8 +37,8 @@ ; CHECK-NEXT: add w8, w2, #1 ; CHECK-NEXT: .LBB1_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: strb wzr, [x0, w8, sxtw] ; CHECK-NEXT: subs w1, w1, #1 +; CHECK-NEXT: strb wzr, [x0, w8, sxtw] ; CHECK-NEXT: add w8, w8, #1 ; CHECK-NEXT: b.ne .LBB1_1 ; CHECK-NEXT: // %bb.2: // %exit Index: llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll =================================================================== --- llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll +++ llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll @@ -14,9 +14,9 @@ ; CHECK-NEXT: b.ge .LBB0_2 ; CHECK-NEXT: .LBB0_1: // %while_body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add w10, w8, #1 -; CHECK-NEXT: stp w10, w8, [x9] -; CHECK-NEXT: mov w8, w10 +; CHECK-NEXT: str w8, [x9, #4] +; CHECK-NEXT: add w8, w8, #1 +; CHECK-NEXT: str w8, [x9] ; CHECK-NEXT: cmp w8, w1 ; CHECK-NEXT: b.lt .LBB0_1 ; CHECK-NEXT: .LBB0_2: // %while_end @@ -54,9 +54,9 @@ ; CHECK-NEXT: b.ge .LBB1_3 ; CHECK-NEXT: .LBB1_2: // %while_body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add w10, w8, #1 -; CHECK-NEXT: stp w10, w8, [x9] -; CHECK-NEXT: mov w8, w10 +; CHECK-NEXT: str w8, [x9, #4] +; CHECK-NEXT: add w8, w8, #1 +; CHECK-NEXT: str w8, [x9] ; CHECK-NEXT: cmp w8, w1 ; CHECK-NEXT: b.lt .LBB1_2 ; CHECK-NEXT: .LBB1_3: // %while_end @@ -96,9 +96,9 @@ ; CHECK-NEXT: b.ge .LBB2_3 ; CHECK-NEXT: .LBB2_2: // %while_body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add w10, w8, #1 -; CHECK-NEXT: stp w10, w8, [x9] -; CHECK-NEXT: mov w8, w10 +; CHECK-NEXT: str w8, [x9, #4] +; CHECK-NEXT: add w8, w8, #1 +; CHECK-NEXT: str w8, [x9] ; CHECK-NEXT: cmp w8, w3 ; CHECK-NEXT: b.lt .LBB2_2 ; CHECK-NEXT: .LBB2_3: // %while_end @@ -165,9 +165,9 @@ ; CHECK-NEXT: b.ge .LBB3_4 ; CHECK-NEXT: // %bb.3: // %while_body ; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1 -; CHECK-NEXT: add w9, w20, #1 -; CHECK-NEXT: stp w9, w20, [x8] -; CHECK-NEXT: mov w20, w9 +; CHECK-NEXT: str w20, [x8, #4] +; CHECK-NEXT: add w20, w20, #1 +; CHECK-NEXT: str w20, [x8] ; CHECK-NEXT: b .LBB3_1 ; CHECK-NEXT: .LBB3_4: // %while_end ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload @@ -228,9 +228,9 @@ ; CHECK-NEXT: b.ge .LBB4_2 ; CHECK-NEXT: .LBB4_1: // %while_body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add w10, w8, #1 -; CHECK-NEXT: stp w10, w8, [x9] -; CHECK-NEXT: mov w8, w10 +; CHECK-NEXT: str w8, [x9, #4] +; CHECK-NEXT: add w8, w8, #1 +; CHECK-NEXT: str w8, [x9] ; CHECK-NEXT: cmp w8, w1 ; CHECK-NEXT: b.lt .LBB4_1 ; CHECK-NEXT: .LBB4_2: // %while_end Index: llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll =================================================================== --- llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll +++ llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll @@ -15,14 +15,14 @@ ; CHECK-NEXT: ldr q0, [x11, x8] ; CHECK-NEXT: add x13, x0, x8 ; CHECK-NEXT: ldr q1, [x12, x8] -; CHECK-NEXT: add x14, x1, x8 -; CHECK-NEXT: ldr q4, [x10, x8] ; CHECK-NEXT: subs w5, w5, #1 ; CHECK-NEXT: ldp q2, q3, [x13] +; CHECK-NEXT: add x13, x1, x8 ; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldp q6, q1, [x14] +; CHECK-NEXT: ldp q6, q1, [x13] ; CHECK-NEXT: fadd v2.4s, v2.4s, v3.4s ; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q4, [x10, x8] ; CHECK-NEXT: ldr q5, [x9, x8] ; CHECK-NEXT: add x8, x8, #32 ; CHECK-NEXT: fadd v1.4s, v6.4s, v1.4s Index: llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll =================================================================== --- llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll +++ llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll @@ -7,6 +7,7 @@ %3 = alloca i32, align 4 %4 = alloca i32, align 4 %5 = alloca i32, align 4 + %l6 = alloca i32, align 4 store i32 0, i32* %1, align 4 store i32 0, i32* %2, align 4 %6 = load i32, i32* %2, align 4 @@ -17,6 +18,7 @@ store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 + store i32 5, i32* %l6, align 4 br label %10 store i32 1, i32* %4, align 4 @@ -30,6 +32,7 @@ store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 + store i32 5, i32* %l6, align 4 br label %15 store i32 1, i32* %4, align 4 @@ -44,6 +47,7 @@ %3 = alloca i32, align 4 %4 = alloca i32, align 4 %5 = alloca i32, align 4 + %l6 = alloca i32, align 4 store i32 0, i32* %1, align 4 store i32 0, i32* @x, align 4 @@ -51,12 +55,14 @@ store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 + store i32 5, i32* %l6, align 4 store i32 1, i32* @x, align 4 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() store i32 1, i32* %2, align 4 store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 + store i32 5, i32* %l6, align 4 ret i32 0 } Index: llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected =================================================================== --- llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected +++ llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected @@ -8,6 +8,7 @@ %3 = alloca i32, align 4 %4 = alloca i32, align 4 %5 = alloca i32, align 4 + %l6 = alloca i32, align 4 store i32 0, i32* %1, align 4 store i32 0, i32* %2, align 4 %6 = load i32, i32* %2, align 4 @@ -18,6 +19,7 @@ store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 + store i32 5, i32* %l6, align 4 br label %10 store i32 1, i32* %4, align 4 @@ -31,6 +33,7 @@ store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 + store i32 5, i32* %l6, align 4 br label %15 store i32 1, i32* %4, align 4 @@ -45,6 +48,7 @@ %3 = alloca i32, align 4 %4 = alloca i32, align 4 %5 = alloca i32, align 4 + %l6 = alloca i32, align 4 store i32 0, i32* %1, align 4 store i32 0, i32* @x, align 4 @@ -52,12 +56,14 @@ store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 + store i32 5, i32* %l6, align 4 store i32 1, i32* @x, align 4 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() store i32 1, i32* %2, align 4 store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 + store i32 5, i32* %l6, align 4 ret i32 0 } @@ -83,11 +89,17 @@ ; CHECK-NEXT: str w8, [sp, #16] ; CHECK-NEXT: b .LBB0_5 ; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: mov w10, #3 +; CHECK-NEXT: stp w9, w8, [x29, #-12] ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: ldur w8, [x29, #-8] ; CHECK-NEXT: cbnz w8, .LBB0_2 ; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: stp w9, w8, [x29, #-12] +; CHECK-NEXT: mov w10, #3 ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: .LBB0_5: ; CHECK-NEXT: mov w0, wzr @@ -109,19 +121,22 @@ ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: adrp x9, x -; CHECK-NEXT: mov w10, #2 -; CHECK-NEXT: mov w11, #3 -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: str w8, [x9, :lo12:x] -; CHECK-NEXT: mov w9, #4 +; CHECK-NEXT: adrp x12, x +; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: mov w10, #3 +; CHECK-NEXT: mov w11, #4 +; CHECK-NEXT: str w8, [x12, :lo12:x] +; CHECK-NEXT: mov w12, #5 ; CHECK-NEXT: stp w8, wzr, [x29, #-8] -; CHECK-NEXT: stur w10, [x29, #-12] -; CHECK-NEXT: stp w9, w11, [sp, #12] +; CHECK-NEXT: stur w9, [x29, #-12] +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: stp w11, w10, [sp, #12] +; CHECK-NEXT: str w12, [sp, #8] ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP -; CHECK-NEXT: stp w10, w8, [x29, #-12] -; CHECK-NEXT: stp w9, w11, [sp, #12] +; CHECK-NEXT: stp w9, w8, [x29, #-12] +; CHECK-NEXT: stp w11, w10, [sp, #12] +; CHECK-NEXT: str w12, [sp, #8] ; CHECK-NEXT: .cfi_def_cfa wsp, 48 ; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 @@ -132,9 +147,8 @@ ; ; CHECK-LABEL: OUTLINED_FUNCTION_0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #2 -; CHECK-NEXT: mov w10, #3 -; CHECK-NEXT: mov w11, #4 -; CHECK-NEXT: stp w9, w8, [x29, #-12] -; CHECK-NEXT: stp w11, w10, [sp, #12] +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: stp w8, w10, [sp, #12] +; CHECK-NEXT: mov w8, #5 +; CHECK-NEXT: str w8, [sp, #8] ; CHECK-NEXT: ret Index: llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected =================================================================== --- llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected +++ llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected @@ -24,11 +24,17 @@ ; CHECK-NEXT: str w8, [sp, #16] ; CHECK-NEXT: b .LBB0_5 ; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: mov w10, #3 +; CHECK-NEXT: stp w9, w8, [x29, #-12] ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: ldur w8, [x29, #-8] ; CHECK-NEXT: cbnz w8, .LBB0_2 ; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: stp w9, w8, [x29, #-12] +; CHECK-NEXT: mov w10, #3 ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: .LBB0_5: ; CHECK-NEXT: mov w0, wzr @@ -44,6 +50,7 @@ %3 = alloca i32, align 4 %4 = alloca i32, align 4 %5 = alloca i32, align 4 + %l6 = alloca i32, align 4 store i32 0, i32* %1, align 4 store i32 0, i32* %2, align 4 %6 = load i32, i32* %2, align 4 @@ -54,6 +61,7 @@ store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 + store i32 5, i32* %l6, align 4 br label %10 store i32 1, i32* %4, align 4 @@ -67,6 +75,7 @@ store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 + store i32 5, i32* %l6, align 4 br label %15 store i32 1, i32* %4, align 4 @@ -86,19 +95,22 @@ ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: adrp x9, x -; CHECK-NEXT: mov w10, #2 -; CHECK-NEXT: mov w11, #3 -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: str w8, [x9, :lo12:x] -; CHECK-NEXT: mov w9, #4 +; CHECK-NEXT: adrp x12, x +; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: mov w10, #3 +; CHECK-NEXT: mov w11, #4 +; CHECK-NEXT: str w8, [x12, :lo12:x] +; CHECK-NEXT: mov w12, #5 ; CHECK-NEXT: stp w8, wzr, [x29, #-8] -; CHECK-NEXT: stur w10, [x29, #-12] -; CHECK-NEXT: stp w9, w11, [sp, #12] +; CHECK-NEXT: stur w9, [x29, #-12] +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: stp w11, w10, [sp, #12] +; CHECK-NEXT: str w12, [sp, #8] ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP -; CHECK-NEXT: stp w10, w8, [x29, #-12] -; CHECK-NEXT: stp w9, w11, [sp, #12] +; CHECK-NEXT: stp w9, w8, [x29, #-12] +; CHECK-NEXT: stp w11, w10, [sp, #12] +; CHECK-NEXT: str w12, [sp, #8] ; CHECK-NEXT: .cfi_def_cfa wsp, 48 ; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #48 @@ -111,6 +123,7 @@ %3 = alloca i32, align 4 %4 = alloca i32, align 4 %5 = alloca i32, align 4 + %l6 = alloca i32, align 4 store i32 0, i32* %1, align 4 store i32 0, i32* @x, align 4 @@ -118,12 +131,14 @@ store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 + store i32 5, i32* %l6, align 4 store i32 1, i32* @x, align 4 call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() store i32 1, i32* %2, align 4 store i32 2, i32* %3, align 4 store i32 3, i32* %4, align 4 store i32 4, i32* %5, align 4 + store i32 5, i32* %l6, align 4 ret i32 0 } Index: llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s @@ -8,12 +8,12 @@ # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 8 -# CHECK-NEXT: Total Cycles: 9 +# CHECK-NEXT: Total Cycles: 7 # CHECK-NEXT: Total uOps: 8 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.89 -# CHECK-NEXT: IPC: 0.89 +# CHECK-NEXT: uOps Per Cycle: 1.14 +# CHECK-NEXT: IPC: 1.14 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -25,10 +25,10 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 0.50 add w2, w3, #1 -# CHECK-NEXT: 1 3 0.50 add w4, w3, #2, lsl #12 -# CHECK-NEXT: 1 3 0.50 add w0, w4, #3 -# CHECK-NEXT: 1 3 0.50 add w1, w0, #4 +# CHECK-NEXT: 1 1 0.50 add w2, w3, #1 +# CHECK-NEXT: 1 1 0.50 add w4, w3, #2, lsl #12 +# CHECK-NEXT: 1 1 0.50 add w0, w4, #3 +# CHECK-NEXT: 1 1 0.50 add w1, w0, #4 # CHECK: Resources: # CHECK-NEXT: [0.0] - CortexA55UnitALU @@ -56,16 +56,16 @@ # CHECK-NEXT: 1.00 - - - - - - - - - - - add w1, w0, #4 # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: Index 0123456 -# CHECK: [0,0] DeeE . . add w2, w3, #1 -# CHECK-NEXT: [0,1] DeeE . . add w4, w3, #2, lsl #12 -# CHECK-NEXT: [0,2] .DeeE. . add w0, w4, #3 -# CHECK-NEXT: [0,3] . DeeE . add w1, w0, #4 -# CHECK-NEXT: [1,0] . DeeE . add w2, w3, #1 -# CHECK-NEXT: [1,1] . DeeE . add w4, w3, #2, lsl #12 -# CHECK-NEXT: [1,2] . DeeE. add w0, w4, #3 -# CHECK-NEXT: [1,3] . DeeE add w1, w0, #4 +# CHECK: [0,0] DE .. add w2, w3, #1 +# CHECK-NEXT: [0,1] DE .. add w4, w3, #2, lsl #12 +# CHECK-NEXT: [0,2] .DE .. add w0, w4, #3 +# CHECK-NEXT: [0,3] . DE .. add w1, w0, #4 +# CHECK-NEXT: [1,0] . DE .. add w2, w3, #1 +# CHECK-NEXT: [1,1] . DE.. add w4, w3, #2, lsl #12 +# CHECK-NEXT: [1,2] . DE. add w0, w4, #3 +# CHECK-NEXT: [1,3] . DE add w1, w0, #4 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions Index: llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s @@ -29,13 +29,13 @@ # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 2 3 1.00 * ldr w4, [x2], #4 # CHECK-NEXT: 1 3 1.00 * ldr w5, [x3] -# CHECK-NEXT: 1 4 1.00 madd w0, w5, w4, w0 -# CHECK-NEXT: 1 3 0.50 add x3, x3, x13 -# CHECK-NEXT: 1 3 0.50 subs x1, x1, #1 -# CHECK-NEXT: 1 1 1.00 * str w0, [x21, x18, lsl #2] +# CHECK-NEXT: 1 3 1.00 madd w0, w5, w4, w0 +# CHECK-NEXT: 1 1 0.50 add x3, x3, x13 +# CHECK-NEXT: 1 1 0.50 subs x1, x1, #1 +# CHECK-NEXT: 1 2 1.00 * str w0, [x21, x18, lsl #2] # CHECK: Dynamic Dispatch Stall Cycles: -# CHECK-NEXT: RAT - Register unavailable: 8 (47.1%) +# CHECK-NEXT: RAT - Register unavailable: 6 (35.3%) # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 Index: llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s @@ -29,13 +29,13 @@ # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 2 3 1.00 * ldr w4, [x2], #4 # CHECK-NEXT: 1 3 1.00 * ldr w5, [x3] -# CHECK-NEXT: 1 4 1.00 madd w0, w5, w4, w0 -# CHECK-NEXT: 1 3 0.50 add x3, x3, x13 -# CHECK-NEXT: 1 3 0.50 subs x1, x1, #1 -# CHECK-NEXT: 1 1 1.00 * str w0, [x21, x18, lsl #2] +# CHECK-NEXT: 1 3 1.00 madd w0, w5, w4, w0 +# CHECK-NEXT: 1 1 0.50 add x3, x3, x13 +# CHECK-NEXT: 1 1 0.50 subs x1, x1, #1 +# CHECK-NEXT: 1 2 1.00 * str w0, [x21, x18, lsl #2] # CHECK: Dynamic Dispatch Stall Cycles: -# CHECK-NEXT: RAT - Register unavailable: 8 (47.1%) +# CHECK-NEXT: RAT - Register unavailable: 6 (35.3%) # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 @@ -95,15 +95,15 @@ # CHECK: [0,0] DeeE . . .. ldr w4, [x2], #4 # CHECK-NEXT: [0,1] .DeeE. . .. ldr w5, [x3] -# CHECK-NEXT: [0,2] . DeeeE . .. madd w0, w5, w4, w0 -# CHECK-NEXT: [0,3] . DeeE . .. add x3, x3, x13 -# CHECK-NEXT: [0,4] . DeeE . .. subs x1, x1, #1 +# CHECK-NEXT: [0,2] . DeeE . .. madd w0, w5, w4, w0 +# CHECK-NEXT: [0,3] . .DE . .. add x3, x3, x13 +# CHECK-NEXT: [0,4] . .DE . .. subs x1, x1, #1 # CHECK-NEXT: [0,5] . . DE . .. str w0, [x21, x18, lsl #2] # CHECK-NEXT: [1,0] . . DeeE .. ldr w4, [x2], #4 # CHECK-NEXT: [1,1] . . DeeE .. ldr w5, [x3] -# CHECK-NEXT: [1,2] . . . DeeeE madd w0, w5, w4, w0 -# CHECK-NEXT: [1,3] . . . DeeE add x3, x3, x13 -# CHECK-NEXT: [1,4] . . . DeeE subs x1, x1, #1 +# CHECK-NEXT: [1,2] . . . DeeE. madd w0, w5, w4, w0 +# CHECK-NEXT: [1,3] . . . DE. add x3, x3, x13 +# CHECK-NEXT: [1,4] . . . DE. subs x1, x1, #1 # CHECK-NEXT: [1,5] . . . DE str w0, [x21, x18, lsl #2] # CHECK: Average Wait times (based on the timeline view): Index: llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s @@ -1377,307 +1377,307 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 0.50 add w2, w3, #4095 -# CHECK-NEXT: 1 3 0.50 add w30, w29, #1, lsl #12 -# CHECK-NEXT: 1 3 0.50 add w13, w5, #4095, lsl #12 -# CHECK-NEXT: 1 3 0.50 add x5, x7, #1638 -# CHECK-NEXT: 1 3 0.50 add w20, wsp, #801 -# CHECK-NEXT: 1 3 0.50 add wsp, wsp, #1104 -# CHECK-NEXT: 1 3 0.50 add wsp, w30, #4084 -# CHECK-NEXT: 1 3 0.50 add x0, x24, #291 -# CHECK-NEXT: 1 3 0.50 add x3, x24, #4095, lsl #12 -# CHECK-NEXT: 1 3 0.50 add x8, sp, #1074 -# CHECK-NEXT: 1 3 0.50 add sp, x29, #3816 -# CHECK-NEXT: 1 3 0.50 sub w0, wsp, #4077 -# CHECK-NEXT: 1 3 0.50 sub w4, w20, #546, lsl #12 -# CHECK-NEXT: 1 3 0.50 sub sp, sp, #288 -# CHECK-NEXT: 1 3 0.50 sub wsp, w19, #16 -# CHECK-NEXT: 1 3 0.50 adds w13, w23, #291, lsl #12 -# CHECK-NEXT: 1 3 0.50 cmn w2, #4095 -# CHECK-NEXT: 1 3 0.50 adds w20, wsp, #0 -# CHECK-NEXT: 1 3 0.50 cmn x3, #1, lsl #12 -# CHECK-NEXT: 1 3 0.50 cmp sp, #20, lsl #12 -# CHECK-NEXT: 1 3 0.50 cmp x30, #4095 -# CHECK-NEXT: 1 3 0.50 subs x4, sp, #3822 -# CHECK-NEXT: 1 3 0.50 cmn w3, #291, lsl #12 -# CHECK-NEXT: 1 3 0.50 cmn wsp, #1365 -# CHECK-NEXT: 1 3 0.50 cmn sp, #1092, lsl #12 -# CHECK-NEXT: 1 3 0.50 mov sp, x30 -# CHECK-NEXT: 1 3 0.50 mov wsp, w20 -# CHECK-NEXT: 1 3 0.50 mov x11, sp -# CHECK-NEXT: 1 3 0.50 mov w24, wsp -# CHECK-NEXT: 1 3 0.50 add w3, w5, w7 -# CHECK-NEXT: 1 3 0.50 add wzr, w3, w5 -# CHECK-NEXT: 1 3 0.50 add w20, wzr, w4 -# CHECK-NEXT: 1 3 0.50 add w4, w6, wzr -# CHECK-NEXT: 1 3 0.50 add w11, w13, w15 -# CHECK-NEXT: 1 3 0.50 add w9, w3, wzr, lsl #10 -# CHECK-NEXT: 1 3 0.50 add w17, w29, w20, lsl #31 -# CHECK-NEXT: 1 3 0.50 add w21, w22, w23, lsr #0 -# CHECK-NEXT: 1 3 0.50 add w24, w25, w26, lsr #18 -# CHECK-NEXT: 1 3 0.50 add w27, w28, w29, lsr #31 -# CHECK-NEXT: 1 3 0.50 add w2, w3, w4, asr #0 -# CHECK-NEXT: 1 3 0.50 add w5, w6, w7, asr #21 -# CHECK-NEXT: 1 3 0.50 add w8, w9, w10, asr #31 -# CHECK-NEXT: 1 3 0.50 add x3, x5, x7 -# CHECK-NEXT: 1 3 0.50 add xzr, x3, x5 -# CHECK-NEXT: 1 3 0.50 add x20, xzr, x4 -# CHECK-NEXT: 1 3 0.50 add x4, x6, xzr -# CHECK-NEXT: 1 3 0.50 add x11, x13, x15 -# CHECK-NEXT: 1 3 0.50 add x9, x3, xzr, lsl #10 -# CHECK-NEXT: 1 3 0.50 add x17, x29, x20, lsl #63 -# CHECK-NEXT: 1 3 0.50 add x21, x22, x23, lsr #0 -# CHECK-NEXT: 1 3 0.50 add x24, x25, x26, lsr #18 -# CHECK-NEXT: 1 3 0.50 add x27, x28, x29, lsr #63 -# CHECK-NEXT: 1 3 0.50 add x2, x3, x4, asr #0 -# CHECK-NEXT: 1 3 0.50 add x5, x6, x7, asr #21 -# CHECK-NEXT: 1 3 0.50 add x8, x9, x10, asr #63 -# CHECK-NEXT: 1 3 0.50 adds w3, w5, w7 -# CHECK-NEXT: 1 3 0.50 cmn w3, w5 -# CHECK-NEXT: 1 3 0.50 adds w20, wzr, w4 -# CHECK-NEXT: 1 3 0.50 adds w4, w6, wzr -# CHECK-NEXT: 1 3 0.50 adds w11, w13, w15 -# CHECK-NEXT: 1 3 0.50 adds w9, w3, wzr, lsl #10 -# CHECK-NEXT: 1 3 0.50 adds w17, w29, w20, lsl #31 -# CHECK-NEXT: 1 3 0.50 adds w21, w22, w23, lsr #0 -# CHECK-NEXT: 1 3 0.50 adds w24, w25, w26, lsr #18 -# CHECK-NEXT: 1 3 0.50 adds w27, w28, w29, lsr #31 -# CHECK-NEXT: 1 3 0.50 adds w2, w3, w4, asr #0 -# CHECK-NEXT: 1 3 0.50 adds w5, w6, w7, asr #21 -# CHECK-NEXT: 1 3 0.50 adds w8, w9, w10, asr #31 -# CHECK-NEXT: 1 3 0.50 adds x3, x5, x7 -# CHECK-NEXT: 1 3 0.50 cmn x3, x5 -# CHECK-NEXT: 1 3 0.50 adds x20, xzr, x4 -# CHECK-NEXT: 1 3 0.50 adds x4, x6, xzr -# CHECK-NEXT: 1 3 0.50 adds x11, x13, x15 -# CHECK-NEXT: 1 3 0.50 adds x9, x3, xzr, lsl #10 -# CHECK-NEXT: 1 3 0.50 adds x17, x29, x20, lsl #63 -# CHECK-NEXT: 1 3 0.50 adds x21, x22, x23, lsr #0 -# CHECK-NEXT: 1 3 0.50 adds x24, x25, x26, lsr #18 -# CHECK-NEXT: 1 3 0.50 adds x27, x28, x29, lsr #63 -# CHECK-NEXT: 1 3 0.50 adds x2, x3, x4, asr #0 -# CHECK-NEXT: 1 3 0.50 adds x5, x6, x7, asr #21 -# CHECK-NEXT: 1 3 0.50 adds x8, x9, x10, asr #63 -# CHECK-NEXT: 1 3 0.50 sub w3, w5, w7 -# CHECK-NEXT: 1 3 0.50 sub wzr, w3, w5 -# CHECK-NEXT: 1 3 0.50 sub w4, w6, wzr -# CHECK-NEXT: 1 3 0.50 sub w11, w13, w15 -# CHECK-NEXT: 1 3 0.50 sub w9, w3, wzr, lsl #10 -# CHECK-NEXT: 1 3 0.50 sub w17, w29, w20, lsl #31 -# CHECK-NEXT: 1 3 0.50 sub w21, w22, w23, lsr #0 -# CHECK-NEXT: 1 3 0.50 sub w24, w25, w26, lsr #18 -# CHECK-NEXT: 1 3 0.50 sub w27, w28, w29, lsr #31 -# CHECK-NEXT: 1 3 0.50 sub w2, w3, w4, asr #0 -# CHECK-NEXT: 1 3 0.50 sub w5, w6, w7, asr #21 -# CHECK-NEXT: 1 3 0.50 sub w8, w9, w10, asr #31 -# CHECK-NEXT: 1 3 0.50 sub x3, x5, x7 -# CHECK-NEXT: 1 3 0.50 sub xzr, x3, x5 -# CHECK-NEXT: 1 3 0.50 sub x4, x6, xzr -# CHECK-NEXT: 1 3 0.50 sub x11, x13, x15 -# CHECK-NEXT: 1 3 0.50 sub x9, x3, xzr, lsl #10 -# CHECK-NEXT: 1 3 0.50 sub x17, x29, x20, lsl #63 -# CHECK-NEXT: 1 3 0.50 sub x21, x22, x23, lsr #0 -# CHECK-NEXT: 1 3 0.50 sub x24, x25, x26, lsr #18 -# CHECK-NEXT: 1 3 0.50 sub x27, x28, x29, lsr #63 -# CHECK-NEXT: 1 3 0.50 sub x2, x3, x4, asr #0 -# CHECK-NEXT: 1 3 0.50 sub x5, x6, x7, asr #21 -# CHECK-NEXT: 1 3 0.50 sub x8, x9, x10, asr #63 -# CHECK-NEXT: 1 3 0.50 subs w3, w5, w7 -# CHECK-NEXT: 1 3 0.50 cmp w3, w5 -# CHECK-NEXT: 1 3 0.50 subs w4, w6, wzr -# CHECK-NEXT: 1 3 0.50 subs w11, w13, w15 -# CHECK-NEXT: 1 3 0.50 subs w9, w3, wzr, lsl #10 -# CHECK-NEXT: 1 3 0.50 subs w17, w29, w20, lsl #31 -# CHECK-NEXT: 1 3 0.50 subs w21, w22, w23, lsr #0 -# CHECK-NEXT: 1 3 0.50 subs w24, w25, w26, lsr #18 -# CHECK-NEXT: 1 3 0.50 subs w27, w28, w29, lsr #31 -# CHECK-NEXT: 1 3 0.50 subs w2, w3, w4, asr #0 -# CHECK-NEXT: 1 3 0.50 subs w5, w6, w7, asr #21 -# CHECK-NEXT: 1 3 0.50 subs w8, w9, w10, asr #31 -# CHECK-NEXT: 1 3 0.50 subs x3, x5, x7 -# CHECK-NEXT: 1 3 0.50 cmp x3, x5 -# CHECK-NEXT: 1 3 0.50 subs x4, x6, xzr -# CHECK-NEXT: 1 3 0.50 subs x11, x13, x15 -# CHECK-NEXT: 1 3 0.50 subs x9, x3, xzr, lsl #10 -# CHECK-NEXT: 1 3 0.50 subs x17, x29, x20, lsl #63 -# CHECK-NEXT: 1 3 0.50 subs x21, x22, x23, lsr #0 -# CHECK-NEXT: 1 3 0.50 subs x24, x25, x26, lsr #18 -# CHECK-NEXT: 1 3 0.50 subs x27, x28, x29, lsr #63 -# CHECK-NEXT: 1 3 0.50 subs x2, x3, x4, asr #0 -# CHECK-NEXT: 1 3 0.50 subs x5, x6, x7, asr #21 -# CHECK-NEXT: 1 3 0.50 subs x8, x9, x10, asr #63 -# CHECK-NEXT: 1 3 0.50 cmn wzr, w4 -# CHECK-NEXT: 1 3 0.50 cmn w5, wzr -# CHECK-NEXT: 1 3 0.50 cmn w6, w7 -# CHECK-NEXT: 1 3 0.50 cmn w8, w9, lsl #15 -# CHECK-NEXT: 1 3 0.50 cmn w10, w11, lsl #31 -# CHECK-NEXT: 1 3 0.50 cmn w12, w13, lsr #0 -# CHECK-NEXT: 1 3 0.50 cmn w14, w15, lsr #21 -# CHECK-NEXT: 1 3 0.50 cmn w16, w17, lsr #31 -# CHECK-NEXT: 1 3 0.50 cmn w18, w19, asr #0 -# CHECK-NEXT: 1 3 0.50 cmn w20, w21, asr #22 -# CHECK-NEXT: 1 3 0.50 cmn w22, w23, asr #31 -# CHECK-NEXT: 1 3 0.50 cmn x0, x3 -# CHECK-NEXT: 1 3 0.50 cmn xzr, x4 -# CHECK-NEXT: 1 3 0.50 cmn x5, xzr -# CHECK-NEXT: 1 3 0.50 cmn x6, x7 -# CHECK-NEXT: 1 3 0.50 cmn x8, x9, lsl #15 -# CHECK-NEXT: 1 3 0.50 cmn x10, x11, lsl #63 -# CHECK-NEXT: 1 3 0.50 cmn x12, x13, lsr #0 -# CHECK-NEXT: 1 3 0.50 cmn x14, x15, lsr #41 -# CHECK-NEXT: 1 3 0.50 cmn x16, x17, lsr #63 -# CHECK-NEXT: 1 3 0.50 cmn x18, x19, asr #0 -# CHECK-NEXT: 1 3 0.50 cmn x20, x21, asr #55 -# CHECK-NEXT: 1 3 0.50 cmn x22, x23, asr #63 -# CHECK-NEXT: 1 3 0.50 cmp w0, w3 -# CHECK-NEXT: 1 3 0.50 cmp wzr, w4 -# CHECK-NEXT: 1 3 0.50 cmp w5, wzr -# CHECK-NEXT: 1 3 0.50 cmp w6, w7 -# CHECK-NEXT: 1 3 0.50 cmp w8, w9, lsl #15 -# CHECK-NEXT: 1 3 0.50 cmp w10, w11, lsl #31 -# CHECK-NEXT: 1 3 0.50 cmp w12, w13, lsr #0 -# CHECK-NEXT: 1 3 0.50 cmp w14, w15, lsr #21 -# CHECK-NEXT: 1 3 0.50 cmp w18, w19, asr #0 -# CHECK-NEXT: 1 3 0.50 cmp w20, w21, asr #22 -# CHECK-NEXT: 1 3 0.50 cmp w22, w23, asr #31 -# CHECK-NEXT: 1 3 0.50 cmp x0, x3 -# CHECK-NEXT: 1 3 0.50 cmp xzr, x4 -# CHECK-NEXT: 1 3 0.50 cmp x5, xzr -# CHECK-NEXT: 1 3 0.50 cmp x6, x7 -# CHECK-NEXT: 1 3 0.50 cmp x8, x9, lsl #15 -# CHECK-NEXT: 1 3 0.50 cmp x10, x11, lsl #63 -# CHECK-NEXT: 1 3 0.50 cmp x12, x13, lsr #0 -# CHECK-NEXT: 1 3 0.50 cmp x14, x15, lsr #41 -# CHECK-NEXT: 1 3 0.50 cmp x16, x17, lsr #63 -# CHECK-NEXT: 1 3 0.50 cmp x18, x19, asr #0 -# CHECK-NEXT: 1 3 0.50 cmp x20, x21, asr #55 -# CHECK-NEXT: 1 3 0.50 cmp x22, x23, asr #63 -# CHECK-NEXT: 1 3 0.50 cmp wzr, w0 -# CHECK-NEXT: 1 3 0.50 cmp xzr, x0 -# CHECK-NEXT: 1 3 0.50 adc w29, w27, w25 -# CHECK-NEXT: 1 3 0.50 adc wzr, w3, w4 -# CHECK-NEXT: 1 3 0.50 adc w9, wzr, w10 -# CHECK-NEXT: 1 3 0.50 adc w20, w0, wzr -# CHECK-NEXT: 1 3 0.50 adc x29, x27, x25 -# CHECK-NEXT: 1 3 0.50 adc xzr, x3, x4 -# CHECK-NEXT: 1 3 0.50 adc x9, xzr, x10 -# CHECK-NEXT: 1 3 0.50 adc x20, x0, xzr -# CHECK-NEXT: 1 3 0.50 adcs w29, w27, w25 -# CHECK-NEXT: 1 3 0.50 adcs wzr, w3, w4 -# CHECK-NEXT: 1 3 0.50 adcs w9, wzr, w10 -# CHECK-NEXT: 1 3 0.50 adcs w20, w0, wzr -# CHECK-NEXT: 1 3 0.50 adcs x29, x27, x25 -# CHECK-NEXT: 1 3 0.50 adcs xzr, x3, x4 -# CHECK-NEXT: 1 3 0.50 adcs x9, xzr, x10 -# CHECK-NEXT: 1 3 0.50 adcs x20, x0, xzr -# CHECK-NEXT: 1 3 0.50 sbc w29, w27, w25 -# CHECK-NEXT: 1 3 0.50 sbc wzr, w3, w4 -# CHECK-NEXT: 1 3 0.50 ngc w9, w10 -# CHECK-NEXT: 1 3 0.50 sbc w20, w0, wzr -# CHECK-NEXT: 1 3 0.50 sbc x29, x27, x25 -# CHECK-NEXT: 1 3 0.50 sbc xzr, x3, x4 -# CHECK-NEXT: 1 3 0.50 ngc x9, x10 -# CHECK-NEXT: 1 3 0.50 sbc x20, x0, xzr -# CHECK-NEXT: 1 3 0.50 sbcs w29, w27, w25 -# CHECK-NEXT: 1 3 0.50 sbcs wzr, w3, w4 -# CHECK-NEXT: 1 3 0.50 ngcs w9, w10 -# CHECK-NEXT: 1 3 0.50 sbcs w20, w0, wzr -# CHECK-NEXT: 1 3 0.50 sbcs x29, x27, x25 -# CHECK-NEXT: 1 3 0.50 sbcs xzr, x3, x4 -# CHECK-NEXT: 1 3 0.50 ngcs x9, x10 -# CHECK-NEXT: 1 3 0.50 sbcs x20, x0, xzr -# CHECK-NEXT: 1 3 0.50 ngc w3, w12 -# CHECK-NEXT: 1 3 0.50 ngc wzr, w9 -# CHECK-NEXT: 1 3 0.50 ngc w23, wzr -# CHECK-NEXT: 1 3 0.50 ngc x29, x30 -# CHECK-NEXT: 1 3 0.50 ngc xzr, x0 -# CHECK-NEXT: 1 3 0.50 ngc x0, xzr -# CHECK-NEXT: 1 3 0.50 ngcs w3, w12 -# CHECK-NEXT: 1 3 0.50 ngcs wzr, w9 -# CHECK-NEXT: 1 3 0.50 ngcs w23, wzr -# CHECK-NEXT: 1 3 0.50 ngcs x29, x30 -# CHECK-NEXT: 1 3 0.50 ngcs xzr, x0 -# CHECK-NEXT: 1 3 0.50 ngcs x0, xzr -# CHECK-NEXT: 1 3 0.50 sbfx x1, x2, #3, #2 -# CHECK-NEXT: 1 3 0.50 asr x3, x4, #63 -# CHECK-NEXT: 1 3 0.50 asr wzr, wzr, #31 -# CHECK-NEXT: 1 3 0.50 sbfx w12, w9, #0, #1 -# CHECK-NEXT: 1 3 0.50 ubfiz x4, x5, #52, #11 -# CHECK-NEXT: 1 3 0.50 ubfx xzr, x4, #0, #1 -# CHECK-NEXT: 1 3 0.50 ubfiz x4, xzr, #1, #6 -# CHECK-NEXT: 1 3 0.50 lsr x5, x6, #12 -# CHECK-NEXT: 1 3 0.50 bfi x4, x5, #52, #11 -# CHECK-NEXT: 1 3 0.50 bfxil xzr, x4, #0, #1 -# CHECK-NEXT: 1 3 0.50 bfc x4, #1, #6 -# CHECK-NEXT: 1 3 0.50 bfxil x5, x6, #12, #52 -# CHECK-NEXT: 1 3 0.50 sxtb w1, w2 -# CHECK-NEXT: 1 3 0.50 sxtb xzr, w3 -# CHECK-NEXT: 1 3 0.50 sxth w9, w10 -# CHECK-NEXT: 1 3 0.50 sxth x0, w1 -# CHECK-NEXT: 1 3 0.50 sxtw x3, w30 -# CHECK-NEXT: 1 3 0.50 uxtb w1, w2 -# CHECK-NEXT: 1 3 0.50 uxth w9, w10 -# CHECK-NEXT: 1 3 0.50 ubfx x3, x30, #0, #32 -# CHECK-NEXT: 1 3 0.50 asr w3, w2, #0 -# CHECK-NEXT: 1 3 0.50 asr w9, w10, #31 -# CHECK-NEXT: 1 3 0.50 asr x20, x21, #63 -# CHECK-NEXT: 1 3 0.50 asr w1, wzr, #3 -# CHECK-NEXT: 1 3 0.50 lsr w3, w2, #0 -# CHECK-NEXT: 1 3 0.50 lsr w9, w10, #31 -# CHECK-NEXT: 1 3 0.50 lsr x20, x21, #63 -# CHECK-NEXT: 1 3 0.50 lsr wzr, wzr, #3 -# CHECK-NEXT: 1 3 0.50 lsr w3, w2, #0 -# CHECK-NEXT: 1 3 0.50 lsl w9, w10, #31 -# CHECK-NEXT: 1 3 0.50 lsl x20, x21, #63 -# CHECK-NEXT: 1 3 0.50 lsl w1, wzr, #3 -# CHECK-NEXT: 1 3 0.50 sbfx w9, w10, #0, #1 -# CHECK-NEXT: 1 3 0.50 sbfiz x2, x3, #63, #1 -# CHECK-NEXT: 1 3 0.50 asr x19, x20, #0 -# CHECK-NEXT: 1 3 0.50 sbfiz x9, x10, #5, #59 -# CHECK-NEXT: 1 3 0.50 asr w9, w10, #0 -# CHECK-NEXT: 1 3 0.50 sbfiz w11, w12, #31, #1 -# CHECK-NEXT: 1 3 0.50 sbfiz w13, w14, #29, #3 -# CHECK-NEXT: 1 3 0.50 sbfiz xzr, xzr, #10, #11 -# CHECK-NEXT: 1 3 0.50 sbfx w9, w10, #0, #1 -# CHECK-NEXT: 1 3 0.50 asr x2, x3, #63 -# CHECK-NEXT: 1 3 0.50 asr x19, x20, #0 -# CHECK-NEXT: 1 3 0.50 asr x9, x10, #5 -# CHECK-NEXT: 1 3 0.50 asr w9, w10, #0 -# CHECK-NEXT: 1 3 0.50 asr w11, w12, #31 -# CHECK-NEXT: 1 3 0.50 asr w13, w14, #29 -# CHECK-NEXT: 1 3 0.50 sbfx xzr, xzr, #10, #11 -# CHECK-NEXT: 1 3 0.50 bfxil w9, w10, #0, #1 -# CHECK-NEXT: 1 3 0.50 bfi x2, x3, #63, #1 -# CHECK-NEXT: 1 3 0.50 bfxil x19, x20, #0, #64 -# CHECK-NEXT: 1 3 0.50 bfi x9, x10, #5, #59 -# CHECK-NEXT: 1 3 0.50 bfxil w9, w10, #0, #32 -# CHECK-NEXT: 1 3 0.50 bfi w11, w12, #31, #1 -# CHECK-NEXT: 1 3 0.50 bfi w13, w14, #29, #3 -# CHECK-NEXT: 1 3 0.50 bfc xzr, #10, #11 -# CHECK-NEXT: 1 3 0.50 bfxil w9, w10, #0, #1 -# CHECK-NEXT: 1 3 0.50 bfxil x2, x3, #63, #1 -# CHECK-NEXT: 1 3 0.50 bfxil x19, x20, #0, #64 -# CHECK-NEXT: 1 3 0.50 bfxil x9, x10, #5, #59 -# CHECK-NEXT: 1 3 0.50 bfxil w9, w10, #0, #32 -# CHECK-NEXT: 1 3 0.50 bfxil w11, w12, #31, #1 -# CHECK-NEXT: 1 3 0.50 bfxil w13, w14, #29, #3 -# CHECK-NEXT: 1 3 0.50 bfxil xzr, xzr, #10, #11 -# CHECK-NEXT: 1 3 0.50 ubfx w9, w10, #0, #1 -# CHECK-NEXT: 1 3 0.50 lsl x2, x3, #63 -# CHECK-NEXT: 1 3 0.50 lsr x19, x20, #0 -# CHECK-NEXT: 1 3 0.50 lsl x9, x10, #5 -# CHECK-NEXT: 1 3 0.50 lsr w9, w10, #0 -# CHECK-NEXT: 1 3 0.50 lsl w11, w12, #31 -# CHECK-NEXT: 1 3 0.50 lsl w13, w14, #29 -# CHECK-NEXT: 1 3 0.50 ubfiz xzr, xzr, #10, #11 -# CHECK-NEXT: 1 3 0.50 ubfx w9, w10, #0, #1 -# CHECK-NEXT: 1 3 0.50 lsr x2, x3, #63 -# CHECK-NEXT: 1 3 0.50 lsr x19, x20, #0 -# CHECK-NEXT: 1 3 0.50 lsr x9, x10, #5 -# CHECK-NEXT: 1 3 0.50 lsr w9, w10, #0 -# CHECK-NEXT: 1 3 0.50 lsr w11, w12, #31 -# CHECK-NEXT: 1 3 0.50 lsr w13, w14, #29 -# CHECK-NEXT: 1 3 0.50 ubfx xzr, xzr, #10, #11 +# CHECK-NEXT: 1 1 0.50 add w2, w3, #4095 +# CHECK-NEXT: 1 1 0.50 add w30, w29, #1, lsl #12 +# CHECK-NEXT: 1 1 0.50 add w13, w5, #4095, lsl #12 +# CHECK-NEXT: 1 1 0.50 add x5, x7, #1638 +# CHECK-NEXT: 1 1 0.50 add w20, wsp, #801 +# CHECK-NEXT: 1 1 0.50 add wsp, wsp, #1104 +# CHECK-NEXT: 1 1 0.50 add wsp, w30, #4084 +# CHECK-NEXT: 1 1 0.50 add x0, x24, #291 +# CHECK-NEXT: 1 1 0.50 add x3, x24, #4095, lsl #12 +# CHECK-NEXT: 1 1 0.50 add x8, sp, #1074 +# CHECK-NEXT: 1 1 0.50 add sp, x29, #3816 +# CHECK-NEXT: 1 1 0.50 sub w0, wsp, #4077 +# CHECK-NEXT: 1 1 0.50 sub w4, w20, #546, lsl #12 +# CHECK-NEXT: 1 1 0.50 sub sp, sp, #288 +# CHECK-NEXT: 1 1 0.50 sub wsp, w19, #16 +# CHECK-NEXT: 1 1 0.50 adds w13, w23, #291, lsl #12 +# CHECK-NEXT: 1 1 0.50 cmn w2, #4095 +# CHECK-NEXT: 1 1 0.50 adds w20, wsp, #0 +# CHECK-NEXT: 1 1 0.50 cmn x3, #1, lsl #12 +# CHECK-NEXT: 1 1 0.50 cmp sp, #20, lsl #12 +# CHECK-NEXT: 1 1 0.50 cmp x30, #4095 +# CHECK-NEXT: 1 1 0.50 subs x4, sp, #3822 +# CHECK-NEXT: 1 1 0.50 cmn w3, #291, lsl #12 +# CHECK-NEXT: 1 1 0.50 cmn wsp, #1365 +# CHECK-NEXT: 1 1 0.50 cmn sp, #1092, lsl #12 +# CHECK-NEXT: 1 1 0.50 mov sp, x30 +# CHECK-NEXT: 1 1 0.50 mov wsp, w20 +# CHECK-NEXT: 1 1 0.50 mov x11, sp +# CHECK-NEXT: 1 1 0.50 mov w24, wsp +# CHECK-NEXT: 1 1 0.50 add w3, w5, w7 +# CHECK-NEXT: 1 1 0.50 add wzr, w3, w5 +# CHECK-NEXT: 1 1 0.50 add w20, wzr, w4 +# CHECK-NEXT: 1 1 0.50 add w4, w6, wzr +# CHECK-NEXT: 1 1 0.50 add w11, w13, w15 +# CHECK-NEXT: 1 2 0.50 add w9, w3, wzr, lsl #10 +# CHECK-NEXT: 1 2 0.50 add w17, w29, w20, lsl #31 +# CHECK-NEXT: 1 2 0.50 add w21, w22, w23, lsr #0 +# CHECK-NEXT: 1 2 0.50 add w24, w25, w26, lsr #18 +# CHECK-NEXT: 1 2 0.50 add w27, w28, w29, lsr #31 +# CHECK-NEXT: 1 2 0.50 add w2, w3, w4, asr #0 +# CHECK-NEXT: 1 2 0.50 add w5, w6, w7, asr #21 +# CHECK-NEXT: 1 2 0.50 add w8, w9, w10, asr #31 +# CHECK-NEXT: 1 1 0.50 add x3, x5, x7 +# CHECK-NEXT: 1 1 0.50 add xzr, x3, x5 +# CHECK-NEXT: 1 1 0.50 add x20, xzr, x4 +# CHECK-NEXT: 1 1 0.50 add x4, x6, xzr +# CHECK-NEXT: 1 1 0.50 add x11, x13, x15 +# CHECK-NEXT: 1 2 0.50 add x9, x3, xzr, lsl #10 +# CHECK-NEXT: 1 2 0.50 add x17, x29, x20, lsl #63 +# CHECK-NEXT: 1 2 0.50 add x21, x22, x23, lsr #0 +# CHECK-NEXT: 1 2 0.50 add x24, x25, x26, lsr #18 +# CHECK-NEXT: 1 2 0.50 add x27, x28, x29, lsr #63 +# CHECK-NEXT: 1 2 0.50 add x2, x3, x4, asr #0 +# CHECK-NEXT: 1 2 0.50 add x5, x6, x7, asr #21 +# CHECK-NEXT: 1 2 0.50 add x8, x9, x10, asr #63 +# CHECK-NEXT: 1 1 0.50 adds w3, w5, w7 +# CHECK-NEXT: 1 1 0.50 cmn w3, w5 +# CHECK-NEXT: 1 1 0.50 adds w20, wzr, w4 +# CHECK-NEXT: 1 1 0.50 adds w4, w6, wzr +# CHECK-NEXT: 1 1 0.50 adds w11, w13, w15 +# CHECK-NEXT: 1 2 0.50 adds w9, w3, wzr, lsl #10 +# CHECK-NEXT: 1 2 0.50 adds w17, w29, w20, lsl #31 +# CHECK-NEXT: 1 2 0.50 adds w21, w22, w23, lsr #0 +# CHECK-NEXT: 1 2 0.50 adds w24, w25, w26, lsr #18 +# CHECK-NEXT: 1 2 0.50 adds w27, w28, w29, lsr #31 +# CHECK-NEXT: 1 2 0.50 adds w2, w3, w4, asr #0 +# CHECK-NEXT: 1 2 0.50 adds w5, w6, w7, asr #21 +# CHECK-NEXT: 1 2 0.50 adds w8, w9, w10, asr #31 +# CHECK-NEXT: 1 1 0.50 adds x3, x5, x7 +# CHECK-NEXT: 1 1 0.50 cmn x3, x5 +# CHECK-NEXT: 1 1 0.50 adds x20, xzr, x4 +# CHECK-NEXT: 1 1 0.50 adds x4, x6, xzr +# CHECK-NEXT: 1 1 0.50 adds x11, x13, x15 +# CHECK-NEXT: 1 2 0.50 adds x9, x3, xzr, lsl #10 +# CHECK-NEXT: 1 2 0.50 adds x17, x29, x20, lsl #63 +# CHECK-NEXT: 1 2 0.50 adds x21, x22, x23, lsr #0 +# CHECK-NEXT: 1 2 0.50 adds x24, x25, x26, lsr #18 +# CHECK-NEXT: 1 2 0.50 adds x27, x28, x29, lsr #63 +# CHECK-NEXT: 1 2 0.50 adds x2, x3, x4, asr #0 +# CHECK-NEXT: 1 2 0.50 adds x5, x6, x7, asr #21 +# CHECK-NEXT: 1 2 0.50 adds x8, x9, x10, asr #63 +# CHECK-NEXT: 1 1 0.50 sub w3, w5, w7 +# CHECK-NEXT: 1 1 0.50 sub wzr, w3, w5 +# CHECK-NEXT: 1 1 0.50 sub w4, w6, wzr +# CHECK-NEXT: 1 1 0.50 sub w11, w13, w15 +# CHECK-NEXT: 1 2 0.50 sub w9, w3, wzr, lsl #10 +# CHECK-NEXT: 1 2 0.50 sub w17, w29, w20, lsl #31 +# CHECK-NEXT: 1 2 0.50 sub w21, w22, w23, lsr #0 +# CHECK-NEXT: 1 2 0.50 sub w24, w25, w26, lsr #18 +# CHECK-NEXT: 1 2 0.50 sub w27, w28, w29, lsr #31 +# CHECK-NEXT: 1 2 0.50 sub w2, w3, w4, asr #0 +# CHECK-NEXT: 1 2 0.50 sub w5, w6, w7, asr #21 +# CHECK-NEXT: 1 2 0.50 sub w8, w9, w10, asr #31 +# CHECK-NEXT: 1 1 0.50 sub x3, x5, x7 +# CHECK-NEXT: 1 1 0.50 sub xzr, x3, x5 +# CHECK-NEXT: 1 1 0.50 sub x4, x6, xzr +# CHECK-NEXT: 1 1 0.50 sub x11, x13, x15 +# CHECK-NEXT: 1 2 0.50 sub x9, x3, xzr, lsl #10 +# CHECK-NEXT: 1 2 0.50 sub x17, x29, x20, lsl #63 +# CHECK-NEXT: 1 2 0.50 sub x21, x22, x23, lsr #0 +# CHECK-NEXT: 1 2 0.50 sub x24, x25, x26, lsr #18 +# CHECK-NEXT: 1 2 0.50 sub x27, x28, x29, lsr #63 +# CHECK-NEXT: 1 2 0.50 sub x2, x3, x4, asr #0 +# CHECK-NEXT: 1 2 0.50 sub x5, x6, x7, asr #21 +# CHECK-NEXT: 1 2 0.50 sub x8, x9, x10, asr #63 +# CHECK-NEXT: 1 1 0.50 subs w3, w5, w7 +# CHECK-NEXT: 1 1 0.50 cmp w3, w5 +# CHECK-NEXT: 1 1 0.50 subs w4, w6, wzr +# CHECK-NEXT: 1 1 0.50 subs w11, w13, w15 +# CHECK-NEXT: 1 2 0.50 subs w9, w3, wzr, lsl #10 +# CHECK-NEXT: 1 2 0.50 subs w17, w29, w20, lsl #31 +# CHECK-NEXT: 1 2 0.50 subs w21, w22, w23, lsr #0 +# CHECK-NEXT: 1 2 0.50 subs w24, w25, w26, lsr #18 +# CHECK-NEXT: 1 2 0.50 subs w27, w28, w29, lsr #31 +# CHECK-NEXT: 1 2 0.50 subs w2, w3, w4, asr #0 +# CHECK-NEXT: 1 2 0.50 subs w5, w6, w7, asr #21 +# CHECK-NEXT: 1 2 0.50 subs w8, w9, w10, asr #31 +# CHECK-NEXT: 1 1 0.50 subs x3, x5, x7 +# CHECK-NEXT: 1 1 0.50 cmp x3, x5 +# CHECK-NEXT: 1 1 0.50 subs x4, x6, xzr +# CHECK-NEXT: 1 1 0.50 subs x11, x13, x15 +# CHECK-NEXT: 1 2 0.50 subs x9, x3, xzr, lsl #10 +# CHECK-NEXT: 1 2 0.50 subs x17, x29, x20, lsl #63 +# CHECK-NEXT: 1 2 0.50 subs x21, x22, x23, lsr #0 +# CHECK-NEXT: 1 2 0.50 subs x24, x25, x26, lsr #18 +# CHECK-NEXT: 1 2 0.50 subs x27, x28, x29, lsr #63 +# CHECK-NEXT: 1 2 0.50 subs x2, x3, x4, asr #0 +# CHECK-NEXT: 1 2 0.50 subs x5, x6, x7, asr #21 +# CHECK-NEXT: 1 2 0.50 subs x8, x9, x10, asr #63 +# CHECK-NEXT: 1 1 0.50 cmn wzr, w4 +# CHECK-NEXT: 1 1 0.50 cmn w5, wzr +# CHECK-NEXT: 1 1 0.50 cmn w6, w7 +# CHECK-NEXT: 1 2 0.50 cmn w8, w9, lsl #15 +# CHECK-NEXT: 1 2 0.50 cmn w10, w11, lsl #31 +# CHECK-NEXT: 1 2 0.50 cmn w12, w13, lsr #0 +# CHECK-NEXT: 1 2 0.50 cmn w14, w15, lsr #21 +# CHECK-NEXT: 1 2 0.50 cmn w16, w17, lsr #31 +# CHECK-NEXT: 1 2 0.50 cmn w18, w19, asr #0 +# CHECK-NEXT: 1 2 0.50 cmn w20, w21, asr #22 +# CHECK-NEXT: 1 2 0.50 cmn w22, w23, asr #31 +# CHECK-NEXT: 1 1 0.50 cmn x0, x3 +# CHECK-NEXT: 1 1 0.50 cmn xzr, x4 +# CHECK-NEXT: 1 1 0.50 cmn x5, xzr +# CHECK-NEXT: 1 1 0.50 cmn x6, x7 +# CHECK-NEXT: 1 2 0.50 cmn x8, x9, lsl #15 +# CHECK-NEXT: 1 2 0.50 cmn x10, x11, lsl #63 +# CHECK-NEXT: 1 2 0.50 cmn x12, x13, lsr #0 +# CHECK-NEXT: 1 2 0.50 cmn x14, x15, lsr #41 +# CHECK-NEXT: 1 2 0.50 cmn x16, x17, lsr #63 +# CHECK-NEXT: 1 2 0.50 cmn x18, x19, asr #0 +# CHECK-NEXT: 1 2 0.50 cmn x20, x21, asr #55 +# CHECK-NEXT: 1 2 0.50 cmn x22, x23, asr #63 +# CHECK-NEXT: 1 1 0.50 cmp w0, w3 +# CHECK-NEXT: 1 1 0.50 cmp wzr, w4 +# CHECK-NEXT: 1 1 0.50 cmp w5, wzr +# CHECK-NEXT: 1 1 0.50 cmp w6, w7 +# CHECK-NEXT: 1 2 0.50 cmp w8, w9, lsl #15 +# CHECK-NEXT: 1 2 0.50 cmp w10, w11, lsl #31 +# CHECK-NEXT: 1 2 0.50 cmp w12, w13, lsr #0 +# CHECK-NEXT: 1 2 0.50 cmp w14, w15, lsr #21 +# CHECK-NEXT: 1 2 0.50 cmp w18, w19, asr #0 +# CHECK-NEXT: 1 2 0.50 cmp w20, w21, asr #22 +# CHECK-NEXT: 1 2 0.50 cmp w22, w23, asr #31 +# CHECK-NEXT: 1 1 0.50 cmp x0, x3 +# CHECK-NEXT: 1 1 0.50 cmp xzr, x4 +# CHECK-NEXT: 1 1 0.50 cmp x5, xzr +# CHECK-NEXT: 1 1 0.50 cmp x6, x7 +# CHECK-NEXT: 1 2 0.50 cmp x8, x9, lsl #15 +# CHECK-NEXT: 1 2 0.50 cmp x10, x11, lsl #63 +# CHECK-NEXT: 1 2 0.50 cmp x12, x13, lsr #0 +# CHECK-NEXT: 1 2 0.50 cmp x14, x15, lsr #41 +# CHECK-NEXT: 1 2 0.50 cmp x16, x17, lsr #63 +# CHECK-NEXT: 1 2 0.50 cmp x18, x19, asr #0 +# CHECK-NEXT: 1 2 0.50 cmp x20, x21, asr #55 +# CHECK-NEXT: 1 2 0.50 cmp x22, x23, asr #63 +# CHECK-NEXT: 1 1 0.50 cmp wzr, w0 +# CHECK-NEXT: 1 1 0.50 cmp xzr, x0 +# CHECK-NEXT: 1 1 0.50 adc w29, w27, w25 +# CHECK-NEXT: 1 1 0.50 adc wzr, w3, w4 +# CHECK-NEXT: 1 1 0.50 adc w9, wzr, w10 +# CHECK-NEXT: 1 1 0.50 adc w20, w0, wzr +# CHECK-NEXT: 1 1 0.50 adc x29, x27, x25 +# CHECK-NEXT: 1 1 0.50 adc xzr, x3, x4 +# CHECK-NEXT: 1 1 0.50 adc x9, xzr, x10 +# CHECK-NEXT: 1 1 0.50 adc x20, x0, xzr +# CHECK-NEXT: 1 1 0.50 adcs w29, w27, w25 +# CHECK-NEXT: 1 1 0.50 adcs wzr, w3, w4 +# CHECK-NEXT: 1 1 0.50 adcs w9, wzr, w10 +# CHECK-NEXT: 1 1 0.50 adcs w20, w0, wzr +# CHECK-NEXT: 1 1 0.50 adcs x29, x27, x25 +# CHECK-NEXT: 1 1 0.50 adcs xzr, x3, x4 +# CHECK-NEXT: 1 1 0.50 adcs x9, xzr, x10 +# CHECK-NEXT: 1 1 0.50 adcs x20, x0, xzr +# CHECK-NEXT: 1 1 0.50 sbc w29, w27, w25 +# CHECK-NEXT: 1 1 0.50 sbc wzr, w3, w4 +# CHECK-NEXT: 1 1 0.50 ngc w9, w10 +# CHECK-NEXT: 1 1 0.50 sbc w20, w0, wzr +# CHECK-NEXT: 1 1 0.50 sbc x29, x27, x25 +# CHECK-NEXT: 1 1 0.50 sbc xzr, x3, x4 +# CHECK-NEXT: 1 1 0.50 ngc x9, x10 +# CHECK-NEXT: 1 1 0.50 sbc x20, x0, xzr +# CHECK-NEXT: 1 1 0.50 sbcs w29, w27, w25 +# CHECK-NEXT: 1 1 0.50 sbcs wzr, w3, w4 +# CHECK-NEXT: 1 1 0.50 ngcs w9, w10 +# CHECK-NEXT: 1 1 0.50 sbcs w20, w0, wzr +# CHECK-NEXT: 1 1 0.50 sbcs x29, x27, x25 +# CHECK-NEXT: 1 1 0.50 sbcs xzr, x3, x4 +# CHECK-NEXT: 1 1 0.50 ngcs x9, x10 +# CHECK-NEXT: 1 1 0.50 sbcs x20, x0, xzr +# CHECK-NEXT: 1 1 0.50 ngc w3, w12 +# CHECK-NEXT: 1 1 0.50 ngc wzr, w9 +# CHECK-NEXT: 1 1 0.50 ngc w23, wzr +# CHECK-NEXT: 1 1 0.50 ngc x29, x30 +# CHECK-NEXT: 1 1 0.50 ngc xzr, x0 +# CHECK-NEXT: 1 1 0.50 ngc x0, xzr +# CHECK-NEXT: 1 1 0.50 ngcs w3, w12 +# CHECK-NEXT: 1 1 0.50 ngcs wzr, w9 +# CHECK-NEXT: 1 1 0.50 ngcs w23, wzr +# CHECK-NEXT: 1 1 0.50 ngcs x29, x30 +# CHECK-NEXT: 1 1 0.50 ngcs xzr, x0 +# CHECK-NEXT: 1 1 0.50 ngcs x0, xzr +# CHECK-NEXT: 1 2 0.50 sbfx x1, x2, #3, #2 +# CHECK-NEXT: 1 2 0.50 asr x3, x4, #63 +# CHECK-NEXT: 1 2 0.50 asr wzr, wzr, #31 +# CHECK-NEXT: 1 2 0.50 sbfx w12, w9, #0, #1 +# CHECK-NEXT: 1 2 0.50 ubfiz x4, x5, #52, #11 +# CHECK-NEXT: 1 2 0.50 ubfx xzr, x4, #0, #1 +# CHECK-NEXT: 1 2 0.50 ubfiz x4, xzr, #1, #6 +# CHECK-NEXT: 1 2 0.50 lsr x5, x6, #12 +# CHECK-NEXT: 1 2 0.50 bfi x4, x5, #52, #11 +# CHECK-NEXT: 1 2 0.50 bfxil xzr, x4, #0, #1 +# CHECK-NEXT: 1 2 0.50 bfc x4, #1, #6 +# CHECK-NEXT: 1 2 0.50 bfxil x5, x6, #12, #52 +# CHECK-NEXT: 1 2 0.50 sxtb w1, w2 +# CHECK-NEXT: 1 2 0.50 sxtb xzr, w3 +# CHECK-NEXT: 1 2 0.50 sxth w9, w10 +# CHECK-NEXT: 1 2 0.50 sxth x0, w1 +# CHECK-NEXT: 1 2 0.50 sxtw x3, w30 +# CHECK-NEXT: 1 2 0.50 uxtb w1, w2 +# CHECK-NEXT: 1 2 0.50 uxth w9, w10 +# CHECK-NEXT: 1 2 0.50 ubfx x3, x30, #0, #32 +# CHECK-NEXT: 1 2 0.50 asr w3, w2, #0 +# CHECK-NEXT: 1 2 0.50 asr w9, w10, #31 +# CHECK-NEXT: 1 2 0.50 asr x20, x21, #63 +# CHECK-NEXT: 1 2 0.50 asr w1, wzr, #3 +# CHECK-NEXT: 1 2 0.50 lsr w3, w2, #0 +# CHECK-NEXT: 1 2 0.50 lsr w9, w10, #31 +# CHECK-NEXT: 1 2 0.50 lsr x20, x21, #63 +# CHECK-NEXT: 1 2 0.50 lsr wzr, wzr, #3 +# CHECK-NEXT: 1 2 0.50 lsr w3, w2, #0 +# CHECK-NEXT: 1 2 0.50 lsl w9, w10, #31 +# CHECK-NEXT: 1 2 0.50 lsl x20, x21, #63 +# CHECK-NEXT: 1 2 0.50 lsl w1, wzr, #3 +# CHECK-NEXT: 1 2 0.50 sbfx w9, w10, #0, #1 +# CHECK-NEXT: 1 2 0.50 sbfiz x2, x3, #63, #1 +# CHECK-NEXT: 1 2 0.50 asr x19, x20, #0 +# CHECK-NEXT: 1 2 0.50 sbfiz x9, x10, #5, #59 +# CHECK-NEXT: 1 2 0.50 asr w9, w10, #0 +# CHECK-NEXT: 1 2 0.50 sbfiz w11, w12, #31, #1 +# CHECK-NEXT: 1 2 0.50 sbfiz w13, w14, #29, #3 +# CHECK-NEXT: 1 2 0.50 sbfiz xzr, xzr, #10, #11 +# CHECK-NEXT: 1 2 0.50 sbfx w9, w10, #0, #1 +# CHECK-NEXT: 1 2 0.50 asr x2, x3, #63 +# CHECK-NEXT: 1 2 0.50 asr x19, x20, #0 +# CHECK-NEXT: 1 2 0.50 asr x9, x10, #5 +# CHECK-NEXT: 1 2 0.50 asr w9, w10, #0 +# CHECK-NEXT: 1 2 0.50 asr w11, w12, #31 +# CHECK-NEXT: 1 2 0.50 asr w13, w14, #29 +# CHECK-NEXT: 1 2 0.50 sbfx xzr, xzr, #10, #11 +# CHECK-NEXT: 1 2 0.50 bfxil w9, w10, #0, #1 +# CHECK-NEXT: 1 2 0.50 bfi x2, x3, #63, #1 +# CHECK-NEXT: 1 2 0.50 bfxil x19, x20, #0, #64 +# CHECK-NEXT: 1 2 0.50 bfi x9, x10, #5, #59 +# CHECK-NEXT: 1 2 0.50 bfxil w9, w10, #0, #32 +# CHECK-NEXT: 1 2 0.50 bfi w11, w12, #31, #1 +# CHECK-NEXT: 1 2 0.50 bfi w13, w14, #29, #3 +# CHECK-NEXT: 1 2 0.50 bfc xzr, #10, #11 +# CHECK-NEXT: 1 2 0.50 bfxil w9, w10, #0, #1 +# CHECK-NEXT: 1 2 0.50 bfxil x2, x3, #63, #1 +# CHECK-NEXT: 1 2 0.50 bfxil x19, x20, #0, #64 +# CHECK-NEXT: 1 2 0.50 bfxil x9, x10, #5, #59 +# CHECK-NEXT: 1 2 0.50 bfxil w9, w10, #0, #32 +# CHECK-NEXT: 1 2 0.50 bfxil w11, w12, #31, #1 +# CHECK-NEXT: 1 2 0.50 bfxil w13, w14, #29, #3 +# CHECK-NEXT: 1 2 0.50 bfxil xzr, xzr, #10, #11 +# CHECK-NEXT: 1 2 0.50 ubfx w9, w10, #0, #1 +# CHECK-NEXT: 1 2 0.50 lsl x2, x3, #63 +# CHECK-NEXT: 1 2 0.50 lsr x19, x20, #0 +# CHECK-NEXT: 1 2 0.50 lsl x9, x10, #5 +# CHECK-NEXT: 1 2 0.50 lsr w9, w10, #0 +# CHECK-NEXT: 1 2 0.50 lsl w11, w12, #31 +# CHECK-NEXT: 1 2 0.50 lsl w13, w14, #29 +# CHECK-NEXT: 1 2 0.50 ubfiz xzr, xzr, #10, #11 +# CHECK-NEXT: 1 2 0.50 ubfx w9, w10, #0, #1 +# CHECK-NEXT: 1 2 0.50 lsr x2, x3, #63 +# CHECK-NEXT: 1 2 0.50 lsr x19, x20, #0 +# CHECK-NEXT: 1 2 0.50 lsr x9, x10, #5 +# CHECK-NEXT: 1 2 0.50 lsr w9, w10, #0 +# CHECK-NEXT: 1 2 0.50 lsr w11, w12, #31 +# CHECK-NEXT: 1 2 0.50 lsr w13, w14, #29 +# CHECK-NEXT: 1 2 0.50 ubfx xzr, xzr, #10, #11 # CHECK-NEXT: 1 1 1.00 cbz w5, #4 # CHECK-NEXT: 1 1 1.00 cbz x5, #0 # CHECK-NEXT: 1 1 1.00 cbnz x2, #-4 @@ -1687,181 +1687,181 @@ # CHECK-NEXT: 1 1 1.00 b.ne #4 # CHECK-NEXT: 1 1 1.00 b.ge #1048572 # CHECK-NEXT: 1 1 1.00 b.ge #-4 -# CHECK-NEXT: 1 3 0.50 ccmp w1, #31, #0, eq -# CHECK-NEXT: 1 3 0.50 ccmp w3, #0, #15, hs -# CHECK-NEXT: 1 3 0.50 ccmp wzr, #15, #13, hs -# CHECK-NEXT: 1 3 0.50 ccmp x9, #31, #0, le -# CHECK-NEXT: 1 3 0.50 ccmp x3, #0, #15, gt -# CHECK-NEXT: 1 3 0.50 ccmp xzr, #5, #7, ne -# CHECK-NEXT: 1 3 0.50 ccmn w1, #31, #0, eq -# CHECK-NEXT: 1 3 0.50 ccmn w3, #0, #15, hs -# CHECK-NEXT: 1 3 0.50 ccmn wzr, #15, #13, hs -# CHECK-NEXT: 1 3 0.50 ccmn x9, #31, #0, le -# CHECK-NEXT: 1 3 0.50 ccmn x3, #0, #15, gt -# CHECK-NEXT: 1 3 0.50 ccmn xzr, #5, #7, ne -# CHECK-NEXT: 1 3 0.50 ccmp w1, wzr, #0, eq -# CHECK-NEXT: 1 3 0.50 ccmp w3, w0, #15, hs -# CHECK-NEXT: 1 3 0.50 ccmp wzr, w15, #13, hs -# CHECK-NEXT: 1 3 0.50 ccmp x9, xzr, #0, le -# CHECK-NEXT: 1 3 0.50 ccmp x3, x0, #15, gt -# CHECK-NEXT: 1 3 0.50 ccmp xzr, x5, #7, ne -# CHECK-NEXT: 1 3 0.50 ccmn w1, wzr, #0, eq -# CHECK-NEXT: 1 3 0.50 ccmn w3, w0, #15, hs -# CHECK-NEXT: 1 3 0.50 ccmn wzr, w15, #13, hs -# CHECK-NEXT: 1 3 0.50 ccmn x9, xzr, #0, le -# CHECK-NEXT: 1 3 0.50 ccmn x3, x0, #15, gt -# CHECK-NEXT: 1 3 0.50 ccmn xzr, x5, #7, ne -# CHECK-NEXT: 1 3 0.50 csel w1, w0, w19, ne -# CHECK-NEXT: 1 3 0.50 csel wzr, w5, w9, eq -# CHECK-NEXT: 1 3 0.50 csel w9, wzr, w30, gt -# CHECK-NEXT: 1 3 0.50 csel w1, w28, wzr, mi -# CHECK-NEXT: 1 3 0.50 csel x19, x23, x29, lt -# CHECK-NEXT: 1 3 0.50 csel xzr, x3, x4, ge -# CHECK-NEXT: 1 3 0.50 csel x5, xzr, x6, hs -# CHECK-NEXT: 1 3 0.50 csel x7, x8, xzr, lo -# CHECK-NEXT: 1 3 0.50 csinc w1, w0, w19, ne -# CHECK-NEXT: 1 3 0.50 csinc wzr, w5, w9, eq -# CHECK-NEXT: 1 3 0.50 csinc w9, wzr, w30, gt -# CHECK-NEXT: 1 3 0.50 csinc w1, w28, wzr, mi -# CHECK-NEXT: 1 3 0.50 csinc x19, x23, x29, lt -# CHECK-NEXT: 1 3 0.50 csinc xzr, x3, x4, ge -# CHECK-NEXT: 1 3 0.50 csinc x5, xzr, x6, hs -# CHECK-NEXT: 1 3 0.50 csinc x7, x8, xzr, lo -# CHECK-NEXT: 1 3 0.50 csinv w1, w0, w19, ne -# CHECK-NEXT: 1 3 0.50 csinv wzr, w5, w9, eq -# CHECK-NEXT: 1 3 0.50 csinv w9, wzr, w30, gt -# CHECK-NEXT: 1 3 0.50 csinv w1, w28, wzr, mi -# CHECK-NEXT: 1 3 0.50 csinv x19, x23, x29, lt -# CHECK-NEXT: 1 3 0.50 csinv xzr, x3, x4, ge -# CHECK-NEXT: 1 3 0.50 csinv x5, xzr, x6, hs -# CHECK-NEXT: 1 3 0.50 csinv x7, x8, xzr, lo -# CHECK-NEXT: 1 3 0.50 csneg w1, w0, w19, ne -# CHECK-NEXT: 1 3 0.50 csneg wzr, w5, w9, eq -# CHECK-NEXT: 1 3 0.50 csneg w9, wzr, w30, gt -# CHECK-NEXT: 1 3 0.50 csneg w1, w28, wzr, mi -# CHECK-NEXT: 1 3 0.50 csneg x19, x23, x29, lt -# CHECK-NEXT: 1 3 0.50 csneg xzr, x3, x4, ge -# CHECK-NEXT: 1 3 0.50 csneg x5, xzr, x6, hs -# CHECK-NEXT: 1 3 0.50 csneg x7, x8, xzr, lo -# CHECK-NEXT: 1 3 0.50 cset w3, eq -# CHECK-NEXT: 1 3 0.50 cset x9, pl -# CHECK-NEXT: 1 3 0.50 csetm w20, ne -# CHECK-NEXT: 1 3 0.50 csetm x30, ge -# CHECK-NEXT: 1 3 0.50 csinc w2, wzr, wzr, al -# CHECK-NEXT: 1 3 0.50 csinv x3, xzr, xzr, nv -# CHECK-NEXT: 1 3 0.50 cinc w3, w5, gt -# CHECK-NEXT: 1 3 0.50 cinc wzr, w4, le -# CHECK-NEXT: 1 3 0.50 cset w9, lt -# CHECK-NEXT: 1 3 0.50 cinc x3, x5, gt -# CHECK-NEXT: 1 3 0.50 cinc xzr, x4, le -# CHECK-NEXT: 1 3 0.50 cset x9, lt -# CHECK-NEXT: 1 3 0.50 csinc w5, w6, w6, nv -# CHECK-NEXT: 1 3 0.50 csinc x1, x2, x2, al -# CHECK-NEXT: 1 3 0.50 cinv w3, w5, gt -# CHECK-NEXT: 1 3 0.50 cinv wzr, w4, le -# CHECK-NEXT: 1 3 0.50 csetm w9, lt -# CHECK-NEXT: 1 3 0.50 cinv x3, x5, gt -# CHECK-NEXT: 1 3 0.50 cinv xzr, x4, le -# CHECK-NEXT: 1 3 0.50 csetm x9, lt -# CHECK-NEXT: 1 3 0.50 csinv x1, x0, x0, al -# CHECK-NEXT: 1 3 0.50 csinv w9, w8, w8, nv -# CHECK-NEXT: 1 3 0.50 cneg w3, w5, gt -# CHECK-NEXT: 1 3 0.50 cneg wzr, w4, le -# CHECK-NEXT: 1 3 0.50 cneg w9, wzr, lt -# CHECK-NEXT: 1 3 0.50 cneg x3, x5, gt -# CHECK-NEXT: 1 3 0.50 cneg xzr, x4, le -# CHECK-NEXT: 1 3 0.50 cneg x9, xzr, lt -# CHECK-NEXT: 1 3 0.50 csneg x4, x8, x8, al -# CHECK-NEXT: 1 3 0.50 csinv w9, w8, w8, nv -# CHECK-NEXT: 1 3 0.50 rbit w0, w7 -# CHECK-NEXT: 1 3 0.50 rbit x18, x3 -# CHECK-NEXT: 1 3 0.50 rev16 w17, w1 -# CHECK-NEXT: 1 3 0.50 rev16 x5, x2 -# CHECK-NEXT: 1 3 0.50 rev w18, w0 -# CHECK-NEXT: 1 3 0.50 rev32 x20, x1 -# CHECK-NEXT: 1 3 0.50 rev x22, x2 -# CHECK-NEXT: 1 3 0.50 clz w24, w3 -# CHECK-NEXT: 1 3 0.50 clz x26, x4 -# CHECK-NEXT: 1 3 0.50 cls w3, w5 -# CHECK-NEXT: 1 3 0.50 cls x20, x5 +# CHECK-NEXT: 1 1 0.50 ccmp w1, #31, #0, eq +# CHECK-NEXT: 1 1 0.50 ccmp w3, #0, #15, hs +# CHECK-NEXT: 1 1 0.50 ccmp wzr, #15, #13, hs +# CHECK-NEXT: 1 1 0.50 ccmp x9, #31, #0, le +# CHECK-NEXT: 1 1 0.50 ccmp x3, #0, #15, gt +# CHECK-NEXT: 1 1 0.50 ccmp xzr, #5, #7, ne +# CHECK-NEXT: 1 1 0.50 ccmn w1, #31, #0, eq +# CHECK-NEXT: 1 1 0.50 ccmn w3, #0, #15, hs +# CHECK-NEXT: 1 1 0.50 ccmn wzr, #15, #13, hs +# CHECK-NEXT: 1 1 0.50 ccmn x9, #31, #0, le +# CHECK-NEXT: 1 1 0.50 ccmn x3, #0, #15, gt +# CHECK-NEXT: 1 1 0.50 ccmn xzr, #5, #7, ne +# CHECK-NEXT: 1 1 0.50 ccmp w1, wzr, #0, eq +# CHECK-NEXT: 1 1 0.50 ccmp w3, w0, #15, hs +# CHECK-NEXT: 1 1 0.50 ccmp wzr, w15, #13, hs +# CHECK-NEXT: 1 1 0.50 ccmp x9, xzr, #0, le +# CHECK-NEXT: 1 1 0.50 ccmp x3, x0, #15, gt +# CHECK-NEXT: 1 1 0.50 ccmp xzr, x5, #7, ne +# CHECK-NEXT: 1 1 0.50 ccmn w1, wzr, #0, eq +# CHECK-NEXT: 1 1 0.50 ccmn w3, w0, #15, hs +# CHECK-NEXT: 1 1 0.50 ccmn wzr, w15, #13, hs +# CHECK-NEXT: 1 1 0.50 ccmn x9, xzr, #0, le +# CHECK-NEXT: 1 1 0.50 ccmn x3, x0, #15, gt +# CHECK-NEXT: 1 1 0.50 ccmn xzr, x5, #7, ne +# CHECK-NEXT: 1 1 0.50 csel w1, w0, w19, ne +# CHECK-NEXT: 1 1 0.50 csel wzr, w5, w9, eq +# CHECK-NEXT: 1 1 0.50 csel w9, wzr, w30, gt +# CHECK-NEXT: 1 1 0.50 csel w1, w28, wzr, mi +# CHECK-NEXT: 1 1 0.50 csel x19, x23, x29, lt +# CHECK-NEXT: 1 1 0.50 csel xzr, x3, x4, ge +# CHECK-NEXT: 1 1 0.50 csel x5, xzr, x6, hs +# CHECK-NEXT: 1 1 0.50 csel x7, x8, xzr, lo +# CHECK-NEXT: 1 1 0.50 csinc w1, w0, w19, ne +# CHECK-NEXT: 1 1 0.50 csinc wzr, w5, w9, eq +# CHECK-NEXT: 1 1 0.50 csinc w9, wzr, w30, gt +# CHECK-NEXT: 1 1 0.50 csinc w1, w28, wzr, mi +# CHECK-NEXT: 1 1 0.50 csinc x19, x23, x29, lt +# CHECK-NEXT: 1 1 0.50 csinc xzr, x3, x4, ge +# CHECK-NEXT: 1 1 0.50 csinc x5, xzr, x6, hs +# CHECK-NEXT: 1 1 0.50 csinc x7, x8, xzr, lo +# CHECK-NEXT: 1 1 0.50 csinv w1, w0, w19, ne +# CHECK-NEXT: 1 1 0.50 csinv wzr, w5, w9, eq +# CHECK-NEXT: 1 1 0.50 csinv w9, wzr, w30, gt +# CHECK-NEXT: 1 1 0.50 csinv w1, w28, wzr, mi +# CHECK-NEXT: 1 1 0.50 csinv x19, x23, x29, lt +# CHECK-NEXT: 1 1 0.50 csinv xzr, x3, x4, ge +# CHECK-NEXT: 1 1 0.50 csinv x5, xzr, x6, hs +# CHECK-NEXT: 1 1 0.50 csinv x7, x8, xzr, lo +# CHECK-NEXT: 1 1 0.50 csneg w1, w0, w19, ne +# CHECK-NEXT: 1 1 0.50 csneg wzr, w5, w9, eq +# CHECK-NEXT: 1 1 0.50 csneg w9, wzr, w30, gt +# CHECK-NEXT: 1 1 0.50 csneg w1, w28, wzr, mi +# CHECK-NEXT: 1 1 0.50 csneg x19, x23, x29, lt +# CHECK-NEXT: 1 1 0.50 csneg xzr, x3, x4, ge +# CHECK-NEXT: 1 1 0.50 csneg x5, xzr, x6, hs +# CHECK-NEXT: 1 1 0.50 csneg x7, x8, xzr, lo +# CHECK-NEXT: 1 1 0.50 cset w3, eq +# CHECK-NEXT: 1 1 0.50 cset x9, pl +# CHECK-NEXT: 1 1 0.50 csetm w20, ne +# CHECK-NEXT: 1 1 0.50 csetm x30, ge +# CHECK-NEXT: 1 1 0.50 csinc w2, wzr, wzr, al +# CHECK-NEXT: 1 1 0.50 csinv x3, xzr, xzr, nv +# CHECK-NEXT: 1 1 0.50 cinc w3, w5, gt +# CHECK-NEXT: 1 1 0.50 cinc wzr, w4, le +# CHECK-NEXT: 1 1 0.50 cset w9, lt +# CHECK-NEXT: 1 1 0.50 cinc x3, x5, gt +# CHECK-NEXT: 1 1 0.50 cinc xzr, x4, le +# CHECK-NEXT: 1 1 0.50 cset x9, lt +# CHECK-NEXT: 1 1 0.50 csinc w5, w6, w6, nv +# CHECK-NEXT: 1 1 0.50 csinc x1, x2, x2, al +# CHECK-NEXT: 1 1 0.50 cinv w3, w5, gt +# CHECK-NEXT: 1 1 0.50 cinv wzr, w4, le +# CHECK-NEXT: 1 1 0.50 csetm w9, lt +# CHECK-NEXT: 1 1 0.50 cinv x3, x5, gt +# CHECK-NEXT: 1 1 0.50 cinv xzr, x4, le +# CHECK-NEXT: 1 1 0.50 csetm x9, lt +# CHECK-NEXT: 1 1 0.50 csinv x1, x0, x0, al +# CHECK-NEXT: 1 1 0.50 csinv w9, w8, w8, nv +# CHECK-NEXT: 1 1 0.50 cneg w3, w5, gt +# CHECK-NEXT: 1 1 0.50 cneg wzr, w4, le +# CHECK-NEXT: 1 1 0.50 cneg w9, wzr, lt +# CHECK-NEXT: 1 1 0.50 cneg x3, x5, gt +# CHECK-NEXT: 1 1 0.50 cneg xzr, x4, le +# CHECK-NEXT: 1 1 0.50 cneg x9, xzr, lt +# CHECK-NEXT: 1 1 0.50 csneg x4, x8, x8, al +# CHECK-NEXT: 1 1 0.50 csinv w9, w8, w8, nv +# CHECK-NEXT: 1 2 0.50 rbit w0, w7 +# CHECK-NEXT: 1 2 0.50 rbit x18, x3 +# CHECK-NEXT: 1 1 0.50 rev16 w17, w1 +# CHECK-NEXT: 1 1 0.50 rev16 x5, x2 +# CHECK-NEXT: 1 1 0.50 rev w18, w0 +# CHECK-NEXT: 1 1 0.50 rev32 x20, x1 +# CHECK-NEXT: 1 1 0.50 rev x22, x2 +# CHECK-NEXT: 1 1 0.50 clz w24, w3 +# CHECK-NEXT: 1 1 0.50 clz x26, x4 +# CHECK-NEXT: 1 1 0.50 cls w3, w5 +# CHECK-NEXT: 1 1 0.50 cls x20, x5 # CHECK-NEXT: 1 8 8.00 udiv w0, w7, w10 # CHECK-NEXT: 1 8 8.00 udiv x9, x22, x4 # CHECK-NEXT: 1 8 8.00 sdiv w12, w21, w0 # CHECK-NEXT: 1 8 8.00 sdiv x13, x2, x1 -# CHECK-NEXT: 1 3 0.50 lsl w11, w12, w13 -# CHECK-NEXT: 1 3 0.50 lsl x14, x15, x16 -# CHECK-NEXT: 1 3 0.50 lsr w17, w18, w19 -# CHECK-NEXT: 1 3 0.50 lsr x20, x21, x22 -# CHECK-NEXT: 1 3 0.50 asr w23, w24, w25 -# CHECK-NEXT: 1 3 0.50 asr x26, x27, x28 -# CHECK-NEXT: 1 3 0.50 ror w0, w1, w2 -# CHECK-NEXT: 1 3 0.50 ror x3, x4, x5 -# CHECK-NEXT: 1 3 0.50 lsl w6, w7, w8 -# CHECK-NEXT: 1 3 0.50 lsl x9, x10, x11 -# CHECK-NEXT: 1 3 0.50 lsr w12, w13, w14 -# CHECK-NEXT: 1 3 0.50 lsr x15, x16, x17 -# CHECK-NEXT: 1 3 0.50 asr w18, w19, w20 -# CHECK-NEXT: 1 3 0.50 asr x21, x22, x23 -# CHECK-NEXT: 1 3 0.50 ror w24, w25, w26 -# CHECK-NEXT: 1 3 0.50 ror x27, x28, x29 +# CHECK-NEXT: 1 2 0.50 lsl w11, w12, w13 +# CHECK-NEXT: 1 2 0.50 lsl x14, x15, x16 +# CHECK-NEXT: 1 2 0.50 lsr w17, w18, w19 +# CHECK-NEXT: 1 2 0.50 lsr x20, x21, x22 +# CHECK-NEXT: 1 2 0.50 asr w23, w24, w25 +# CHECK-NEXT: 1 2 0.50 asr x26, x27, x28 +# CHECK-NEXT: 1 2 0.50 ror w0, w1, w2 +# CHECK-NEXT: 1 2 0.50 ror x3, x4, x5 +# CHECK-NEXT: 1 2 0.50 lsl w6, w7, w8 +# CHECK-NEXT: 1 2 0.50 lsl x9, x10, x11 +# CHECK-NEXT: 1 2 0.50 lsr w12, w13, w14 +# CHECK-NEXT: 1 2 0.50 lsr x15, x16, x17 +# CHECK-NEXT: 1 2 0.50 asr w18, w19, w20 +# CHECK-NEXT: 1 2 0.50 asr x21, x22, x23 +# CHECK-NEXT: 1 2 0.50 ror w24, w25, w26 +# CHECK-NEXT: 1 2 0.50 ror x27, x28, x29 # CHECK-NEXT: 1 4 1.00 smulh x30, x29, x28 # CHECK-NEXT: 1 4 1.00 smulh xzr, x27, x26 # CHECK-NEXT: 1 4 1.00 umulh x30, x29, x28 # CHECK-NEXT: 1 4 1.00 umulh x23, x30, xzr -# CHECK-NEXT: 1 4 1.00 madd w1, w3, w7, w4 -# CHECK-NEXT: 1 4 1.00 madd wzr, w0, w9, w11 -# CHECK-NEXT: 1 4 1.00 madd w13, wzr, w4, w4 -# CHECK-NEXT: 1 4 1.00 madd w19, w30, wzr, w29 -# CHECK-NEXT: 1 4 1.00 mul w4, w5, w6 +# CHECK-NEXT: 1 3 1.00 madd w1, w3, w7, w4 +# CHECK-NEXT: 1 3 1.00 madd wzr, w0, w9, w11 +# CHECK-NEXT: 1 3 1.00 madd w13, wzr, w4, w4 +# CHECK-NEXT: 1 3 1.00 madd w19, w30, wzr, w29 +# CHECK-NEXT: 1 3 1.00 mul w4, w5, w6 # CHECK-NEXT: 1 4 1.00 madd x1, x3, x7, x4 # CHECK-NEXT: 1 4 1.00 madd xzr, x0, x9, x11 # CHECK-NEXT: 1 4 1.00 madd x13, xzr, x4, x4 # CHECK-NEXT: 1 4 1.00 madd x19, x30, xzr, x29 # CHECK-NEXT: 1 4 1.00 mul x4, x5, x6 -# CHECK-NEXT: 1 4 1.00 msub w1, w3, w7, w4 -# CHECK-NEXT: 1 4 1.00 msub wzr, w0, w9, w11 -# CHECK-NEXT: 1 4 1.00 msub w13, wzr, w4, w4 -# CHECK-NEXT: 1 4 1.00 msub w19, w30, wzr, w29 -# CHECK-NEXT: 1 4 1.00 mneg w4, w5, w6 +# CHECK-NEXT: 1 3 1.00 msub w1, w3, w7, w4 +# CHECK-NEXT: 1 3 1.00 msub wzr, w0, w9, w11 +# CHECK-NEXT: 1 3 1.00 msub w13, wzr, w4, w4 +# CHECK-NEXT: 1 3 1.00 msub w19, w30, wzr, w29 +# CHECK-NEXT: 1 3 1.00 mneg w4, w5, w6 # CHECK-NEXT: 1 4 1.00 msub x1, x3, x7, x4 # CHECK-NEXT: 1 4 1.00 msub xzr, x0, x9, x11 # CHECK-NEXT: 1 4 1.00 msub x13, xzr, x4, x4 # CHECK-NEXT: 1 4 1.00 msub x19, x30, xzr, x29 # CHECK-NEXT: 1 4 1.00 mneg x4, x5, x6 -# CHECK-NEXT: 1 4 1.00 smaddl x3, w5, w2, x9 -# CHECK-NEXT: 1 4 1.00 smaddl xzr, w10, w11, x12 -# CHECK-NEXT: 1 4 1.00 smaddl x13, wzr, w14, x15 -# CHECK-NEXT: 1 4 1.00 smaddl x16, w17, wzr, x18 -# CHECK-NEXT: 1 4 1.00 smull x19, w20, w21 -# CHECK-NEXT: 1 4 1.00 smsubl x3, w5, w2, x9 -# CHECK-NEXT: 1 4 1.00 smsubl xzr, w10, w11, x12 -# CHECK-NEXT: 1 4 1.00 smsubl x13, wzr, w14, x15 -# CHECK-NEXT: 1 4 1.00 smsubl x16, w17, wzr, x18 -# CHECK-NEXT: 1 4 1.00 smnegl x19, w20, w21 -# CHECK-NEXT: 1 4 1.00 umaddl x3, w5, w2, x9 -# CHECK-NEXT: 1 4 1.00 umaddl xzr, w10, w11, x12 -# CHECK-NEXT: 1 4 1.00 umaddl x13, wzr, w14, x15 -# CHECK-NEXT: 1 4 1.00 umaddl x16, w17, wzr, x18 -# CHECK-NEXT: 1 4 1.00 umull x19, w20, w21 -# CHECK-NEXT: 1 4 1.00 umsubl x3, w5, w2, x9 -# CHECK-NEXT: 1 4 1.00 umsubl x16, w17, wzr, x18 -# CHECK-NEXT: 1 4 1.00 umnegl x19, w20, w21 +# CHECK-NEXT: 1 3 1.00 smaddl x3, w5, w2, x9 +# CHECK-NEXT: 1 3 1.00 smaddl xzr, w10, w11, x12 +# CHECK-NEXT: 1 3 1.00 smaddl x13, wzr, w14, x15 +# CHECK-NEXT: 1 3 1.00 smaddl x16, w17, wzr, x18 +# CHECK-NEXT: 1 3 1.00 smull x19, w20, w21 +# CHECK-NEXT: 1 3 1.00 smsubl x3, w5, w2, x9 +# CHECK-NEXT: 1 3 1.00 smsubl xzr, w10, w11, x12 +# CHECK-NEXT: 1 3 1.00 smsubl x13, wzr, w14, x15 +# CHECK-NEXT: 1 3 1.00 smsubl x16, w17, wzr, x18 +# CHECK-NEXT: 1 3 1.00 smnegl x19, w20, w21 +# CHECK-NEXT: 1 3 1.00 umaddl x3, w5, w2, x9 +# CHECK-NEXT: 1 3 1.00 umaddl xzr, w10, w11, x12 +# CHECK-NEXT: 1 3 1.00 umaddl x13, wzr, w14, x15 +# CHECK-NEXT: 1 3 1.00 umaddl x16, w17, wzr, x18 +# CHECK-NEXT: 1 3 1.00 umull x19, w20, w21 +# CHECK-NEXT: 1 3 1.00 umsubl x3, w5, w2, x9 +# CHECK-NEXT: 1 3 1.00 umsubl x16, w17, wzr, x18 +# CHECK-NEXT: 1 3 1.00 umnegl x19, w20, w21 # CHECK-NEXT: 1 4 1.00 smulh x30, x29, x28 # CHECK-NEXT: 1 4 1.00 smulh x23, x22, xzr # CHECK-NEXT: 1 4 1.00 umulh x23, x22, xzr # CHECK-NEXT: 1 4 1.00 mul x19, x20, xzr -# CHECK-NEXT: 1 4 1.00 mneg w21, w22, w23 -# CHECK-NEXT: 1 4 1.00 smull x11, w13, w17 -# CHECK-NEXT: 1 4 1.00 umull x11, w13, w17 -# CHECK-NEXT: 1 4 1.00 smnegl x11, w13, w17 -# CHECK-NEXT: 1 4 1.00 umnegl x11, w13, w17 -# CHECK-NEXT: 1 3 0.50 extr w3, w5, w7, #0 -# CHECK-NEXT: 1 3 0.50 extr w11, w13, w17, #31 -# CHECK-NEXT: 1 3 0.50 extr x3, x5, x7, #15 -# CHECK-NEXT: 1 3 0.50 extr x11, x13, x17, #63 -# CHECK-NEXT: 1 3 0.50 ror x19, x23, #24 -# CHECK-NEXT: 1 3 0.50 ror x29, xzr, #63 -# CHECK-NEXT: 1 3 0.50 ror w9, w13, #31 +# CHECK-NEXT: 1 3 1.00 mneg w21, w22, w23 +# CHECK-NEXT: 1 3 1.00 smull x11, w13, w17 +# CHECK-NEXT: 1 3 1.00 umull x11, w13, w17 +# CHECK-NEXT: 1 3 1.00 smnegl x11, w13, w17 +# CHECK-NEXT: 1 3 1.00 umnegl x11, w13, w17 +# CHECK-NEXT: 1 2 0.50 extr w3, w5, w7, #0 +# CHECK-NEXT: 1 2 0.50 extr w11, w13, w17, #31 +# CHECK-NEXT: 1 2 0.50 extr x3, x5, x7, #15 +# CHECK-NEXT: 1 2 0.50 extr x11, x13, x17, #63 +# CHECK-NEXT: 1 2 0.50 ror x19, x23, #24 +# CHECK-NEXT: 1 2 0.50 ror x29, xzr, #63 +# CHECK-NEXT: 1 2 0.50 ror w9, w13, #31 # CHECK-NEXT: 1 3 0.50 fcmp s3, s5 # CHECK-NEXT: 1 3 0.50 fcmp s31, #0.0 # CHECK-NEXT: 1 3 0.50 fcmp s31, #0.0 @@ -2352,63 +2352,63 @@ # CHECK-NEXT: 1 3 1.00 * ldr s10, [x19, #16380] # CHECK-NEXT: 1 3 1.00 * ldr d3, [x10, #32760] # CHECK-NEXT: 1 1 1.00 * str q12, [sp, #65520] -# CHECK-NEXT: 1 4 1.00 * ldrb w3, [sp, x5] -# CHECK-NEXT: 1 4 1.00 * ldrb w9, [x27, x6] -# CHECK-NEXT: 1 4 1.00 * ldrsb w10, [x30, x7] -# CHECK-NEXT: 1 4 1.00 * ldrb w11, [x29, x3, sxtx] -# CHECK-NEXT: 1 1 1.00 * strb w12, [x28, xzr, sxtx] -# CHECK-NEXT: 1 4 1.00 * ldrb w14, [x26, w6, uxtw] -# CHECK-NEXT: 1 4 1.00 * ldrsb w15, [x25, w7, uxtw] -# CHECK-NEXT: 1 4 1.00 * ldrb w17, [x23, w9, sxtw] -# CHECK-NEXT: 1 4 1.00 * ldrsb x18, [x22, w10, sxtw] -# CHECK-NEXT: 1 4 1.00 * ldrsh w3, [sp, x5] -# CHECK-NEXT: 1 4 1.00 * ldrsh w9, [x27, x6] -# CHECK-NEXT: 1 4 1.00 * ldrh w10, [x30, x7, lsl #1] -# CHECK-NEXT: 1 1 1.00 * strh w11, [x29, x3, sxtx] -# CHECK-NEXT: 1 4 1.00 * ldrh w12, [x28, xzr, sxtx] -# CHECK-NEXT: 1 4 1.00 * ldrsh x13, [x27, x5, sxtx #1] -# CHECK-NEXT: 1 4 1.00 * ldrh w14, [x26, w6, uxtw] -# CHECK-NEXT: 1 4 1.00 * ldrh w15, [x25, w7, uxtw] -# CHECK-NEXT: 1 4 1.00 * ldrsh w16, [x24, w8, uxtw #1] -# CHECK-NEXT: 1 4 1.00 * ldrh w17, [x23, w9, sxtw] -# CHECK-NEXT: 1 4 1.00 * ldrh w18, [x22, w10, sxtw] -# CHECK-NEXT: 1 1 1.00 * strh w19, [x21, wzr, sxtw #1] -# CHECK-NEXT: 1 4 1.00 * ldr w3, [sp, x5] -# CHECK-NEXT: 1 4 1.00 * ldr s9, [x27, x6] -# CHECK-NEXT: 1 4 1.00 * ldr w10, [x30, x7, lsl #2] -# CHECK-NEXT: 1 4 1.00 * ldr w11, [x29, x3, sxtx] -# CHECK-NEXT: 1 1 1.00 * str s12, [x28, xzr, sxtx] -# CHECK-NEXT: 1 1 1.00 * str w13, [x27, x5, sxtx #2] -# CHECK-NEXT: 1 1 1.00 * str w14, [x26, w6, uxtw] -# CHECK-NEXT: 1 4 1.00 * ldr w15, [x25, w7, uxtw] -# CHECK-NEXT: 1 4 1.00 * ldr w16, [x24, w8, uxtw #2] -# CHECK-NEXT: 1 4 1.00 * ldrsw x17, [x23, w9, sxtw] -# CHECK-NEXT: 1 4 1.00 * ldr w18, [x22, w10, sxtw] -# CHECK-NEXT: 1 4 1.00 * ldrsw x19, [x21, wzr, sxtw #2] -# CHECK-NEXT: 1 4 1.00 * ldr x3, [sp, x5] -# CHECK-NEXT: 1 1 1.00 * str x9, [x27, x6] -# CHECK-NEXT: 1 4 1.00 * ldr d10, [x30, x7, lsl #3] -# CHECK-NEXT: 1 1 1.00 * str x11, [x29, x3, sxtx] -# CHECK-NEXT: 1 4 1.00 * ldr x12, [x28, xzr, sxtx] -# CHECK-NEXT: 1 4 1.00 * ldr x13, [x27, x5, sxtx #3] +# CHECK-NEXT: 1 5 1.00 * ldrb w3, [sp, x5] +# CHECK-NEXT: 1 5 1.00 * ldrb w9, [x27, x6] +# CHECK-NEXT: 1 5 1.00 * ldrsb w10, [x30, x7] +# CHECK-NEXT: 1 5 1.00 * ldrb w11, [x29, x3, sxtx] +# CHECK-NEXT: 1 2 1.00 * strb w12, [x28, xzr, sxtx] +# CHECK-NEXT: 1 5 1.00 * ldrb w14, [x26, w6, uxtw] +# CHECK-NEXT: 1 5 1.00 * ldrsb w15, [x25, w7, uxtw] +# CHECK-NEXT: 1 5 1.00 * ldrb w17, [x23, w9, sxtw] +# CHECK-NEXT: 1 5 1.00 * ldrsb x18, [x22, w10, sxtw] +# CHECK-NEXT: 1 5 1.00 * ldrsh w3, [sp, x5] +# CHECK-NEXT: 1 5 1.00 * ldrsh w9, [x27, x6] +# CHECK-NEXT: 1 5 1.00 * ldrh w10, [x30, x7, lsl #1] +# CHECK-NEXT: 1 2 1.00 * strh w11, [x29, x3, sxtx] +# CHECK-NEXT: 1 5 1.00 * ldrh w12, [x28, xzr, sxtx] +# CHECK-NEXT: 1 5 1.00 * ldrsh x13, [x27, x5, sxtx #1] +# CHECK-NEXT: 1 5 1.00 * ldrh w14, [x26, w6, uxtw] +# CHECK-NEXT: 1 5 1.00 * ldrh w15, [x25, w7, uxtw] +# CHECK-NEXT: 1 5 1.00 * ldrsh w16, [x24, w8, uxtw #1] +# CHECK-NEXT: 1 5 1.00 * ldrh w17, [x23, w9, sxtw] +# CHECK-NEXT: 1 5 1.00 * ldrh w18, [x22, w10, sxtw] +# CHECK-NEXT: 1 2 1.00 * strh w19, [x21, wzr, sxtw #1] +# CHECK-NEXT: 1 5 1.00 * ldr w3, [sp, x5] +# CHECK-NEXT: 1 5 1.00 * ldr s9, [x27, x6] +# CHECK-NEXT: 1 5 1.00 * ldr w10, [x30, x7, lsl #2] +# CHECK-NEXT: 1 5 1.00 * ldr w11, [x29, x3, sxtx] +# CHECK-NEXT: 1 2 1.00 * str s12, [x28, xzr, sxtx] +# CHECK-NEXT: 1 2 1.00 * str w13, [x27, x5, sxtx #2] +# CHECK-NEXT: 1 2 1.00 * str w14, [x26, w6, uxtw] +# CHECK-NEXT: 1 5 1.00 * ldr w15, [x25, w7, uxtw] +# CHECK-NEXT: 1 5 1.00 * ldr w16, [x24, w8, uxtw #2] +# CHECK-NEXT: 1 5 1.00 * ldrsw x17, [x23, w9, sxtw] +# CHECK-NEXT: 1 5 1.00 * ldr w18, [x22, w10, sxtw] +# CHECK-NEXT: 1 5 1.00 * ldrsw x19, [x21, wzr, sxtw #2] +# CHECK-NEXT: 1 5 1.00 * ldr x3, [sp, x5] +# CHECK-NEXT: 1 2 1.00 * str x9, [x27, x6] +# CHECK-NEXT: 1 5 1.00 * ldr d10, [x30, x7, lsl #3] +# CHECK-NEXT: 1 2 1.00 * str x11, [x29, x3, sxtx] +# CHECK-NEXT: 1 5 1.00 * ldr x12, [x28, xzr, sxtx] +# CHECK-NEXT: 1 5 1.00 * ldr x13, [x27, x5, sxtx #3] # CHECK-NEXT: 1 3 1.00 U prfm pldl1keep, [x26, w6, uxtw] -# CHECK-NEXT: 1 4 1.00 * ldr x15, [x25, w7, uxtw] -# CHECK-NEXT: 1 4 1.00 * ldr x16, [x24, w8, uxtw #3] -# CHECK-NEXT: 1 4 1.00 * ldr x17, [x23, w9, sxtw] -# CHECK-NEXT: 1 4 1.00 * ldr x18, [x22, w10, sxtw] -# CHECK-NEXT: 1 1 1.00 * str d19, [x21, wzr, sxtw #3] -# CHECK-NEXT: 1 4 1.00 * ldr q3, [sp, x5] -# CHECK-NEXT: 1 4 1.00 * ldr q9, [x27, x6] -# CHECK-NEXT: 1 4 1.00 * ldr q10, [x30, x7, lsl #4] -# CHECK-NEXT: 1 1 1.00 * str q11, [x29, x3, sxtx] -# CHECK-NEXT: 1 1 1.00 * str q12, [x28, xzr, sxtx] -# CHECK-NEXT: 1 1 1.00 * str q13, [x27, x5, sxtx #4] -# CHECK-NEXT: 1 4 1.00 * ldr q14, [x26, w6, uxtw] -# CHECK-NEXT: 1 4 1.00 * ldr q15, [x25, w7, uxtw] -# CHECK-NEXT: 1 4 1.00 * ldr q16, [x24, w8, uxtw #4] -# CHECK-NEXT: 1 4 1.00 * ldr q17, [x23, w9, sxtw] -# CHECK-NEXT: 1 1 1.00 * str q18, [x22, w10, sxtw] -# CHECK-NEXT: 1 4 1.00 * ldr q19, [x21, wzr, sxtw #4] +# CHECK-NEXT: 1 5 1.00 * ldr x15, [x25, w7, uxtw] +# CHECK-NEXT: 1 5 1.00 * ldr x16, [x24, w8, uxtw #3] +# CHECK-NEXT: 1 5 1.00 * ldr x17, [x23, w9, sxtw] +# CHECK-NEXT: 1 5 1.00 * ldr x18, [x22, w10, sxtw] +# CHECK-NEXT: 1 2 1.00 * str d19, [x21, wzr, sxtw #3] +# CHECK-NEXT: 1 5 1.00 * ldr q3, [sp, x5] +# CHECK-NEXT: 1 5 1.00 * ldr q9, [x27, x6] +# CHECK-NEXT: 1 5 1.00 * ldr q10, [x30, x7, lsl #4] +# CHECK-NEXT: 1 2 1.00 * str q11, [x29, x3, sxtx] +# CHECK-NEXT: 1 2 1.00 * str q12, [x28, xzr, sxtx] +# CHECK-NEXT: 1 2 1.00 * str q13, [x27, x5, sxtx #4] +# CHECK-NEXT: 1 5 1.00 * ldr q14, [x26, w6, uxtw] +# CHECK-NEXT: 1 5 1.00 * ldr q15, [x25, w7, uxtw] +# CHECK-NEXT: 1 5 1.00 * ldr q16, [x24, w8, uxtw #4] +# CHECK-NEXT: 1 5 1.00 * ldr q17, [x23, w9, sxtw] +# CHECK-NEXT: 1 2 1.00 * str q18, [x22, w10, sxtw] +# CHECK-NEXT: 1 5 1.00 * ldr q19, [x21, wzr, sxtw #4] # CHECK-NEXT: 2 4 1.00 * ldp w3, w5, [sp] # CHECK-NEXT: 1 1 1.00 * stp wzr, w9, [sp, #252] # CHECK-NEXT: 2 4 1.00 * ldp w2, wzr, [sp, #-256] @@ -2482,50 +2482,50 @@ # CHECK-NEXT: 1 1 1.00 * stnp q3, q5, [sp] # CHECK-NEXT: 1 1 1.00 * stnp q17, q19, [sp, #1008] # CHECK-NEXT: 2 5 2.00 * ldnp q23, q29, [x1, #-1024] -# CHECK-NEXT: 1 3 0.50 mov w3, #983055 -# CHECK-NEXT: 1 3 0.50 mov x10, #-6148914691236517206 -# CHECK-NEXT: 1 3 0.50 and w12, w23, w21 -# CHECK-NEXT: 1 3 0.50 and w16, w15, w1, lsl #1 -# CHECK-NEXT: 1 3 0.50 and w9, w4, w10, lsl #31 -# CHECK-NEXT: 1 3 0.50 and w3, w30, w11 -# CHECK-NEXT: 1 3 0.50 and x3, x5, x7, lsl #63 -# CHECK-NEXT: 1 3 0.50 and x5, x14, x19, asr #4 -# CHECK-NEXT: 1 3 0.50 and w3, w17, w19, ror #31 -# CHECK-NEXT: 1 3 0.50 and w0, w2, wzr, lsr #17 -# CHECK-NEXT: 1 3 0.50 and w3, w30, w11, asr #2 -# CHECK-NEXT: 1 3 0.50 and xzr, x4, x26 -# CHECK-NEXT: 1 3 0.50 and w3, wzr, w20, ror #2 -# CHECK-NEXT: 1 3 0.50 and x7, x20, xzr, asr #63 -# CHECK-NEXT: 1 3 0.50 bic x13, x20, x14, lsl #47 -# CHECK-NEXT: 1 3 0.50 bic w2, w7, w9 -# CHECK-NEXT: 1 3 0.50 orr w2, w7, w0, asr #31 -# CHECK-NEXT: 1 3 0.50 orr x8, x9, x10, lsl #12 -# CHECK-NEXT: 1 3 0.50 orn x3, x5, x7, asr #2 -# CHECK-NEXT: 1 3 0.50 orn w2, w5, w29 -# CHECK-NEXT: 1 3 0.50 ands w7, wzr, w9, lsl #1 -# CHECK-NEXT: 1 3 0.50 ands x3, x5, x20, ror #63 -# CHECK-NEXT: 1 3 0.50 bics w3, w5, w7 -# CHECK-NEXT: 1 3 0.50 bics x3, xzr, x3, lsl #1 -# CHECK-NEXT: 1 3 0.50 tst w3, w7, lsl #31 -# CHECK-NEXT: 1 3 0.50 tst x2, x20, asr #2 -# CHECK-NEXT: 1 3 0.50 mov x3, x6 -# CHECK-NEXT: 1 3 0.50 mov x3, xzr -# CHECK-NEXT: 1 3 0.50 mov wzr, w2 -# CHECK-NEXT: 1 3 0.50 mov w3, w5 -# CHECK-NEXT: 1 3 0.50 movz w2, #0, lsl #16 -# CHECK-NEXT: 1 3 0.50 mov w2, #-1235 -# CHECK-NEXT: 1 3 0.50 mov x2, #5299989643264 -# CHECK-NEXT: 1 3 0.50 mov x2, #0 -# CHECK-NEXT: 1 3 0.50 movk w3, #0 -# CHECK-NEXT: 1 3 0.50 movz x4, #0, lsl #16 -# CHECK-NEXT: 1 3 0.50 movk w5, #0, lsl #16 -# CHECK-NEXT: 1 3 0.50 movz x6, #0, lsl #32 -# CHECK-NEXT: 1 3 0.50 movk x7, #0, lsl #32 -# CHECK-NEXT: 1 3 0.50 movz x8, #0, lsl #48 -# CHECK-NEXT: 1 3 0.50 movk x9, #0, lsl #48 -# CHECK-NEXT: 1 3 0.50 adr x2, #1600 -# CHECK-NEXT: 1 3 0.50 adrp x21, #6553600 -# CHECK-NEXT: 1 3 0.50 adr x0, #262144 +# CHECK-NEXT: 1 1 0.50 mov w3, #983055 +# CHECK-NEXT: 1 1 0.50 mov x10, #-6148914691236517206 +# CHECK-NEXT: 1 1 0.50 and w12, w23, w21 +# CHECK-NEXT: 1 2 0.50 and w16, w15, w1, lsl #1 +# CHECK-NEXT: 1 2 0.50 and w9, w4, w10, lsl #31 +# CHECK-NEXT: 1 1 0.50 and w3, w30, w11 +# CHECK-NEXT: 1 2 0.50 and x3, x5, x7, lsl #63 +# CHECK-NEXT: 1 2 0.50 and x5, x14, x19, asr #4 +# CHECK-NEXT: 1 2 0.50 and w3, w17, w19, ror #31 +# CHECK-NEXT: 1 2 0.50 and w0, w2, wzr, lsr #17 +# CHECK-NEXT: 1 2 0.50 and w3, w30, w11, asr #2 +# CHECK-NEXT: 1 1 0.50 and xzr, x4, x26 +# CHECK-NEXT: 1 2 0.50 and w3, wzr, w20, ror #2 +# CHECK-NEXT: 1 2 0.50 and x7, x20, xzr, asr #63 +# CHECK-NEXT: 1 2 0.50 bic x13, x20, x14, lsl #47 +# CHECK-NEXT: 1 1 0.50 bic w2, w7, w9 +# CHECK-NEXT: 1 2 0.50 orr w2, w7, w0, asr #31 +# CHECK-NEXT: 1 2 0.50 orr x8, x9, x10, lsl #12 +# CHECK-NEXT: 1 2 0.50 orn x3, x5, x7, asr #2 +# CHECK-NEXT: 1 1 0.50 orn w2, w5, w29 +# CHECK-NEXT: 1 2 0.50 ands w7, wzr, w9, lsl #1 +# CHECK-NEXT: 1 2 0.50 ands x3, x5, x20, ror #63 +# CHECK-NEXT: 1 1 0.50 bics w3, w5, w7 +# CHECK-NEXT: 1 2 0.50 bics x3, xzr, x3, lsl #1 +# CHECK-NEXT: 1 2 0.50 tst w3, w7, lsl #31 +# CHECK-NEXT: 1 2 0.50 tst x2, x20, asr #2 +# CHECK-NEXT: 1 1 0.50 mov x3, x6 +# CHECK-NEXT: 1 1 0.50 mov x3, xzr +# CHECK-NEXT: 1 1 0.50 mov wzr, w2 +# CHECK-NEXT: 1 1 0.50 mov w3, w5 +# CHECK-NEXT: 1 1 0.50 movz w2, #0, lsl #16 +# CHECK-NEXT: 1 1 0.50 mov w2, #-1235 +# CHECK-NEXT: 1 1 0.50 mov x2, #5299989643264 +# CHECK-NEXT: 1 1 0.50 mov x2, #0 +# CHECK-NEXT: 1 1 0.50 movk w3, #0 +# CHECK-NEXT: 1 1 0.50 movz x4, #0, lsl #16 +# CHECK-NEXT: 1 1 0.50 movk w5, #0, lsl #16 +# CHECK-NEXT: 1 1 0.50 movz x6, #0, lsl #32 +# CHECK-NEXT: 1 1 0.50 movk x7, #0, lsl #32 +# CHECK-NEXT: 1 1 0.50 movz x8, #0, lsl #48 +# CHECK-NEXT: 1 1 0.50 movk x9, #0, lsl #48 +# CHECK-NEXT: 1 1 0.50 adr x2, #1600 +# CHECK-NEXT: 1 1 0.50 adrp x21, #6553600 +# CHECK-NEXT: 1 1 0.50 adr x0, #262144 # CHECK-NEXT: 1 1 1.00 tbz x12, #62, #0 # CHECK-NEXT: 1 1 1.00 tbz x12, #62, #4 # CHECK-NEXT: 1 1 1.00 tbz x12, #62, #-32768 Index: llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s @@ -10,12 +10,12 @@ # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 12 -# CHECK-NEXT: Total Cycles: 19 +# CHECK-NEXT: Total Cycles: 20 # CHECK-NEXT: Total uOps: 12 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.63 -# CHECK-NEXT: IPC: 0.63 +# CHECK-NEXT: uOps Per Cycle: 0.60 +# CHECK-NEXT: IPC: 0.60 # CHECK-NEXT: Block RThroughput: 8.0 # CHECK: Instruction Info: @@ -28,11 +28,11 @@ # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 8 8.00 sdiv w12, w21, w0 -# CHECK-NEXT: 1 3 0.50 add w8, w8, #1 -# CHECK-NEXT: 1 3 0.50 add w1, w2, w0 -# CHECK-NEXT: 1 3 0.50 add w3, w4, #1 -# CHECK-NEXT: 1 3 0.50 add w5, w6, w0 -# CHECK-NEXT: 1 3 0.50 add w7, w9, w0 +# CHECK-NEXT: 1 1 0.50 add w8, w8, #1 +# CHECK-NEXT: 1 1 0.50 add w1, w2, w0 +# CHECK-NEXT: 1 1 0.50 add w3, w4, #1 +# CHECK-NEXT: 1 1 0.50 add w5, w6, w0 +# CHECK-NEXT: 1 1 0.50 add w7, w9, w0 # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 @@ -40,27 +40,27 @@ # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 -# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 1 (5.3%) +# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK-NEXT: USH - Uncategorised Structural Hazard: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 11 (57.9%) -# CHECK-NEXT: 1, 4 (21.1%) -# CHECK-NEXT: 2, 4 (21.1%) +# CHECK-NEXT: 0, 13 (65.0%) +# CHECK-NEXT: 1, 2 (10.0%) +# CHECK-NEXT: 2, 5 (25.0%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 11 (57.9%) -# CHECK-NEXT: 1, 4 (21.1%) -# CHECK-NEXT: 2, 4 (21.1%) +# CHECK-NEXT: 0, 13 (65.0%) +# CHECK-NEXT: 1, 2 (10.0%) +# CHECK-NEXT: 2, 5 (25.0%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: No scheduler resources used. # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 12 -# CHECK-NEXT: Max number of mappings used: 6 +# CHECK-NEXT: Max number of mappings used: 3 # CHECK: Resources: # CHECK-NEXT: [0.0] - CortexA55UnitALU @@ -90,21 +90,21 @@ # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - add w7, w9, w0 # CHECK: Timeline view: -# CHECK-NEXT: 012345678 +# CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeE . . . sdiv w12, w21, w0 -# CHECK-NEXT: [0,1] . DeeE . . . add w8, w8, #1 -# CHECK-NEXT: [0,2] . DeeE . . . add w1, w2, w0 -# CHECK-NEXT: [0,3] . .DeeE. . . add w3, w4, #1 -# CHECK-NEXT: [0,4] . .DeeE. . . add w5, w6, w0 -# CHECK-NEXT: [0,5] . . DeeE . . add w7, w9, w0 -# CHECK-NEXT: [1,0] . . DeeeeeeeE . sdiv w12, w21, w0 -# CHECK-NEXT: [1,1] . . . DeeE . add w8, w8, #1 -# CHECK-NEXT: [1,2] . . . DeeE . add w1, w2, w0 -# CHECK-NEXT: [1,3] . . . DeeE. add w3, w4, #1 -# CHECK-NEXT: [1,4] . . . DeeE. add w5, w6, w0 -# CHECK-NEXT: [1,5] . . . DeeE add w7, w9, w0 +# CHECK: [0,0] DeeeeeeeE . . . sdiv w12, w21, w0 +# CHECK-NEXT: [0,1] . . DE . . . add w8, w8, #1 +# CHECK-NEXT: [0,2] . . DE . . . add w1, w2, w0 +# CHECK-NEXT: [0,3] . . DE. . . add w3, w4, #1 +# CHECK-NEXT: [0,4] . . DE. . . add w5, w6, w0 +# CHECK-NEXT: [0,5] . . DE . . add w7, w9, w0 +# CHECK-NEXT: [1,0] . . DeeeeeeeE . sdiv w12, w21, w0 +# CHECK-NEXT: [1,1] . . . .DE . add w8, w8, #1 +# CHECK-NEXT: [1,2] . . . .DE . add w1, w2, w0 +# CHECK-NEXT: [1,3] . . . . DE. add w3, w4, #1 +# CHECK-NEXT: [1,4] . . . . DE. add w5, w6, w0 +# CHECK-NEXT: [1,5] . . . . DE add w7, w9, w0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions Index: llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-readadv.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-readadv.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-readadv.s @@ -161,12 +161,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 15400 -# CHECK-NEXT: Total Cycles: 30303 +# CHECK-NEXT: Total Cycles: 28802 # CHECK-NEXT: Total uOps: 20900 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.69 -# CHECK-NEXT: IPC: 0.51 +# CHECK-NEXT: uOps Per Cycle: 0.73 +# CHECK-NEXT: IPC: 0.53 # CHECK-NEXT: Block RThroughput: 104.5 # CHECK: Instruction Info: @@ -178,159 +178,159 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldr b0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr b0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr b0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr d0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr d0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldr d0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr h0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr h0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldr h0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr q0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr q0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldr q0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr s0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr s0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldr s0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr w0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr w0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldr w0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr x0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldr x0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldr x0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldrb w0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldrb w0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldrb w0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldrsb w0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldrsb w0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldrsb w0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldrh w0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldrh w0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldrh w0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldrsh w0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldrsh w0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldrsh w0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldrsw x0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 3 1.00 * ldrsw x0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldrsw x0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 4 1.00 * ldr d0, [x2, x2, lsl #3] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 4 1.00 * ldr q0, [x2, w0, sxtw] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 4 1.00 * ldr w0, [x2, w0, sxtw] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 4 1.00 * ldr x0, [x2, w0, sxtw] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 4 1.00 * ldrb w0, [x2, w0, sxtw] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 4 1.00 * ldrsb w0, [x2, w0, sxtw] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 4 1.00 * ldrh w0, [x2, w0, sxtw] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 4 1.00 * ldrsh w0, [x2, w0, sxtw] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 4 1.00 * ldrsw x0, [x2, w0, sxtw] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 5 1.00 * ldr d0, [x2, x2, lsl #3] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 5 1.00 * ldr q0, [x2, w0, sxtw] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 5 1.00 * ldr w0, [x2, w0, sxtw] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 5 1.00 * ldr x0, [x2, w0, sxtw] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 5 1.00 * ldrb w0, [x2, w0, sxtw] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 5 1.00 * ldrsb w0, [x2, w0, sxtw] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 5 1.00 * ldrh w0, [x2, w0, sxtw] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 5 1.00 * ldrsh w0, [x2, w0, sxtw] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 5 1.00 * ldrsw x0, [x2, w0, sxtw] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldur b0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldur d0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldur h0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldur q0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldur s0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldur w0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldurb w0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldurh w0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldursb w0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldursh w0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 3 1.00 * ldursw x0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 5 2.00 * ldnp d0, d1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 5 2.00 * ldnp q0, q1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 5 2.00 * ldnp s0, s1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 5 2.00 * ldnp s0, s1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 5 2.00 * ldnp w0, w1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 5 2.00 * ldnp x0, x1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 5 2.00 * ldp d0, d1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 3 5 2.00 * ldp d0, d1, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 3 5 2.00 * ldp d0, d1, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 6 6.00 * ldp q0, q1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 3 6 6.00 * ldp q0, q1, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 3 6 6.00 * ldp q0, q1, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 4 1.00 * ldp s0, s1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 3 4 1.00 * ldp s0, s1, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 3 4 1.00 * ldp s0, s1, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 5 2.00 * ldp x0, x1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 3 5 2.00 * ldp x0, x1, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 3 5 2.00 * ldp x0, x1, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 4 1.00 * ldpsw x0, x1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 3 4 1.00 * ldpsw x0, x1, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 3 4 1.00 * ldpsw x0, x1, [x2], #16 # CHECK: Resources: @@ -509,163 +509,163 @@ # CHECK-NEXT: - - - - - - - - - 1.00 - - ldpsw x0, x1, [x2], #16 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345 +# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 -# CHECK: [0,0] DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,1] . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2, #16] -# CHECK-NEXT: [0,2] . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,3] . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2, #16]! -# CHECK-NEXT: [0,4] . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,5] . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2], #16 -# CHECK-NEXT: [0,6] . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,7] . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2], #16 -# CHECK-NEXT: [0,8] . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,9] . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, #16]! -# CHECK-NEXT: [0,10] . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,11] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, #16] -# CHECK-NEXT: [0,12] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,13] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2], #16 -# CHECK-NEXT: [0,14] . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,15] . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2, #16]! -# CHECK-NEXT: [0,16] . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,17] . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2, #16] -# CHECK-NEXT: [0,18] . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,19] . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2], #16 -# CHECK-NEXT: [0,20] . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,21] . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, #16]! -# CHECK-NEXT: [0,22] . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,23] . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, #16] -# CHECK-NEXT: [0,24] . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,25] . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2], #16 -# CHECK-NEXT: [0,26] . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,27] . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2, #16]! -# CHECK-NEXT: [0,28] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,29] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2, #16] -# CHECK-NEXT: [0,30] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,31] . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2], #16 -# CHECK-NEXT: [0,32] . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,33] . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, #16]! -# CHECK-NEXT: [0,34] . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,35] . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, #16] -# CHECK-NEXT: [0,36] . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,37] . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2], #16 -# CHECK-NEXT: [0,38] . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,39] . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, #16]! -# CHECK-NEXT: [0,40] . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,41] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, #16] -# CHECK-NEXT: [0,42] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,43] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2], #16 -# CHECK-NEXT: [0,44] . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,45] . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, #16]! -# CHECK-NEXT: [0,46] . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,47] . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, #16] -# CHECK-NEXT: [0,48] . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,49] . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2], #16 -# CHECK-NEXT: [0,50] . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,51] . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, #16]! -# CHECK-NEXT: [0,52] . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,53] . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, #16] -# CHECK-NEXT: [0,54] . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,55] . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2], #16 -# CHECK-NEXT: [0,56] . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,57] . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, #16]! -# CHECK-NEXT: [0,58] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,59] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, #16] -# CHECK-NEXT: [0,60] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,61] . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2], #16 -# CHECK-NEXT: [0,62] . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,63] . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, #16]! -# CHECK-NEXT: [0,64] . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,65] . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, #16] -# CHECK-NEXT: [0,66] . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,67] . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2], #16 -# CHECK-NEXT: [0,68] . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,69] . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, #16]! -# CHECK-NEXT: [0,70] . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,71] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, #16] -# CHECK-NEXT: [0,72] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,73] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, x2, lsl #3] -# CHECK-NEXT: [0,74] . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,75] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, w0, sxtw] -# CHECK-NEXT: [0,76] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,77] . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,78] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,79] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, w0, sxtw] -# CHECK-NEXT: [0,80] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,81] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,82] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,83] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,84] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,85] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,86] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,87] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,88] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,89] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, w0, sxtw] -# CHECK-NEXT: [0,90] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,91] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . ldur b0, [x2, #255] -# CHECK-NEXT: [0,92] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,93] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . ldur d0, [x2, #255] -# CHECK-NEXT: [0,94] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,95] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . ldur h0, [x2, #255] -# CHECK-NEXT: [0,96] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,97] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . ldur q0, [x2, #255] -# CHECK-NEXT: [0,98] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,99] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . ldur s0, [x2, #255] -# CHECK-NEXT: [0,100] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,101] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . ldur w0, [x2, #255] -# CHECK-NEXT: [0,102] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,103] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . ldurb w0, [x2, #255] -# CHECK-NEXT: [0,104] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,105] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . ldurh w0, [x2, #255] -# CHECK-NEXT: [0,106] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,107] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . ldursb w0, [x2, #255] -# CHECK-NEXT: [0,108] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,109] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . ldursh w0, [x2, #255] -# CHECK-NEXT: [0,110] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,111] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . ldursw x0, [x2, #255] -# CHECK-NEXT: [0,112] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,113] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . . . ldnp d0, d1, [x2, #16] -# CHECK-NEXT: [0,114] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,115] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . . ldnp q0, q1, [x2, #16] -# CHECK-NEXT: [0,116] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,117] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . ldnp s0, s1, [x2, #16] -# CHECK-NEXT: [0,118] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,119] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . ldnp s0, s1, [x2, #16] -# CHECK-NEXT: [0,120] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,121] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . ldnp w0, w1, [x2, #16] -# CHECK-NEXT: [0,122] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,123] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . ldnp x0, x1, [x2, #16] -# CHECK-NEXT: [0,124] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,125] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . ldp d0, d1, [x2, #16] -# CHECK-NEXT: [0,126] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,127] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . ldp d0, d1, [x2, #16]! -# CHECK-NEXT: [0,128] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,129] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . ldp d0, d1, [x2], #16 -# CHECK-NEXT: [0,130] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,131] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE . . . . . . . . . . . ldp q0, q1, [x2, #16] -# CHECK-NEXT: [0,132] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,133] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE. . . . . . . . . . ldp q0, q1, [x2, #16]! -# CHECK-NEXT: [0,134] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,135] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE . . . . . . . . ldp q0, q1, [x2], #16 -# CHECK-NEXT: [0,136] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,137] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . ldp s0, s1, [x2, #16] -# CHECK-NEXT: [0,138] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,139] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . ldp s0, s1, [x2, #16]! -# CHECK-NEXT: [0,140] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,141] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . ldp s0, s1, [x2], #16 -# CHECK-NEXT: [0,142] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,143] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . ldp x0, x1, [x2, #16] -# CHECK-NEXT: [0,144] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,145] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . ldp x0, x1, [x2, #16]! -# CHECK-NEXT: [0,146] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . add x2, x3, #1 -# CHECK-NEXT: [0,147] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . ldp x0, x1, [x2], #16 -# CHECK-NEXT: [0,148] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . add x2, x3, #1 -# CHECK-NEXT: [0,149] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . ldpsw x0, x1, [x2, #16] -# CHECK-NEXT: [0,150] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . add x2, x3, #1 -# CHECK-NEXT: [0,151] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . ldpsw x0, x1, [x2, #16]! -# CHECK-NEXT: [0,152] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . add x2, x3, #1 -# CHECK-NEXT: [0,153] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE ldpsw x0, x1, [x2], #16 +# CHECK: [0,0] DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,1] .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2, #16] +# CHECK-NEXT: [0,2] . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,3] . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2, #16]! +# CHECK-NEXT: [0,4] . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,5] . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2], #16 +# CHECK-NEXT: [0,6] . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,7] . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2], #16 +# CHECK-NEXT: [0,8] . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,9] . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, #16]! +# CHECK-NEXT: [0,10] . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,11] . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, #16] +# CHECK-NEXT: [0,12] . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,13] . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2], #16 +# CHECK-NEXT: [0,14] . . . . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,15] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2, #16]! +# CHECK-NEXT: [0,16] . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,17] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2, #16] +# CHECK-NEXT: [0,18] . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,19] . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2], #16 +# CHECK-NEXT: [0,20] . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,21] . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, #16]! +# CHECK-NEXT: [0,22] . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,23] . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, #16] +# CHECK-NEXT: [0,24] . . . . . . . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,25] . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2], #16 +# CHECK-NEXT: [0,26] . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,27] . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2, #16]! +# CHECK-NEXT: [0,28] . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,29] . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2, #16] +# CHECK-NEXT: [0,30] . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,31] . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2], #16 +# CHECK-NEXT: [0,32] . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,33] . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, #16]! +# CHECK-NEXT: [0,34] . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,35] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, #16] +# CHECK-NEXT: [0,36] . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,37] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2], #16 +# CHECK-NEXT: [0,38] . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,39] . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, #16]! +# CHECK-NEXT: [0,40] . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,41] . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, #16] +# CHECK-NEXT: [0,42] . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,43] . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2], #16 +# CHECK-NEXT: [0,44] . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,45] . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, #16]! +# CHECK-NEXT: [0,46] . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,47] . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, #16] +# CHECK-NEXT: [0,48] . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,49] . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2], #16 +# CHECK-NEXT: [0,50] . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,51] . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, #16]! +# CHECK-NEXT: [0,52] . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,53] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, #16] +# CHECK-NEXT: [0,54] . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,55] . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2], #16 +# CHECK-NEXT: [0,56] . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,57] . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, #16]! +# CHECK-NEXT: [0,58] . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,59] . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, #16] +# CHECK-NEXT: [0,60] . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,61] . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2], #16 +# CHECK-NEXT: [0,62] . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,63] . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, #16]! +# CHECK-NEXT: [0,64] . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,65] . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, #16] +# CHECK-NEXT: [0,66] . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,67] . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2], #16 +# CHECK-NEXT: [0,68] . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,69] . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, #16]! +# CHECK-NEXT: [0,70] . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,71] . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, #16] +# CHECK-NEXT: [0,72] . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,73] . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, x2, lsl #3] +# CHECK-NEXT: [0,74] . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,75] . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, w0, sxtw] +# CHECK-NEXT: [0,76] . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,77] . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,78] . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,79] . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, w0, sxtw] +# CHECK-NEXT: [0,80] . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,81] . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,82] . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,83] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,84] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,85] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,86] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,87] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,88] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,89] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, w0, sxtw] +# CHECK-NEXT: [0,90] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,91] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . ldur b0, [x2, #255] +# CHECK-NEXT: [0,92] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,93] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . ldur d0, [x2, #255] +# CHECK-NEXT: [0,94] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,95] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . ldur h0, [x2, #255] +# CHECK-NEXT: [0,96] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,97] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . ldur q0, [x2, #255] +# CHECK-NEXT: [0,98] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,99] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . ldur s0, [x2, #255] +# CHECK-NEXT: [0,100] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,101] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . ldur w0, [x2, #255] +# CHECK-NEXT: [0,102] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,103] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . ldurb w0, [x2, #255] +# CHECK-NEXT: [0,104] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,105] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . ldurh w0, [x2, #255] +# CHECK-NEXT: [0,106] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,107] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . ldursb w0, [x2, #255] +# CHECK-NEXT: [0,108] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,109] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . ldursh w0, [x2, #255] +# CHECK-NEXT: [0,110] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,111] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . ldursw x0, [x2, #255] +# CHECK-NEXT: [0,112] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,113] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . . . ldnp d0, d1, [x2, #16] +# CHECK-NEXT: [0,114] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,115] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . . ldnp q0, q1, [x2, #16] +# CHECK-NEXT: [0,116] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,117] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . ldnp s0, s1, [x2, #16] +# CHECK-NEXT: [0,118] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,119] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . ldnp s0, s1, [x2, #16] +# CHECK-NEXT: [0,120] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,121] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . ldnp w0, w1, [x2, #16] +# CHECK-NEXT: [0,122] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,123] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . ldnp x0, x1, [x2, #16] +# CHECK-NEXT: [0,124] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,125] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . ldp d0, d1, [x2, #16] +# CHECK-NEXT: [0,126] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,127] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . ldp d0, d1, [x2, #16]! +# CHECK-NEXT: [0,128] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,129] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . ldp d0, d1, [x2], #16 +# CHECK-NEXT: [0,130] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,131] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE . . . . . . . . . . . ldp q0, q1, [x2, #16] +# CHECK-NEXT: [0,132] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,133] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE. . . . . . . . . . ldp q0, q1, [x2, #16]! +# CHECK-NEXT: [0,134] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,135] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE . . . . . . . . ldp q0, q1, [x2], #16 +# CHECK-NEXT: [0,136] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,137] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . ldp s0, s1, [x2, #16] +# CHECK-NEXT: [0,138] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,139] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . ldp s0, s1, [x2, #16]! +# CHECK-NEXT: [0,140] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,141] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . ldp s0, s1, [x2], #16 +# CHECK-NEXT: [0,142] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,143] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . ldp x0, x1, [x2, #16] +# CHECK-NEXT: [0,144] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,145] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . ldp x0, x1, [x2, #16]! +# CHECK-NEXT: [0,146] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . add x2, x3, #1 +# CHECK-NEXT: [0,147] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . ldp x0, x1, [x2], #16 +# CHECK-NEXT: [0,148] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . add x2, x3, #1 +# CHECK-NEXT: [0,149] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . ldpsw x0, x1, [x2, #16] +# CHECK-NEXT: [0,150] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . add x2, x3, #1 +# CHECK-NEXT: [0,151] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . ldpsw x0, x1, [x2, #16]! +# CHECK-NEXT: [0,152] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . add x2, x3, #1 +# CHECK-NEXT: [0,153] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE ldpsw x0, x1, [x2], #16 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions Index: llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s @@ -28,11 +28,11 @@ # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 13 10.00 fdiv s1, s2, s3 -# CHECK-NEXT: 1 3 0.50 add w8, w8, #1 -# CHECK-NEXT: 1 3 0.50 add w1, w2, w0 -# CHECK-NEXT: 1 3 0.50 add w3, w4, #1 -# CHECK-NEXT: 1 3 0.50 add w5, w6, w0 -# CHECK-NEXT: 1 3 0.50 add w7, w9, w0 +# CHECK-NEXT: 1 1 0.50 add w8, w8, #1 +# CHECK-NEXT: 1 1 0.50 add w1, w2, w0 +# CHECK-NEXT: 1 1 0.50 add w3, w4, #1 +# CHECK-NEXT: 1 1 0.50 add w5, w6, w0 +# CHECK-NEXT: 1 1 0.50 add w7, w9, w0 # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 @@ -58,7 +58,7 @@ # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 12 -# CHECK-NEXT: Max number of mappings used: 7 +# CHECK-NEXT: Max number of mappings used: 4 # CHECK: Resources: # CHECK-NEXT: [0.0] - CortexA55UnitALU @@ -92,17 +92,17 @@ # CHECK-NEXT: Index 0123456789 0123 # CHECK: [0,0] DeeeeeeeeeeeeE . . . fdiv s1, s2, s3 -# CHECK-NEXT: [0,1] DeeE . . . . . add w8, w8, #1 -# CHECK-NEXT: [0,2] .DeeE. . . . . add w1, w2, w0 -# CHECK-NEXT: [0,3] .DeeE. . . . . add w3, w4, #1 -# CHECK-NEXT: [0,4] . DeeE . . . . add w5, w6, w0 -# CHECK-NEXT: [0,5] . DeeE . . . . add w7, w9, w0 +# CHECK-NEXT: [0,1] DE . . . . . add w8, w8, #1 +# CHECK-NEXT: [0,2] .DE . . . . . add w1, w2, w0 +# CHECK-NEXT: [0,3] .DE . . . . . add w3, w4, #1 +# CHECK-NEXT: [0,4] . DE . . . . . add w5, w6, w0 +# CHECK-NEXT: [0,5] . DE . . . . . add w7, w9, w0 # CHECK-NEXT: [1,0] . . DeeeeeeeeeeeeE fdiv s1, s2, s3 -# CHECK-NEXT: [1,1] . . DeeE . . . add w8, w8, #1 -# CHECK-NEXT: [1,2] . . .DeeE. . . add w1, w2, w0 -# CHECK-NEXT: [1,3] . . .DeeE. . . add w3, w4, #1 -# CHECK-NEXT: [1,4] . . . DeeE . . add w5, w6, w0 -# CHECK-NEXT: [1,5] . . . DeeE . . add w7, w9, w0 +# CHECK-NEXT: [1,1] . . DE . . . add w8, w8, #1 +# CHECK-NEXT: [1,2] . . .DE . . . add w1, w2, w0 +# CHECK-NEXT: [1,3] . . .DE . . . add w3, w4, #1 +# CHECK-NEXT: [1,4] . . . DE . . . add w5, w6, w0 +# CHECK-NEXT: [1,5] . . . DE . . . add w7, w9, w0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions Index: llvm/test/tools/llvm-mca/AArch64/Cortex/A55-store-readadv.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/A55-store-readadv.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/A55-store-readadv.s @@ -125,12 +125,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 11800 -# CHECK-NEXT: Total Cycles: 19801 +# CHECK-NEXT: Total Cycles: 9101 # CHECK-NEXT: Total uOps: 14400 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.73 -# CHECK-NEXT: IPC: 0.60 +# CHECK-NEXT: uOps Per Cycle: 1.58 +# CHECK-NEXT: IPC: 1.30 # CHECK-NEXT: Block RThroughput: 72.0 # CHECK: Instruction Info: @@ -142,123 +142,123 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * str b0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str b0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str b0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str d0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str d0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * str d0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str h0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str h0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * str h0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str q0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str q0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * str q0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str s0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str s0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * str s0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str w0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str w0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * str w0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str x0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * str x0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * str x0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * strb w0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * strb w0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * strb w0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * strh w0, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * strh w0, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * strh w0, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 1 1.00 * str d0, [x2, x2, lsl #3] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 1 1.00 * str q0, [x2, w0, sxtw] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 1 1.00 * str w0, [x2, w0, sxtw] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 1 1.00 * str x0, [x2, w0, sxtw] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 1 1.00 * strb w0, [x2, w0, sxtw] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 1 1 1.00 * strh w0, [x2, w0, sxtw] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 2 1.00 * str d0, [x2, x2, lsl #3] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 2 1.00 * str q0, [x2, w0, sxtw] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 2 1.00 * str w0, [x2, w0, sxtw] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 2 1.00 * str x0, [x2, w0, sxtw] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 2 1.00 * strb w0, [x2, w0, sxtw] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 2 1.00 * strh w0, [x2, w0, sxtw] +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stur b0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stur d0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stur h0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stur q0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stur s0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stur w0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * sturb w0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * sturh w0, [x2, #255] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stnp d0, d1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stnp q0, q1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stnp s0, s1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stnp s0, s1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stnp w0, w1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stnp x0, x1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stp d0, d1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * stp d0, d1, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * stp d0, d1, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stp q0, q1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * stp q0, q1, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * stp q0, q1, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stp s0, s1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * stp s0, s1, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * stp s0, s1, [x2], #16 -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 1 1 1.00 * stp x0, x1, [x2, #16] -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * stp x0, x1, [x2, #16]! -# CHECK-NEXT: 1 3 0.50 add x2, x3, #1 +# CHECK-NEXT: 1 1 0.50 add x2, x3, #1 # CHECK-NEXT: 2 1 1.00 * stp x0, x1, [x2], #16 # CHECK: Resources: @@ -401,127 +401,127 @@ # CHECK-NEXT: - - - - - - - - - - - 1.00 stp x0, x1, [x2], #16 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 +# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 01 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 -# CHECK: [0,0] DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,1] . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str b0, [x2, #16] -# CHECK-NEXT: [0,2] . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,3] . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str b0, [x2, #16]! -# CHECK-NEXT: [0,4] . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,5] . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str b0, [x2], #16 -# CHECK-NEXT: [0,6] . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,7] . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str d0, [x2], #16 -# CHECK-NEXT: [0,8] . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,9] . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str d0, [x2, #16]! -# CHECK-NEXT: [0,10] . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,11] . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str d0, [x2, #16] -# CHECK-NEXT: [0,12] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,13] . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str h0, [x2], #16 -# CHECK-NEXT: [0,14] . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,15] . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str h0, [x2, #16]! -# CHECK-NEXT: [0,16] . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,17] . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str h0, [x2, #16] -# CHECK-NEXT: [0,18] . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,19] . . . . . . . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str q0, [x2], #16 -# CHECK-NEXT: [0,20] . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,21] . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str q0, [x2, #16]! -# CHECK-NEXT: [0,22] . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,23] . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str q0, [x2, #16] -# CHECK-NEXT: [0,24] . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,25] . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str s0, [x2], #16 -# CHECK-NEXT: [0,26] . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,27] . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str s0, [x2, #16]! -# CHECK-NEXT: [0,28] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,29] . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . str s0, [x2, #16] -# CHECK-NEXT: [0,30] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,31] . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . str w0, [x2], #16 -# CHECK-NEXT: [0,32] . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,33] . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . . . str w0, [x2, #16]! -# CHECK-NEXT: [0,34] . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,35] . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . . . . . . . . . . str w0, [x2, #16] -# CHECK-NEXT: [0,36] . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,37] . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . . str x0, [x2], #16 -# CHECK-NEXT: [0,38] . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,39] . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . . . . str x0, [x2, #16]! -# CHECK-NEXT: [0,40] . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,41] . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . . str x0, [x2, #16] -# CHECK-NEXT: [0,42] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,43] . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . . strb w0, [x2], #16 -# CHECK-NEXT: [0,44] . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,45] . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . . . strb w0, [x2, #16]! -# CHECK-NEXT: [0,46] . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,47] . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . . . . . strb w0, [x2, #16] -# CHECK-NEXT: [0,48] . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,49] . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . . . . . strh w0, [x2], #16 -# CHECK-NEXT: [0,50] . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,51] . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . . strh w0, [x2, #16]! -# CHECK-NEXT: [0,52] . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,53] . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . strh w0, [x2, #16] -# CHECK-NEXT: [0,54] . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,55] . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . . str d0, [x2, x2, lsl #3] -# CHECK-NEXT: [0,56] . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,57] . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . . str q0, [x2, w0, sxtw] -# CHECK-NEXT: [0,58] . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,59] . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . . . . str w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,60] . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,61] . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . . . str x0, [x2, w0, sxtw] -# CHECK-NEXT: [0,62] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,63] . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . strb w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,64] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,65] . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . . strh w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,66] . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,67] . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . . . stur b0, [x2, #255] -# CHECK-NEXT: [0,68] . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,69] . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . . . . stur d0, [x2, #255] -# CHECK-NEXT: [0,70] . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,71] . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . . . . stur h0, [x2, #255] -# CHECK-NEXT: [0,72] . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,73] . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . stur q0, [x2, #255] -# CHECK-NEXT: [0,74] . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,75] . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . . stur s0, [x2, #255] -# CHECK-NEXT: [0,76] . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,77] . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . . . stur w0, [x2, #255] -# CHECK-NEXT: [0,78] . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,79] . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . . . . sturb w0, [x2, #255] -# CHECK-NEXT: [0,80] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,81] . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . . . . sturh w0, [x2, #255] -# CHECK-NEXT: [0,82] . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,83] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . stnp d0, d1, [x2, #16] -# CHECK-NEXT: [0,84] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,85] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . . stnp q0, q1, [x2, #16] -# CHECK-NEXT: [0,86] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,87] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . . . stnp s0, s1, [x2, #16] -# CHECK-NEXT: [0,88] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,89] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . . . . . stnp s0, s1, [x2, #16] -# CHECK-NEXT: [0,90] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,91] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . . . . . stnp w0, w1, [x2, #16] -# CHECK-NEXT: [0,92] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,93] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . stnp x0, x1, [x2, #16] -# CHECK-NEXT: [0,94] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,95] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . . stp d0, d1, [x2, #16] -# CHECK-NEXT: [0,96] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,97] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . . stp d0, d1, [x2, #16]! -# CHECK-NEXT: [0,98] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,99] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . . . stp d0, d1, [x2], #16 -# CHECK-NEXT: [0,100] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,101] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . . . . . . stp q0, q1, [x2, #16] -# CHECK-NEXT: [0,102] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,103] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . . . . stp q0, q1, [x2, #16]! -# CHECK-NEXT: [0,104] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,105] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . . stp q0, q1, [x2], #16 -# CHECK-NEXT: [0,106] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . add x2, x3, #1 -# CHECK-NEXT: [0,107] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . stp s0, s1, [x2, #16] -# CHECK-NEXT: [0,108] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . add x2, x3, #1 -# CHECK-NEXT: [0,109] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . . . stp s0, s1, [x2, #16]! -# CHECK-NEXT: [0,110] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . add x2, x3, #1 -# CHECK-NEXT: [0,111] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DE . . . stp s0, s1, [x2], #16 -# CHECK-NEXT: [0,112] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . add x2, x3, #1 -# CHECK-NEXT: [0,113] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE . . stp x0, x1, [x2, #16] -# CHECK-NEXT: [0,114] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . add x2, x3, #1 -# CHECK-NEXT: [0,115] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE. . stp x0, x1, [x2, #16]! -# CHECK-NEXT: [0,116] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE. add x2, x3, #1 -# CHECK-NEXT: [0,117] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DE stp x0, x1, [x2], #16 +# CHECK: [0,0] DE . . . . . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,1] .DE . . . . . . . . . . . . . . . . . .. str b0, [x2, #16] +# CHECK-NEXT: [0,2] .DE . . . . . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,3] . DE . . . . . . . . . . . . . . . . . .. str b0, [x2, #16]! +# CHECK-NEXT: [0,4] . DE. . . . . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,5] . DE . . . . . . . . . . . . . . . . .. str b0, [x2], #16 +# CHECK-NEXT: [0,6] . DE . . . . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,7] . .DE . . . . . . . . . . . . . . . . .. str d0, [x2], #16 +# CHECK-NEXT: [0,8] . . DE . . . . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,9] . . DE. . . . . . . . . . . . . . . . .. str d0, [x2, #16]! +# CHECK-NEXT: [0,10] . . DE . . . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,11] . . DE . . . . . . . . . . . . . . . .. str d0, [x2, #16] +# CHECK-NEXT: [0,12] . . DE . . . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,13] . . .DE . . . . . . . . . . . . . . . .. str h0, [x2], #16 +# CHECK-NEXT: [0,14] . . . DE . . . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,15] . . . DE. . . . . . . . . . . . . . . .. str h0, [x2, #16]! +# CHECK-NEXT: [0,16] . . . DE . . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,17] . . . DE . . . . . . . . . . . . . . .. str h0, [x2, #16] +# CHECK-NEXT: [0,18] . . . DE . . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,19] . . . .DE . . . . . . . . . . . . . . .. str q0, [x2], #16 +# CHECK-NEXT: [0,20] . . . . DE . . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,21] . . . . DE. . . . . . . . . . . . . . .. str q0, [x2, #16]! +# CHECK-NEXT: [0,22] . . . . DE . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,23] . . . . DE . . . . . . . . . . . . . .. str q0, [x2, #16] +# CHECK-NEXT: [0,24] . . . . DE . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,25] . . . . .DE . . . . . . . . . . . . . .. str s0, [x2], #16 +# CHECK-NEXT: [0,26] . . . . . DE . . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,27] . . . . . DE. . . . . . . . . . . . . .. str s0, [x2, #16]! +# CHECK-NEXT: [0,28] . . . . . DE . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,29] . . . . . DE . . . . . . . . . . . . .. str s0, [x2, #16] +# CHECK-NEXT: [0,30] . . . . . DE . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,31] . . . . . .DE . . . . . . . . . . . . .. str w0, [x2], #16 +# CHECK-NEXT: [0,32] . . . . . . DE . . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,33] . . . . . . DE. . . . . . . . . . . . .. str w0, [x2, #16]! +# CHECK-NEXT: [0,34] . . . . . . DE . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,35] . . . . . . DE . . . . . . . . . . . .. str w0, [x2, #16] +# CHECK-NEXT: [0,36] . . . . . . DE . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,37] . . . . . . .DE . . . . . . . . . . . .. str x0, [x2], #16 +# CHECK-NEXT: [0,38] . . . . . . . DE . . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,39] . . . . . . . DE. . . . . . . . . . . .. str x0, [x2, #16]! +# CHECK-NEXT: [0,40] . . . . . . . DE . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,41] . . . . . . . DE . . . . . . . . . . .. str x0, [x2, #16] +# CHECK-NEXT: [0,42] . . . . . . . DE . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,43] . . . . . . . .DE . . . . . . . . . . .. strb w0, [x2], #16 +# CHECK-NEXT: [0,44] . . . . . . . . DE . . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,45] . . . . . . . . DE. . . . . . . . . . .. strb w0, [x2, #16]! +# CHECK-NEXT: [0,46] . . . . . . . . DE . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,47] . . . . . . . . DE . . . . . . . . . .. strb w0, [x2, #16] +# CHECK-NEXT: [0,48] . . . . . . . . DE . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,49] . . . . . . . . .DE . . . . . . . . . .. strh w0, [x2], #16 +# CHECK-NEXT: [0,50] . . . . . . . . . DE . . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,51] . . . . . . . . . DE. . . . . . . . . .. strh w0, [x2, #16]! +# CHECK-NEXT: [0,52] . . . . . . . . . DE . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,53] . . . . . . . . . DE . . . . . . . . .. strh w0, [x2, #16] +# CHECK-NEXT: [0,54] . . . . . . . . . DE . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,55] . . . . . . . . . . DE . . . . . . . . .. str d0, [x2, x2, lsl #3] +# CHECK-NEXT: [0,56] . . . . . . . . . . DE . . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,57] . . . . . . . . . . DE . . . . . . . .. str q0, [x2, w0, sxtw] +# CHECK-NEXT: [0,58] . . . . . . . . . . DE . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,59] . . . . . . . . . . .DE . . . . . . . .. str w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,60] . . . . . . . . . . .DE . . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,61] . . . . . . . . . . . DE. . . . . . . .. str x0, [x2, w0, sxtw] +# CHECK-NEXT: [0,62] . . . . . . . . . . . DE. . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,63] . . . . . . . . . . . DE . . . . . . .. strb w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,64] . . . . . . . . . . . DE . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,65] . . . . . . . . . . . . DE . . . . . . .. strh w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,66] . . . . . . . . . . . . DE . . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,67] . . . . . . . . . . . . DE. . . . . . .. stur b0, [x2, #255] +# CHECK-NEXT: [0,68] . . . . . . . . . . . . DE. . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,69] . . . . . . . . . . . . DE . . . . . .. stur d0, [x2, #255] +# CHECK-NEXT: [0,70] . . . . . . . . . . . . DE . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,71] . . . . . . . . . . . . DE . . . . . .. stur h0, [x2, #255] +# CHECK-NEXT: [0,72] . . . . . . . . . . . . DE . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,73] . . . . . . . . . . . . .DE . . . . . .. stur q0, [x2, #255] +# CHECK-NEXT: [0,74] . . . . . . . . . . . . .DE . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,75] . . . . . . . . . . . . . DE . . . . . .. stur s0, [x2, #255] +# CHECK-NEXT: [0,76] . . . . . . . . . . . . . DE . . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,77] . . . . . . . . . . . . . DE. . . . . .. stur w0, [x2, #255] +# CHECK-NEXT: [0,78] . . . . . . . . . . . . . DE. . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,79] . . . . . . . . . . . . . DE . . . . .. sturb w0, [x2, #255] +# CHECK-NEXT: [0,80] . . . . . . . . . . . . . DE . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,81] . . . . . . . . . . . . . DE . . . . .. sturh w0, [x2, #255] +# CHECK-NEXT: [0,82] . . . . . . . . . . . . . DE . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,83] . . . . . . . . . . . . . .DE . . . . .. stnp d0, d1, [x2, #16] +# CHECK-NEXT: [0,84] . . . . . . . . . . . . . .DE . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,85] . . . . . . . . . . . . . . DE . . . . .. stnp q0, q1, [x2, #16] +# CHECK-NEXT: [0,86] . . . . . . . . . . . . . . DE . . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,87] . . . . . . . . . . . . . . DE. . . . .. stnp s0, s1, [x2, #16] +# CHECK-NEXT: [0,88] . . . . . . . . . . . . . . DE. . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,89] . . . . . . . . . . . . . . DE . . . .. stnp s0, s1, [x2, #16] +# CHECK-NEXT: [0,90] . . . . . . . . . . . . . . DE . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,91] . . . . . . . . . . . . . . DE . . . .. stnp w0, w1, [x2, #16] +# CHECK-NEXT: [0,92] . . . . . . . . . . . . . . DE . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,93] . . . . . . . . . . . . . . .DE . . . .. stnp x0, x1, [x2, #16] +# CHECK-NEXT: [0,94] . . . . . . . . . . . . . . .DE . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,95] . . . . . . . . . . . . . . . DE . . . .. stp d0, d1, [x2, #16] +# CHECK-NEXT: [0,96] . . . . . . . . . . . . . . . DE . . . .. add x2, x3, #1 +# CHECK-NEXT: [0,97] . . . . . . . . . . . . . . . DE. . . .. stp d0, d1, [x2, #16]! +# CHECK-NEXT: [0,98] . . . . . . . . . . . . . . . DE . . .. add x2, x3, #1 +# CHECK-NEXT: [0,99] . . . . . . . . . . . . . . . DE . . .. stp d0, d1, [x2], #16 +# CHECK-NEXT: [0,100] . . . . . . . . . . . . . . . .DE . . .. add x2, x3, #1 +# CHECK-NEXT: [0,101] . . . . . . . . . . . . . . . . DE . . .. stp q0, q1, [x2, #16] +# CHECK-NEXT: [0,102] . . . . . . . . . . . . . . . . DE . . .. add x2, x3, #1 +# CHECK-NEXT: [0,103] . . . . . . . . . . . . . . . . DE. . .. stp q0, q1, [x2, #16]! +# CHECK-NEXT: [0,104] . . . . . . . . . . . . . . . . DE . .. add x2, x3, #1 +# CHECK-NEXT: [0,105] . . . . . . . . . . . . . . . . DE . .. stp q0, q1, [x2], #16 +# CHECK-NEXT: [0,106] . . . . . . . . . . . . . . . . .DE . .. add x2, x3, #1 +# CHECK-NEXT: [0,107] . . . . . . . . . . . . . . . . . DE . .. stp s0, s1, [x2, #16] +# CHECK-NEXT: [0,108] . . . . . . . . . . . . . . . . . DE . .. add x2, x3, #1 +# CHECK-NEXT: [0,109] . . . . . . . . . . . . . . . . . DE. .. stp s0, s1, [x2, #16]! +# CHECK-NEXT: [0,110] . . . . . . . . . . . . . . . . . DE .. add x2, x3, #1 +# CHECK-NEXT: [0,111] . . . . . . . . . . . . . . . . . DE .. stp s0, s1, [x2], #16 +# CHECK-NEXT: [0,112] . . . . . . . . . . . . . . . . . .DE .. add x2, x3, #1 +# CHECK-NEXT: [0,113] . . . . . . . . . . . . . . . . . . DE .. stp x0, x1, [x2, #16] +# CHECK-NEXT: [0,114] . . . . . . . . . . . . . . . . . . DE .. add x2, x3, #1 +# CHECK-NEXT: [0,115] . . . . . . . . . . . . . . . . . . DE.. stp x0, x1, [x2, #16]! +# CHECK-NEXT: [0,116] . . . . . . . . . . . . . . . . . . DE. add x2, x3, #1 +# CHECK-NEXT: [0,117] . . . . . . . . . . . . . . . . . . DE stp x0, x1, [x2], #16 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions Index: llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-0-single-add.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-0-single-add.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-0-single-add.s @@ -5,7 +5,7 @@ # CHECK: Iterations: 1000 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 1001 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 2 Index: llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-1-add-seq.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-1-add-seq.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-1-add-seq.s @@ -6,10 +6,10 @@ # CHECK: Iterations: 1000 # CHECK-NEXT: Instructions: 2000 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 1001 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 1.99 -# CHECK-NEXT: IPC: 1.99 +# CHECK-NEXT: uOps Per Cycle: 2.00 +# CHECK-NEXT: IPC: 2.00 # CHECK-NEXT: Block RThroughput: 1.0 Index: llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-3-mul.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-3-mul.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-3-mul.s @@ -7,7 +7,7 @@ # CHECK: Iterations: 1000 # CHECK-NEXT: Instructions: 3000 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 3002 # CHECK-NEXT: Total uOps: 3000 # CHECK: Dispatch Width: 2 Index: llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-4-sdiv.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-4-sdiv.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-4-sdiv.s @@ -12,10 +12,10 @@ # CHECK: Iterations: 1000 # CHECK-NEXT: Instructions: 4000 -# CHECK-NEXT: Total Cycles: 8004 +# CHECK-NEXT: Total Cycles: 9002 # CHECK-NEXT: Total uOps: 4000 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 0.44 +# CHECK-NEXT: IPC: 0.44 # CHECK-NEXT: Block RThroughput: 8.0 Index: llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-5-mul-sdiv.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-5-mul-sdiv.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-5-mul-sdiv.s @@ -13,10 +13,10 @@ # CHECK: Iterations: 1000 # CHECK-NEXT: Instructions: 5000 -# CHECK-NEXT: Total Cycles: 8004 +# CHECK-NEXT: Total Cycles: 9002 # CHECK-NEXT: Total uOps: 5000 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.62 -# CHECK-NEXT: IPC: 0.62 +# CHECK-NEXT: uOps Per Cycle: 0.56 +# CHECK-NEXT: IPC: 0.56 # CHECK-NEXT: Block RThroughput: 8.0 Index: llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-6-mul.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-6-mul.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-6-mul.s @@ -16,7 +16,7 @@ # CHECK: Iterations: 1000 # CHECK-NEXT: Instructions: 3000 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 3002 # CHECK-NEXT: Total uOps: 3000 # CHECK: Dispatch Width: 2 Index: llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-7-cmp.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-7-cmp.s +++ llvm/test/tools/llvm-mca/AArch64/Cortex/IPC/A55-7-cmp.s @@ -8,7 +8,7 @@ # CHECK: Iterations: 1000 # CHECK-NEXT: Instructions: 4000 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 3002 # CHECK-NEXT: Total uOps: 4000 # CHECK: Dispatch Width: 2