Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4901,31 +4901,96 @@
   return canCombine(MBB, MO, MulOpc);
 }
 
-// TODO: There are many more machine instruction opcodes to match:
-// 1. Other data types (integer, vectors)
-// 2. Other math / logic operations (xor, or)
+// TODO: There may be more machine instruction opcodes to match:
+// 1. Other data types
+// 2. Other math / logic operations
 // 3. Other forms of the same operation (intrinsics and other variants)
 bool AArch64InstrInfo::isAssociativeAndCommutative(
     const MachineInstr &Inst) const {
   switch (Inst.getOpcode()) {
-  case AArch64::FADDDrr:
+  // == Integer types ==
+  // -- Base instructions --
+  // Opcodes MULWrr and MULXrr don't exist because
+  // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
+  // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR`
+  // respectively. The machine combiner does not support machine instructions
+  // with three source operands, so we cannot reassociate MULs.
+  case AArch64::ADDWrr:
+  case AArch64::ADDXrr:
+  case AArch64::ANDWrr:
+  case AArch64::ANDXrr:
+  case AArch64::ORRWrr:
+  case AArch64::ORRXrr:
+  case AArch64::EORWrr:
+  case AArch64::EORXrr:
+  // -- Advanced SIMD instructions --
+  // Opcodes MULv1i64 and MULv2i64 don't exist because the corresponding forms
+  // `MUL <Vd>.1D, <Vn>.1D, <Vm>.1D` and `MUL <Vd>.2D, <Vn>.2D, <Vm>.2D`
+  // don't exist in the Advanced SIMD instruction set.
+  case AArch64::ADDv8i8:
+  case AArch64::ADDv16i8:
+  case AArch64::ADDv4i16:
+  case AArch64::ADDv8i16:
+  case AArch64::ADDv2i32:
+  case AArch64::ADDv4i32:
+  case AArch64::ADDv1i64:
+  case AArch64::ADDv2i64:
+  case AArch64::MULv8i8:
+  case AArch64::MULv16i8:
+  case AArch64::MULv4i16:
+  case AArch64::MULv8i16:
+  case AArch64::MULv2i32:
+  case AArch64::MULv4i32:
+  case AArch64::ANDv8i8:
+  case AArch64::ANDv16i8:
+  case AArch64::ORRv8i8:
+  case AArch64::ORRv16i8:
+  case AArch64::EORv8i8:
+  case AArch64::EORv16i8:
+  // -- SVE instructions --
+  case AArch64::ADD_ZZZ_B:
+  case AArch64::ADD_ZZZ_H:
+  case AArch64::ADD_ZZZ_S:
+  case AArch64::ADD_ZZZ_D:
+  case AArch64::MUL_ZZZ_B:
+  case AArch64::MUL_ZZZ_H:
+  case AArch64::MUL_ZZZ_S:
+  case AArch64::MUL_ZZZ_D:
+  case AArch64::AND_ZZZ:
+  case AArch64::ORR_ZZZ:
+  case AArch64::EOR_ZZZ:
+    return true;
+
+  // == Floating-point types ==
+  // -- Floating-point instructions --
+  case AArch64::FADDHrr:
   case AArch64::FADDSrr:
+  case AArch64::FADDDrr:
+  case AArch64::FMULHrr:
+  case AArch64::FMULSrr:
+  case AArch64::FMULDrr:
+  // -- Advanced SIMD instructions --
+  case AArch64::FADDv4f16:
+  case AArch64::FADDv8f16:
   case AArch64::FADDv2f32:
-  case AArch64::FADDv2f64:
   case AArch64::FADDv4f32:
-  case AArch64::FMULDrr:
-  case AArch64::FMULSrr:
-  case AArch64::FMULX32:
-  case AArch64::FMULX64:
-  case AArch64::FMULXv2f32:
-  case AArch64::FMULXv2f64:
-  case AArch64::FMULXv4f32:
+  case AArch64::FADDv2f64:
+  case AArch64::FMULv4f16:
+  case AArch64::FMULv8f16:
   case AArch64::FMULv2f32:
-  case AArch64::FMULv2f64:
   case AArch64::FMULv4f32:
+  case AArch64::FMULv2f64:
+  // -- SVE instructions --
+  case AArch64::FADD_ZZZ_H:
+  case AArch64::FADD_ZZZ_S:
+  case AArch64::FADD_ZZZ_D:
+  case AArch64::FMUL_ZZZ_H:
+  case AArch64::FMUL_ZZZ_S:
+  case AArch64::FMUL_ZZZ_D:
     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
            (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
             Inst.getFlag(MachineInstr::MIFlag::FmNsz));
+
   default:
     return false;
   }
 }
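[Reviewer note, illustrative only, not part of the patch] Reporting these opcodes from isAssociativeAndCommutative is what allows the MachineCombiner to rebalance a serial chain such as ((a + b) + c) + d into (a + b) + (c + d), replacing one long dependence chain with two shorter independent ones. For the integer opcodes this is unconditionally legal; for the floating-point opcodes it fires only under -enable-unsafe-fp-math or when the instruction carries both the reassoc and nsz fast-math flags, exactly as the return expression above encodes. A minimal IR sketch of both cases, mirroring the new tests in machine-combiner.ll below (function names here are hypothetical):

; Integer chain: always eligible for reassociation.
define i32 @chain_add(i32 %a, i32 %b, i32 %c, i32 %d) {
  %t0 = add i32 %a, %b
  %t1 = add i32 %t0, %c   ; each add waits on the previous one
  %t2 = add i32 %t1, %d
  ret i32 %t2
}

; FP chain: eligible only because every fadd carries `reassoc` and `nsz`
; (or globally via -enable-unsafe-fp-math); a plain fadd is left alone,
; since FP addition is not associative in general.
define float @chain_fadd(float %a, float %b, float %c, float %d) {
  %t0 = fadd reassoc nsz float %a, %b
  %t1 = fadd reassoc nsz float %t0, %c
  %t2 = fadd reassoc nsz float %t1, %d
  ret float %t2
}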
Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -710,8 +710,8 @@
 ; CHECK-NOLSE-O1-NEXT: ldrb w10, [x0, w1, sxtw]
 ; CHECK-NOLSE-O1-NEXT: ldurb w11, [x0, #-256]
 ; CHECK-NOLSE-O1-NEXT: ldrb w8, [x8]
-; CHECK-NOLSE-O1-NEXT: add w9, w9, w10
 ; CHECK-NOLSE-O1-NEXT: add w9, w9, w11
+; CHECK-NOLSE-O1-NEXT: add w9, w10, w9
 ; CHECK-NOLSE-O1-NEXT: add w0, w9, w8
 ; CHECK-NOLSE-O1-NEXT: ret
 ;
@@ -733,9 +733,9 @@
 ; CHECK-LSE-O1: ; %bb.0:
 ; CHECK-LSE-O1-NEXT: ldrb w8, [x0, #4095]
 ; CHECK-LSE-O1-NEXT: ldrb w9, [x0, w1, sxtw]
-; CHECK-LSE-O1-NEXT: add w8, w8, w9
-; CHECK-LSE-O1-NEXT: ldurb w9, [x0, #-256]
-; CHECK-LSE-O1-NEXT: add w8, w8, w9
+; CHECK-LSE-O1-NEXT: ldurb w10, [x0, #-256]
+; CHECK-LSE-O1-NEXT: add w8, w8, w10
+; CHECK-LSE-O1-NEXT: add w8, w9, w8
 ; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936
 ; CHECK-LSE-O1-NEXT: ldrb w9, [x9]
 ; CHECK-LSE-O1-NEXT: add w0, w8, w9
@@ -780,8 +780,8 @@
 ; CHECK-NOLSE-O1-NEXT: ldrh w10, [x0, w1, sxtw #1]
 ; CHECK-NOLSE-O1-NEXT: ldurh w11, [x0, #-256]
 ; CHECK-NOLSE-O1-NEXT: ldrh w8, [x8]
-; CHECK-NOLSE-O1-NEXT: add w9, w9, w10
 ; CHECK-NOLSE-O1-NEXT: add w9, w9, w11
+; CHECK-NOLSE-O1-NEXT: add w9, w10, w9
 ; CHECK-NOLSE-O1-NEXT: add w0, w9, w8
 ; CHECK-NOLSE-O1-NEXT: ret
 ;
@@ -803,9 +803,9 @@
 ; CHECK-LSE-O1: ; %bb.0:
 ; CHECK-LSE-O1-NEXT: ldrh w8, [x0, #8190]
 ; CHECK-LSE-O1-NEXT: ldrh w9, [x0, w1, sxtw #1]
-; CHECK-LSE-O1-NEXT: add w8, w8, w9
-; CHECK-LSE-O1-NEXT: ldurh w9, [x0, #-256]
-; CHECK-LSE-O1-NEXT: add w8, w8, w9
+; CHECK-LSE-O1-NEXT: ldurh w10, [x0, #-256]
+; CHECK-LSE-O1-NEXT: add w8, w8, w10
+; CHECK-LSE-O1-NEXT: add w8, w9, w8
 ; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936
 ; CHECK-LSE-O1-NEXT: ldrh w9, [x9]
 ; CHECK-LSE-O1-NEXT: add w0, w8, w9
@@ -850,8 +850,8 @@
 ; CHECK-NOLSE-O1-NEXT: ldr w10, [x0, w1, sxtw #2]
 ; CHECK-NOLSE-O1-NEXT: ldur w11, [x0, #-256]
 ; CHECK-NOLSE-O1-NEXT: ldr w8, [x8]
-; CHECK-NOLSE-O1-NEXT: add w9, w9, w10
 ; CHECK-NOLSE-O1-NEXT: add w9, w9, w11
+; CHECK-NOLSE-O1-NEXT: add w9, w10, w9
 ; CHECK-NOLSE-O1-NEXT: add w0, w9, w8
 ; CHECK-NOLSE-O1-NEXT: ret
 ;
@@ -871,9 +871,9 @@
 ; CHECK-LSE-O1: ; %bb.0:
 ; CHECK-LSE-O1-NEXT: ldr w8, [x0, #16380]
 ; CHECK-LSE-O1-NEXT: ldr w9, [x0, w1, sxtw #2]
-; CHECK-LSE-O1-NEXT: add w8, w8, w9
-; CHECK-LSE-O1-NEXT: ldur w9, [x0, #-256]
-; CHECK-LSE-O1-NEXT: add w8, w8, w9
+; CHECK-LSE-O1-NEXT: ldur w10, [x0, #-256]
+; CHECK-LSE-O1-NEXT: add w8, w8, w10
+; CHECK-LSE-O1-NEXT: add w8, w9, w8
 ; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936
 ; CHECK-LSE-O1-NEXT: ldr w9, [x9]
 ; CHECK-LSE-O1-NEXT: add w0, w8, w9
@@ -916,8 +916,8 @@
 ; CHECK-NOLSE-O1-NEXT: ldr x10, [x0, w1, sxtw #3]
 ; CHECK-NOLSE-O1-NEXT: ldur x11, [x0, #-256]
 ; CHECK-NOLSE-O1-NEXT: ldr x8, [x8]
-; CHECK-NOLSE-O1-NEXT: add x9, x9, x10
 ; CHECK-NOLSE-O1-NEXT: add x9, x9, x11
+; CHECK-NOLSE-O1-NEXT: add x9, x10, x9
 ; CHECK-NOLSE-O1-NEXT: add x0, x9, x8
 ; CHECK-NOLSE-O1-NEXT: ret
 ;
@@ -937,9 +937,9 @@
 ; CHECK-LSE-O1: ; %bb.0:
 ; CHECK-LSE-O1-NEXT: ldr x8, [x0, #32760]
 ; CHECK-LSE-O1-NEXT: ldr x9, [x0, w1, sxtw #3]
-; CHECK-LSE-O1-NEXT: add x8, x8, x9
-; CHECK-LSE-O1-NEXT: ldur x9, [x0, #-256]
-; CHECK-LSE-O1-NEXT: add x8, x8, x9
+; CHECK-LSE-O1-NEXT: ldur x10, [x0, #-256]
+; CHECK-LSE-O1-NEXT: add x8, x8, x10
+; CHECK-LSE-O1-NEXT: add x8, x9, x8
 ; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936
 ; CHECK-LSE-O1-NEXT: ldr x9, [x9]
 ; CHECK-LSE-O1-NEXT: add x0, x8, x9
Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -389,8 +389,8 @@
   ; CHECK-NEXT: renamable $w10 = LDRBBroW renamable $x0, killed renamable $w1, 1, 0, pcsections !0 :: (load unordered (s8) from %ir.ptr_regoff)
   ; CHECK-NEXT: renamable $w11 = LDURBBi killed renamable $x0, -256, pcsections !0 :: (load monotonic (s8) from %ir.ptr_unscaled)
   ; CHECK-NEXT: renamable $w8 = LDRBBui killed renamable $x8, 0, pcsections !0 :: (load unordered (s8) from %ir.ptr_random)
-  ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w10, 0, pcsections !0
-  ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0, pcsections !0
+  ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0
+  ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w10, killed renamable $w9, 0
   ; CHECK-NEXT: $w0 = ADDWrs killed renamable $w9, killed renamable $w8, 0, pcsections !0
   ; CHECK-NEXT: RET undef $lr, implicit $w0
 %ptr_unsigned = getelementptr i8, i8* %p, i32 4095
@@ -421,8 +421,8 @@
   ; CHECK-NEXT: renamable $w10 = LDRHHroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s16) from %ir.ptr_regoff)
   ; CHECK-NEXT: renamable $w11 = LDURHHi killed renamable $x0, -256, pcsections !0 :: (load monotonic (s16) from %ir.ptr_unscaled)
   ; CHECK-NEXT: renamable $w8 = LDRHHui killed renamable $x8, 0, pcsections !0 :: (load unordered (s16) from %ir.ptr_random)
-  ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w10, 0, pcsections !0
-  ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0, pcsections !0
+  ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0
+  ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w10, killed renamable $w9, 0
   ; CHECK-NEXT: $w0 = ADDWrs killed renamable $w9, killed renamable $w8, 0, pcsections !0
   ; CHECK-NEXT: RET undef $lr, implicit $w0
 %ptr_unsigned = getelementptr i16, i16* %p, i32 4095
@@ -453,8 +453,8 @@
   ; CHECK-NEXT: renamable $w10 = LDRWroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s32) from %ir.ptr_regoff)
   ; CHECK-NEXT: renamable $w11 = LDURWi killed renamable $x0, -256, pcsections !0 :: (load monotonic (s32) from %ir.ptr_unscaled)
   ; CHECK-NEXT: renamable $w8 = LDRWui killed renamable $x8, 0, pcsections !0 :: (load unordered (s32) from %ir.ptr_random)
-  ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w10, 0, pcsections !0
-  ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0, pcsections !0
+  ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0
+  ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w10, killed renamable $w9, 0
   ; CHECK-NEXT: $w0 = ADDWrs killed renamable $w9, killed renamable $w8, 0, pcsections !0
   ; CHECK-NEXT: RET undef $lr, implicit $w0
 %ptr_unsigned = getelementptr i32, i32* %p, i32 4095
@@ -485,8 +485,8 @@
   ; CHECK-NEXT: renamable $x10 = LDRXroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s64) from %ir.ptr_regoff)
   ; CHECK-NEXT: renamable $x11 = LDURXi killed renamable $x0, -256, pcsections !0 :: (load monotonic (s64) from %ir.ptr_unscaled)
   ; CHECK-NEXT: renamable $x8 = LDRXui killed renamable $x8, 0, pcsections !0 :: (load unordered (s64) from %ir.ptr_random)
-  ; CHECK-NEXT: $x9 = ADDXrs killed renamable $x9, killed renamable $x10, 0, pcsections !0
-  ; CHECK-NEXT: $x9 = ADDXrs killed renamable $x9, killed renamable $x11, 0, pcsections !0
+  ; CHECK-NEXT: $x9 = ADDXrs killed renamable $x9, killed renamable $x11, 0
+  ; CHECK-NEXT: $x9 = ADDXrs killed renamable $x10, killed renamable $x9, 0
   ; CHECK-NEXT: $x0 = ADDXrs killed renamable $x9, killed renamable $x8, 0, pcsections !0
   ; CHECK-NEXT: RET undef $lr, implicit $x0
 %ptr_unsigned = getelementptr i64, i64* %p, i32 4095
Index: llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
===================================================================
--- llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios -frame-pointer=all -disable-post-ra < %s | FileCheck %s --check-prefix=CHECK-MACHO
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra -aarch64-enable-mcr=false < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios -frame-pointer=all -disable-post-ra -aarch64-enable-mcr=false < %s | FileCheck %s --check-prefix=CHECK-MACHO
 
 ; This test aims to check basic correctness of frame layout &
 ; frame access code. There are 8 functions in this test file,
Index: llvm/test/CodeGen/AArch64/arm64-rev.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -183,11 +183,11 @@
 ; GISEL-NEXT: lsl w9, w0, #8
 ; GISEL-NEXT: and w10, w8, #0xff0000
 ; GISEL-NEXT: and w11, w9, #0xff000000
+; GISEL-NEXT: and w8, w8, #0xff
 ; GISEL-NEXT: and w9, w9, #0xff00
 ; GISEL-NEXT: orr w10, w11, w10
-; GISEL-NEXT: and w8, w8, #0xff
-; GISEL-NEXT: orr w9, w10, w9
-; GISEL-NEXT: orr w0, w9, w8
+; GISEL-NEXT: orr w8, w9, w8
+; GISEL-NEXT: orr w0, w10, w8
 ; GISEL-NEXT: ret
 entry:
 %tmp1 = lshr i32 %X, 8
@@ -729,16 +729,16 @@
 ; GISEL-NEXT: lsl x9, x0, #8
 ; GISEL-NEXT: and x10, x8, #0xff000000000000
 ; GISEL-NEXT: and x11, x9, #0xff00000000000000
+; GISEL-NEXT: and x12, x8, #0xff00000000
+; GISEL-NEXT: and x13, x9, #0xff0000000000
 ; GISEL-NEXT: orr x10, x10, x11
-; GISEL-NEXT: and x11, x8, #0xff00000000
-; GISEL-NEXT: orr x10, x10, x11
-; GISEL-NEXT: and x11, x9, #0xff0000000000
-; GISEL-NEXT: orr x10, x10, x11
-; GISEL-NEXT: and x11, x8, #0xff0000
-; GISEL-NEXT: orr x10, x10, x11
-; GISEL-NEXT: and x11, x9, #0xff000000
-; GISEL-NEXT: orr x10, x10, x11
+; GISEL-NEXT: orr x11, x12, x13
+; GISEL-NEXT: and x12, x8, #0xff0000
+; GISEL-NEXT: and x13, x9, #0xff000000
+; GISEL-NEXT: orr x12, x12, x13
 ; GISEL-NEXT: and x8, x8, #0xff
+; GISEL-NEXT: orr x10, x10, x11
+; GISEL-NEXT: orr x8, x12, x8
 ; GISEL-NEXT: orr x8, x10, x8
 ; GISEL-NEXT: and x9, x9, #0xff00
 ; GISEL-NEXT: orr x0, x8, x9
@@ -782,21 +782,21 @@
 ; GISEL-LABEL: test_rev16_x_hwbyteswaps_complex2:
 ; GISEL: // %bb.0: // %entry
 ; GISEL-NEXT: lsr x8, x0, #8
-; GISEL-NEXT: lsl x10, x0, #8
-; GISEL-NEXT: and x9, x8, #0xff000000000000
+; GISEL-NEXT: lsl x9, x0, #8
+; GISEL-NEXT: and x10, x8, #0xff000000000000
 ; GISEL-NEXT: and x11, x8, #0xff00000000
-; GISEL-NEXT: orr x9, x9, x11
-; GISEL-NEXT: and x11, x8, #0xff0000
-; GISEL-NEXT: orr x9, x9, x11
+; GISEL-NEXT: and x12, x8, #0xff0000
 ; GISEL-NEXT: and x8, x8, #0xff
-; GISEL-NEXT: orr x8, x9, x8
-; GISEL-NEXT: and x9, x10, #0xff00000000000000
-; GISEL-NEXT: orr x8, x8, x9
-; GISEL-NEXT: and x9, x10, #0xff0000000000
-; GISEL-NEXT: orr x8, x8, x9
-; GISEL-NEXT: and x9, x10, #0xff000000
-; GISEL-NEXT: orr x8, x8, x9
-; GISEL-NEXT: and x9, x10, #0xff00
+; GISEL-NEXT: orr x10, x10, x11
+; GISEL-NEXT: orr x8, x12, x8
+; GISEL-NEXT: and x11, x9, #0xff00000000000000
+; GISEL-NEXT: and x12, x9, #0xff0000000000
+; GISEL-NEXT: orr x11, x11, x12
+; GISEL-NEXT: and x12, x9, #0xff000000
+; GISEL-NEXT: orr x8, x10, x8
+; GISEL-NEXT: orr x10, x11, x12
+; GISEL-NEXT: orr x8, x8, x10
+; GISEL-NEXT: and x9, x9, #0xff00
 ; GISEL-NEXT: orr x0, x8, x9
 ; GISEL-NEXT: ret
 entry:
@@ -847,17 +847,17 @@
 ; GISEL-NEXT: lsl x9, x0, #8
 ; GISEL-NEXT: and x10, x8, #0xff000000000000
 ; GISEL-NEXT: and x11, x9, #0xff00000000000000
+; GISEL-NEXT: and x12, x8, #0xff00000000
+; GISEL-NEXT: and x13, x9, #0xff0000000000
 ; GISEL-NEXT: orr x10, x11, x10
-; GISEL-NEXT: and x11, x8, #0xff00000000
-; GISEL-NEXT: orr x10, x11, x10
-; GISEL-NEXT: and x11, x9, #0xff0000000000
-; GISEL-NEXT: orr x10, x11, x10
-; GISEL-NEXT: and x11, x8, #0xff0000
-; GISEL-NEXT: orr x10, x11, x10
-; GISEL-NEXT: and x11, x9, #0xff000000
-; GISEL-NEXT: orr x10, x11, x10
+; GISEL-NEXT: orr x11, x12, x13
+; GISEL-NEXT: and x12, x8, #0xff0000
+; GISEL-NEXT: and x13, x9, #0xff000000
+; GISEL-NEXT: orr x12, x12, x13
 ; GISEL-NEXT: and x8, x8, #0xff
-; GISEL-NEXT: orr x8, x8, x10
+; GISEL-NEXT: orr x10, x10, x11
+; GISEL-NEXT: orr x8, x12, x8
+; GISEL-NEXT: orr x8, x10, x8
 ; GISEL-NEXT: and x9, x9, #0xff00
 ; GISEL-NEXT: orr x0, x9, x8
 ; GISEL-NEXT: ret
@@ -918,24 +918,24 @@
 ; CHECK-LABEL: test_or_and_combine2:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: lsr x8, x0, #8
-; CHECK-NEXT: lsl x10, x0, #8
-; CHECK-NEXT: and x9, x8, #0xff000000000000
+; CHECK-NEXT: lsl x9, x0, #8
+; CHECK-NEXT: and x10, x8, #0xff000000000000
+; CHECK-NEXT: and x11, x9, #0xff00000000
 ; CHECK-NEXT: and x8, x8, #0xff0000
-; CHECK-NEXT: orr x9, x9, x10
-; CHECK-NEXT: and x10, x10, #0xff00000000
-; CHECK-NEXT: orr x9, x9, x10
+; CHECK-NEXT: orr x9, x10, x9
+; CHECK-NEXT: orr x8, x11, x8
 ; CHECK-NEXT: orr x0, x9, x8
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_or_and_combine2:
 ; GISEL: // %bb.0: // %entry
 ; GISEL-NEXT: lsr x8, x0, #8
-; GISEL-NEXT: lsl x10, x0, #8
-; GISEL-NEXT: and x9, x8, #0xff000000000000
+; GISEL-NEXT: lsl x9, x0, #8
+; GISEL-NEXT: and x10, x8, #0xff000000000000
+; GISEL-NEXT: and x11, x9, #0xff00000000
 ; GISEL-NEXT: and x8, x8, #0xff0000
-; GISEL-NEXT: orr x9, x9, x10
-; GISEL-NEXT: and x10, x10, #0xff00000000
-; GISEL-NEXT: orr x9, x9, x10
+; GISEL-NEXT: orr x9, x10, x9
+; GISEL-NEXT: orr x8, x11, x8
 ; GISEL-NEXT: orr x0, x9, x8
 ; GISEL-NEXT: ret
 entry:
Index: llvm/test/CodeGen/AArch64/cmp-chains.ll
===================================================================
--- llvm/test/CodeGen/AArch64/cmp-chains.ll
+++ llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -76,11 +76,11 @@
 ; GISEL-NEXT: cmp w0, w1
 ; GISEL-NEXT: cset w9, lo
 ; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: cset w9, ne
+; GISEL-NEXT: cset w10, ne
 ; GISEL-NEXT: cmp w6, w7
+; GISEL-NEXT: cset w11, eq
 ; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: cset w9, eq
+; GISEL-NEXT: and w9, w10, w11
 ; GISEL-NEXT: and w0, w8, w9
 ; GISEL-NEXT: ret
 %9 = icmp ugt i32 %2, %3
@@ -166,11 +166,11 @@
 ; GISEL-NEXT: cmp w2, w3
 ; GISEL-NEXT: cset w9, hi
 ; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: cset w9, ne
+; GISEL-NEXT: cset w10, ne
 ; GISEL-NEXT: cmp w6, w7
+; GISEL-NEXT: cset w11, eq
 ; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: cset w9, eq
+; GISEL-NEXT: orr w9, w10, w11
 ; GISEL-NEXT: orr w0, w8, w9
 ; GISEL-NEXT: ret
 %9 = icmp ult i32 %0, %1
Index: llvm/test/CodeGen/AArch64/machine-combiner.ll
===================================================================
--- llvm/test/CodeGen/AArch64/machine-combiner.ll
+++ llvm/test/CodeGen/AArch64/machine-combiner.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STD
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a710 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STD
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a710 -enable-unsafe-fp-math < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
 
 ; Incremental updates of the instruction depths should be enough for this test
 ; case.
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math \
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a710 -enable-unsafe-fp-math \
 ; RUN:   -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
 
 ; Verify that the first two adds are independent regardless of how the inputs are
@@ -189,8 +189,8 @@
 ; CHECK-STD-LABEL: reassociate_muls1:
 ; CHECK-STD: // %bb.0:
 ; CHECK-STD-NEXT: fdiv s0, s0, s1
-; CHECK-STD-NEXT: fmul s1, s2, s0
-; CHECK-STD-NEXT: fmul s0, s3, s1
+; CHECK-STD-NEXT: fmul s0, s2, s0
+; CHECK-STD-NEXT: fmul s0, s3, s0
 ; CHECK-STD-NEXT: ret
 ;
 ; CHECK-UNSAFE-LABEL: reassociate_muls1:
@@ -233,8 +233,8 @@
 ; CHECK-STD-LABEL: reassociate_muls_double:
 ; CHECK-STD: // %bb.0:
 ; CHECK-STD-NEXT: fdiv d0, d0, d1
-; CHECK-STD-NEXT: fmul d1, d2, d0
-; CHECK-STD-NEXT: fmul d0, d3, d1
+; CHECK-STD-NEXT: fmul d0, d2, d0
+; CHECK-STD-NEXT: fmul d0, d3, d0
 ; CHECK-STD-NEXT: ret
 ;
 ; CHECK-UNSAFE-LABEL: reassociate_muls_double:
@@ -249,6 +249,75 @@
   ret double %t2
 }
 
+; Verify that scalar integer adds are reassociated.
+
+define i32 @reassociate_adds_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: reassociate_adds_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: udiv w8, w0, w1
+; CHECK-NEXT: add w9, w2, w3
+; CHECK-NEXT: add w0, w8, w9
+; CHECK-NEXT: ret
+  %t0 = udiv i32 %x0, %x1
+  %t1 = add i32 %x2, %t0
+  %t2 = add i32 %x3, %t1
+  ret i32 %t2
+}
+
+define i64 @reassociate_adds_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
+; CHECK-LABEL: reassociate_adds_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: udiv x8, x0, x1
+; CHECK-NEXT: add x9, x2, x3
+; CHECK-NEXT: add x0, x8, x9
+; CHECK-NEXT: ret
+  %t0 = udiv i64 %x0, %x1
+  %t1 = add i64 %x2, %t0
+  %t2 = add i64 %x3, %t1
+  ret i64 %t2
+}
+
+; Verify that scalar bitwise operations are reassociated.
+
+define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: reassociate_ands_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, w1
+; CHECK-NEXT: and w9, w2, w3
+; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: ret
+  %t0 = and i32 %x0, %x1
+  %t1 = and i32 %t0, %x2
+  %t2 = and i32 %t1, %x3
+  ret i32 %t2
+}
+
+define i64 @reassociate_ors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
+; CHECK-LABEL: reassociate_ors_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr x8, x0, x1
+; CHECK-NEXT: orr x9, x2, x3
+; CHECK-NEXT: orr x0, x8, x9
+; CHECK-NEXT: ret
+  %t0 = or i64 %x0, %x1
+  %t1 = or i64 %t0, %x2
+  %t2 = or i64 %t1, %x3
+  ret i64 %t2
+}
+
+define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: reassociate_xors_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: eor w8, w0, w1
+; CHECK-NEXT: eor w9, w2, w3
+; CHECK-NEXT: eor w0, w8, w9
+; CHECK-NEXT: ret
+  %t0 = xor i32 %x0, %x1
+  %t1 = xor i32 %t0, %x2
+  %t2 = xor i32 %t1, %x3
+  ret i32 %t2
+}
+
 ; Verify that we reassociate vector instructions too.
 
 define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
@@ -330,6 +399,7 @@
   %t2 = fadd <4 x float> %x3, %t1
   ret <4 x float> %t2
 }
+
 ; Verify that 128-bit vector single-precision multiplies are reassociated.
 
 define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
@@ -374,6 +444,186 @@
   ret <2 x double> %t2
 }
 
+; Verify that vector integer arithmetic operations are reassociated.
+
+define <2 x i32> @reassociate_muls_v2i32(<2 x i32> %x0, <2 x i32> %x1, <2 x i32> %x2, <2 x i32> %x3) {
+; CHECK-LABEL: reassociate_muls_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mul v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: mul v1.2s, v2.2s, v3.2s
+; CHECK-NEXT: mul v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+  %t0 = mul <2 x i32> %x0, %x1
+  %t1 = mul <2 x i32> %x2, %t0
+  %t2 = mul <2 x i32> %x3, %t1
+  ret <2 x i32> %t2
+}
+
+define <2 x i64> @reassociate_adds_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, <2 x i64> %x3) {
+; CHECK-LABEL: reassociate_adds_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: add v1.2d, v2.2d, v3.2d
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+  %t0 = add <2 x i64> %x0, %x1
+  %t1 = add <2 x i64> %x2, %t0
+  %t2 = add <2 x i64> %x3, %t1
+  ret <2 x i64> %t2
+}
+
+; Verify that vector bitwise operations are reassociated.
+
+define <16 x i8> @reassociate_ands_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, <16 x i8> %x3) {
+; CHECK-LABEL: reassociate_ands_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: and v1.16b, v2.16b, v3.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+  %t0 = or <16 x i8> %x0, %x1
+  %t1 = and <16 x i8> %t0, %x2
+  %t2 = and <16 x i8> %t1, %x3
+  ret <16 x i8> %t2
+}
+
+define <4 x i16> @reassociate_ors_v4i16(<4 x i16> %x0, <4 x i16> %x1, <4 x i16> %x2, <4 x i16> %x3) {
+; CHECK-LABEL: reassociate_ors_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: orr v1.8b, v2.8b, v3.8b
+; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ret
+  %t0 = xor <4 x i16> %x0, %x1
+  %t1 = or <4 x i16> %t0, %x2
+  %t2 = or <4 x i16> %t1, %x3
+  ret <4 x i16> %t2
+}
+
+define <4 x i32> @reassociate_xors_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, <4 x i32> %x3) {
+; CHECK-LABEL: reassociate_xors_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: eor v1.16b, v2.16b, v3.16b
+; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+  %t0 = and <4 x i32> %x0, %x1
+  %t1 = xor <4 x i32> %t0, %x2
+  %t2 = xor <4 x i32> %t1, %x3
+  ret <4 x i32> %t2
+}
+
+; Verify that scalable vector FP arithmetic operations are reassociated.
+
+define <vscale x 4 x float> @reassociate_adds_nxv4f32(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2, <vscale x 4 x float> %x3) {
+; CHECK-STD-LABEL: reassociate_adds_nxv4f32:
+; CHECK-STD: // %bb.0:
+; CHECK-STD-NEXT: fadd z0.s, z0.s, z1.s
+; CHECK-STD-NEXT: fadd z0.s, z2.s, z0.s
+; CHECK-STD-NEXT: fadd z0.s, z3.s, z0.s
+; CHECK-STD-NEXT: ret
+;
+; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f32:
+; CHECK-UNSAFE: // %bb.0:
+; CHECK-UNSAFE-NEXT: fadd z0.s, z0.s, z1.s
+; CHECK-UNSAFE-NEXT: fadd z1.s, z2.s, z3.s
+; CHECK-UNSAFE-NEXT: fadd z0.s, z0.s, z1.s
+; CHECK-UNSAFE-NEXT: ret
+  %t0 = fadd reassoc <vscale x 4 x float> %x0, %x1
+  %t1 = fadd reassoc <vscale x 4 x float> %x2, %t0
+  %t2 = fadd reassoc <vscale x 4 x float> %x3, %t1
+  ret <vscale x 4 x float> %t2
+}
+
+define <vscale x 2 x double> @reassociate_muls_nxv2f64(<vscale x 2 x double> %x0, <vscale x 2 x double> %x1, <vscale x 2 x double> %x2, <vscale x 2 x double> %x3) {
+; CHECK-STD-LABEL: reassociate_muls_nxv2f64:
+; CHECK-STD: // %bb.0:
+; CHECK-STD-NEXT: fmul z0.d, z0.d, z1.d
+; CHECK-STD-NEXT: fmul z0.d, z2.d, z0.d
+; CHECK-STD-NEXT: fmul z0.d, z3.d, z0.d
+; CHECK-STD-NEXT: ret
+;
+; CHECK-UNSAFE-LABEL: reassociate_muls_nxv2f64:
+; CHECK-UNSAFE: // %bb.0:
+; CHECK-UNSAFE-NEXT: fmul z0.d, z0.d, z1.d
+; CHECK-UNSAFE-NEXT: fmul z1.d, z2.d, z3.d
+; CHECK-UNSAFE-NEXT: fmul z0.d, z0.d, z1.d
+; CHECK-UNSAFE-NEXT: ret
+  %t0 = fmul reassoc <vscale x 2 x double> %x0, %x1
+  %t1 = fmul reassoc <vscale x 2 x double> %x2, %t0
+  %t2 = fmul reassoc <vscale x 2 x double> %x3, %t1
+  ret <vscale x 2 x double> %t2
+}
+
+; Verify that scalable vector integer arithmetic operations are reassociated.
+
+define <vscale x 4 x i32> @reassociate_muls_nxv4i32(<vscale x 4 x i32> %x0, <vscale x 4 x i32> %x1, <vscale x 4 x i32> %x2, <vscale x 4 x i32> %x3) {
+; CHECK-LABEL: reassociate_muls_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mul z0.s, z0.s, z1.s
+; CHECK-NEXT: mul z1.s, z2.s, z3.s
+; CHECK-NEXT: mul z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %t0 = mul <vscale x 4 x i32> %x0, %x1
+  %t1 = mul <vscale x 4 x i32> %x2, %t0
+  %t2 = mul <vscale x 4 x i32> %x3, %t1
+  ret <vscale x 4 x i32> %t2
+}
+
+define <vscale x 2 x i64> @reassociate_adds_nxv2i64(<vscale x 2 x i64> %x0, <vscale x 2 x i64> %x1, <vscale x 2 x i64> %x2, <vscale x 2 x i64> %x3) {
+; CHECK-LABEL: reassociate_adds_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: add z1.d, z2.d, z3.d
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %t0 = add <vscale x 2 x i64> %x0, %x1
+  %t1 = add <vscale x 2 x i64> %x2, %t0
+  %t2 = add <vscale x 2 x i64> %x3, %t1
+  ret <vscale x 2 x i64> %t2
+}
+
+; Verify that scalable vector bitwise operations are reassociated.
+
+define <vscale x 16 x i8> @reassociate_ands_nxv16i8(<vscale x 16 x i8> %x0, <vscale x 16 x i8> %x1, <vscale x 16 x i8> %x2, <vscale x 16 x i8> %x3) {
+; CHECK-LABEL: reassociate_ands_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: and z1.d, z2.d, z3.d
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %t0 = or <vscale x 16 x i8> %x0, %x1
+  %t1 = and <vscale x 16 x i8> %t0, %x2
+  %t2 = and <vscale x 16 x i8> %t1, %x3
+  ret <vscale x 16 x i8> %t2
+}
+
+define <vscale x 8 x i16> @reassociate_ors_nxv8i16(<vscale x 8 x i16> %x0, <vscale x 8 x i16> %x1, <vscale x 8 x i16> %x2, <vscale x 8 x i16> %x3) {
+; CHECK-LABEL: reassociate_ors_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: eor z0.d, z0.d, z1.d
+; CHECK-NEXT: orr z1.d, z2.d, z3.d
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %t0 = xor <vscale x 8 x i16> %x0, %x1
+  %t1 = or <vscale x 8 x i16> %t0, %x2
+  %t2 = or <vscale x 8 x i16> %t1, %x3
+  ret <vscale x 8 x i16> %t2
+}
+
+define <vscale x 4 x i32> @reassociate_xors_nxv4i32(<vscale x 4 x i32> %x0, <vscale x 4 x i32> %x1, <vscale x 4 x i32> %x2, <vscale x 4 x i32> %x3) {
+; CHECK-LABEL: reassociate_xors_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: eor z1.d, z2.d, z3.d
+; CHECK-NEXT: eor z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %t0 = and <vscale x 4 x i32> %x0, %x1
+  %t1 = xor <vscale x 4 x i32> %t0, %x2
+  %t2 = xor <vscale x 4 x i32> %t1, %x3
+  ret <vscale x 4 x i32> %t2
+}
+
 ; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
 ; Verify that reassociation is not happening needlessly or wrongly.
 
@@ -423,9 +673,9 @@
 ; CHECK-UNSAFE-NEXT: fmov d10, d0
 ; CHECK-UNSAFE-NEXT: bl bar
 ; CHECK-UNSAFE-NEXT: fadd d1, d8, d9
-; CHECK-UNSAFE-NEXT: fadd d0, d10, d0
-; CHECK-UNSAFE-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-UNSAFE-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
+; CHECK-UNSAFE-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-UNSAFE-NEXT: fadd d0, d10, d0
 ; CHECK-UNSAFE-NEXT: fadd d0, d1, d0
 ; CHECK-UNSAFE-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
 ; CHECK-UNSAFE-NEXT: ret
@@ -458,9 +708,9 @@
 ; CHECK-NEXT: fmov d10, d0
 ; CHECK-NEXT: bl bar
 ; CHECK-NEXT: fadd d1, d8, d9
-; CHECK-NEXT: fadd d0, d10, d0
-; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT: fadd d0, d10, d0
 ; CHECK-NEXT: fadd d0, d1, d0
 ; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
@@ -473,4 +723,3 @@
   %t2 = fadd double %t0, %t1
   ret double %t2
 }
-
Index: llvm/test/CodeGen/AArch64/reduce-and.ll
===================================================================
--- llvm/test/CodeGen/AArch64/reduce-and.ll
+++ llvm/test/CodeGen/AArch64/reduce-and.ll
@@ -264,13 +264,13 @@
 ; CHECK-LABEL: test_redand_v4i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.h[1]
-; CHECK-NEXT: umov w9, v0.h[0]
-; CHECK-NEXT: umov w10, v0.h[2]
-; CHECK-NEXT: umov w11, v0.h[3]
+; CHECK-NEXT: umov w8, v0.h[3]
+; CHECK-NEXT: umov w9, v0.h[2]
+; CHECK-NEXT: umov w10, v0.h[1]
+; CHECK-NEXT: umov w11, v0.h[0]
 ; CHECK-NEXT: and w8, w9, w8
-; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: and w0, w8, w11
+; CHECK-NEXT: and w10, w11, w10
+; CHECK-NEXT: and w0, w10, w8
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redand_v4i8:
@@ -295,21 +295,21 @@
 ; CHECK-LABEL: test_redand_v8i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.b[1]
-; CHECK-NEXT: umov w9, v0.b[0]
-; CHECK-NEXT: umov w10, v0.b[2]
-; CHECK-NEXT: umov w11, v0.b[3]
-; CHECK-NEXT: umov w12, v0.b[4]
-; CHECK-NEXT: umov w13, v0.b[5]
+; CHECK-NEXT: umov w8, v0.b[5]
+; CHECK-NEXT: umov w9, v0.b[4]
+; CHECK-NEXT: umov w10, v0.b[1]
+; CHECK-NEXT: umov w11, v0.b[0]
+; CHECK-NEXT: umov w12, v0.b[3]
+; CHECK-NEXT: umov w13, v0.b[2]
+; CHECK-NEXT: umov w14, v0.b[6]
+; CHECK-NEXT: umov w15, v0.b[7]
 ; CHECK-NEXT: and w8, w9, w8
-; CHECK-NEXT: umov w9, v0.b[6]
-; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: umov w10, v0.b[7]
-; CHECK-NEXT: and w8, w8, w11
-; CHECK-NEXT: and w8, w8, w12
-; CHECK-NEXT: and w8, w8, w13
-; CHECK-NEXT: and w8, w8, w9
-; CHECK-NEXT: and w0, w8, w10
+; CHECK-NEXT: and w10, w11, w10
+; CHECK-NEXT: and w11, w13, w12
+; CHECK-NEXT: and w9, w10, w11
+; CHECK-NEXT: and w8, w8, w14
+; CHECK-NEXT: and w8, w9, w8
+; CHECK-NEXT: and w0, w8, w15
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redand_v8i8:
@@ -352,16 +352,16 @@
 ; CHECK-NEXT: umov w10, v0.b[2]
 ; CHECK-NEXT: umov w11, v0.b[3]
 ; CHECK-NEXT: umov w12, v0.b[4]
+; CHECK-NEXT: umov w13, v0.b[5]
+; CHECK-NEXT: umov w14, v0.b[6]
 ; CHECK-NEXT: and w8, w9, w8
-; CHECK-NEXT: umov w9, v0.b[5]
+; CHECK-NEXT: umov w9, v0.b[7]
+; CHECK-NEXT: and w10, w10, w11
+; CHECK-NEXT: and w11, w12, w13
 ; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: umov w10, v0.b[6]
-; CHECK-NEXT: and w8, w8, w11
-; CHECK-NEXT: umov w11, v0.b[7]
-; CHECK-NEXT: and w8, w8, w12
-; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w10, w11, w14
 ; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: and w0, w8, w11
+; CHECK-NEXT: and w0, w8, w9
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redand_v16i8:
@@ -406,16 +406,16 @@
 ; CHECK-NEXT: umov w10, v0.b[2]
 ; CHECK-NEXT: umov w11, v0.b[3]
 ; CHECK-NEXT: umov w12, v0.b[4]
+; CHECK-NEXT: umov w13, v0.b[5]
+; CHECK-NEXT: umov w14, v0.b[6]
 ; CHECK-NEXT: and w8, w9, w8
-; CHECK-NEXT: umov w9, v0.b[5]
+; CHECK-NEXT: umov w9, v0.b[7]
+; CHECK-NEXT: and w10, w10, w11
+; CHECK-NEXT: and w11, w12, w13
 ; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: umov w10, v0.b[6]
-; CHECK-NEXT: and w8, w8, w11
-; CHECK-NEXT: umov w11, v0.b[7]
-; CHECK-NEXT: and w8, w8, w12
-; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: and w10, w11, w14
 ; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: and w0, w8, w11
+; CHECK-NEXT: and w0, w8, w9
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redand_v32i8:
@@ -454,13 +454,13 @@
 ; CHECK-LABEL: test_redand_v4i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.h[1]
-; CHECK-NEXT: umov w9, v0.h[0]
-; CHECK-NEXT: umov w10, v0.h[2]
-; CHECK-NEXT: umov w11, v0.h[3]
+; CHECK-NEXT: umov w8, v0.h[3]
+; CHECK-NEXT: umov w9, v0.h[2]
+; CHECK-NEXT: umov w10, v0.h[1]
+; CHECK-NEXT: umov w11, v0.h[0]
 ; CHECK-NEXT: and w8, w9, w8
-; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: and w0, w8, w11
+; CHECK-NEXT: and w10, w11, w10
+; CHECK-NEXT: and w0, w10, w8
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redand_v4i16:
@@ -491,8 +491,8 @@
 ; CHECK-NEXT: umov w10, v0.h[2]
 ; CHECK-NEXT: umov w11, v0.h[3]
 ; CHECK-NEXT: and w8, w9, w8
-; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: and w0, w8, w11
+; CHECK-NEXT: and w9, w10, w11
+; CHECK-NEXT: and w0, w8, w9
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redand_v8i16:
@@ -525,8 +525,8 @@
 ; CHECK-NEXT: umov w10, v0.h[2]
 ; CHECK-NEXT: umov w11, v0.h[3]
 ; CHECK-NEXT: and w8, w9, w8
-; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: and w0, w8, w11
+; CHECK-NEXT: and w9, w10, w11
+; CHECK-NEXT: and w0, w8, w9
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redand_v16i16:
Index: llvm/test/CodeGen/AArch64/reduce-or.ll
===================================================================
--- llvm/test/CodeGen/AArch64/reduce-or.ll
+++ llvm/test/CodeGen/AArch64/reduce-or.ll
@@ -263,13 +263,13 @@
 ; CHECK-LABEL: test_redor_v4i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.h[1]
-; CHECK-NEXT: umov w9, v0.h[0]
-; CHECK-NEXT: umov w10, v0.h[2]
-; CHECK-NEXT: umov w11, v0.h[3]
+; CHECK-NEXT: umov w8, v0.h[3]
+; CHECK-NEXT: umov w9, v0.h[2]
+; CHECK-NEXT: umov w10, v0.h[1]
+; CHECK-NEXT: umov w11, v0.h[0]
 ; CHECK-NEXT: orr w8, w9, w8
-; CHECK-NEXT: orr w8, w8, w10
-; CHECK-NEXT: orr w0, w8, w11
+; CHECK-NEXT: orr w10, w11, w10
+; CHECK-NEXT: orr w0, w10, w8
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redor_v4i8:
@@ -294,21 +294,21 @@
 ; CHECK-LABEL: test_redor_v8i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.b[1]
-; CHECK-NEXT: umov w9, v0.b[0]
-; CHECK-NEXT: umov w10, v0.b[2]
-; CHECK-NEXT: umov w11, v0.b[3]
-; CHECK-NEXT: umov w12, v0.b[4]
-; CHECK-NEXT: umov w13, v0.b[5]
+; CHECK-NEXT: umov w8, v0.b[5]
+; CHECK-NEXT: umov w9, v0.b[4]
+; CHECK-NEXT: umov w10, v0.b[1]
+; CHECK-NEXT: umov w11, v0.b[0]
+; CHECK-NEXT: umov w12, v0.b[3]
+; CHECK-NEXT: umov w13, v0.b[2]
+; CHECK-NEXT: umov w14, v0.b[6]
+; CHECK-NEXT: umov w15, v0.b[7]
 ; CHECK-NEXT: orr w8, w9, w8
-; CHECK-NEXT: umov w9, v0.b[6]
-; CHECK-NEXT: orr w8, w8, w10
-; CHECK-NEXT: umov w10, v0.b[7]
-; CHECK-NEXT: orr w8, w8, w11
-; CHECK-NEXT: orr w8, w8, w12
-; CHECK-NEXT: orr w8, w8, w13
-; CHECK-NEXT: orr w8, w8, w9
-; CHECK-NEXT: orr w0, w8, w10
+; CHECK-NEXT: orr w10, w11, w10
+; CHECK-NEXT: orr w11, w13, w12
+; CHECK-NEXT: orr w9, w10, w11
+; CHECK-NEXT: orr w8, w8, w14
+; CHECK-NEXT: orr w8, w9, w8
+; CHECK-NEXT: orr w0, w8, w15
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redor_v8i8:
@@ -351,16 +351,16 @@
 ; CHECK-NEXT: umov w10, v0.b[2]
 ; CHECK-NEXT: umov w11, v0.b[3]
 ; CHECK-NEXT: umov w12, v0.b[4]
+; CHECK-NEXT: umov w13, v0.b[5]
+; CHECK-NEXT: umov w14, v0.b[6]
 ; CHECK-NEXT: orr w8, w9, w8
-; CHECK-NEXT: umov w9, v0.b[5]
+; CHECK-NEXT: umov w9, v0.b[7]
+; CHECK-NEXT: orr w10, w10, w11
+; CHECK-NEXT: orr w11, w12, w13
 ; CHECK-NEXT: orr w8, w8, w10
-; CHECK-NEXT: umov w10, v0.b[6]
-; CHECK-NEXT: orr w8, w8, w11
-; CHECK-NEXT: umov w11, v0.b[7]
-; CHECK-NEXT: orr w8, w8, w12
-; CHECK-NEXT: orr w8, w8, w9
+; CHECK-NEXT: orr w10, w11, w14
 ; CHECK-NEXT: orr w8, w8, w10
-; CHECK-NEXT: orr w0, w8, w11
+; CHECK-NEXT: orr w0, w8, w9
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redor_v16i8:
@@ -405,16 +405,16 @@
 ; CHECK-NEXT: umov w10, v0.b[2]
 ; CHECK-NEXT: umov w11, v0.b[3]
 ; CHECK-NEXT: umov w12, v0.b[4]
+; CHECK-NEXT: umov w13, v0.b[5]
+; CHECK-NEXT: umov w14, v0.b[6]
 ; CHECK-NEXT: orr w8, w9, w8
-; CHECK-NEXT: umov w9, v0.b[5]
+; CHECK-NEXT: umov w9, v0.b[7]
+; CHECK-NEXT: orr w10, w10, w11
+; CHECK-NEXT: orr w11, w12, w13
 ; CHECK-NEXT: orr w8, w8, w10
-; CHECK-NEXT: umov w10, v0.b[6]
-; CHECK-NEXT: orr w8, w8, w11
-; CHECK-NEXT: umov w11, v0.b[7]
-; CHECK-NEXT: orr w8, w8, w12
-; CHECK-NEXT: orr w8, w8, w9
+; CHECK-NEXT: orr w10, w11, w14
 ; CHECK-NEXT: orr w8, w8, w10
-; CHECK-NEXT: orr w0, w8, w11
+; CHECK-NEXT: orr w0, w8, w9
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redor_v32i8:
@@ -453,13 +453,13 @@
 ; CHECK-LABEL: test_redor_v4i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.h[1]
-; CHECK-NEXT: umov w9, v0.h[0]
-; CHECK-NEXT: umov w10, v0.h[2]
-; CHECK-NEXT: umov w11, v0.h[3]
+; CHECK-NEXT: umov w8, v0.h[3]
+; CHECK-NEXT: umov w9, v0.h[2]
+; CHECK-NEXT: umov w10, v0.h[1]
+; CHECK-NEXT: umov w11, v0.h[0]
 ; CHECK-NEXT: orr w8, w9, w8
-; CHECK-NEXT: orr w8, w8, w10
-; CHECK-NEXT: orr w0, w8, w11
+; CHECK-NEXT: orr w10, w11, w10
+; CHECK-NEXT: orr w0, w10, w8
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redor_v4i16:
@@ -490,8 +490,8 @@
 ; CHECK-NEXT: umov w10, v0.h[2]
 ; CHECK-NEXT: umov w11, v0.h[3]
 ; CHECK-NEXT: orr w8, w9, w8
-; CHECK-NEXT: orr w8, w8, w10
-; CHECK-NEXT: orr w0, w8, w11
+; CHECK-NEXT: orr w9, w10, w11
+; CHECK-NEXT: orr w0, w8, w9
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redor_v8i16:
@@ -524,8 +524,8 @@
 ; CHECK-NEXT: umov w10, v0.h[2]
 ; CHECK-NEXT: umov w11, v0.h[3]
 ; CHECK-NEXT: orr w8, w9, w8
-; CHECK-NEXT: orr w8, w8, w10
-; CHECK-NEXT: orr w0, w8, w11
+; CHECK-NEXT: orr w9, w10, w11
+; CHECK-NEXT: orr w0, w8, w9
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redor_v16i16:
Index: llvm/test/CodeGen/AArch64/reduce-shuffle.ll
===================================================================
--- llvm/test/CodeGen/AArch64/reduce-shuffle.ll
+++ llvm/test/CodeGen/AArch64/reduce-shuffle.ll
@@ -41,48 +41,48 @@
 ; CHECK-NEXT: uzp2 v5.4s, v3.4s, v2.4s
 ; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #12
 ; CHECK-NEXT: zip1 v17.4s, v1.4s, v0.4s
+; CHECK-NEXT: mov v7.16b, v3.16b
+; CHECK-NEXT: zip2 v4.4s, v2.4s, v3.4s
 ; CHECK-NEXT: zip2 v6.4s, v1.4s, v0.4s
 ; CHECK-NEXT: zip2 v18.4s, v3.4s, v2.4s
-; CHECK-NEXT: uzp2 v5.4s, v5.4s, v3.4s
-; CHECK-NEXT: ext v19.16b, v1.16b, v17.16b, #8
-; CHECK-NEXT: mov v1.s[3], v0.s[2]
-; CHECK-NEXT: zip2 v4.4s, v2.4s, v3.4s
-; CHECK-NEXT: mov v7.16b, v3.16b
-; CHECK-NEXT: ext v16.16b, v2.16b, v16.16b, #12
 ; CHECK-NEXT: mov v7.s[0], v2.s[1]
+; CHECK-NEXT: ext v16.16b, v2.16b, v16.16b, #12
+; CHECK-NEXT: ext v19.16b, v1.16b, v17.16b, #8
+; CHECK-NEXT: uzp2 v5.4s, v5.4s, v3.4s
 ; CHECK-NEXT: mov v2.s[1], v3.s[0]
+; CHECK-NEXT: mov v1.s[3], v0.s[2]
+; CHECK-NEXT: mov v7.d[1], v17.d[1]
 ; CHECK-NEXT: mov v5.d[1], v6.d[1]
+; CHECK-NEXT: mov v2.d[1], v19.d[1]
 ; CHECK-NEXT: mov v18.d[1], v1.d[1]
 ; CHECK-NEXT: mov v16.d[1], v6.d[1]
 ; CHECK-NEXT: mov v4.d[1], v1.d[1]
-; CHECK-NEXT: mov v7.d[1], v17.d[1]
-; CHECK-NEXT: mov v2.d[1], v19.d[1]
+; CHECK-NEXT: add v0.4s, v7.4s, v2.4s
 ; CHECK-NEXT: add v1.4s, v5.4s, v18.4s
+; CHECK-NEXT: rev64 v5.4s, v0.4s
 ; CHECK-NEXT: sub v3.4s, v4.4s, v16.4s
 ; CHECK-NEXT: rev64 v4.4s, v1.4s
-; CHECK-NEXT: add v0.4s, v7.4s, v2.4s
 ; CHECK-NEXT: sub v2.4s, v2.4s, v7.4s
-; CHECK-NEXT: rev64 v5.4s, v0.4s
-; CHECK-NEXT: mov v4.d[1], v1.d[1]
+; CHECK-NEXT: mov v5.d[1], v0.d[1]
 ; CHECK-NEXT: add v6.4s, v3.4s, v2.4s
 ; CHECK-NEXT: sub v2.4s, v2.4s, v3.4s
-; CHECK-NEXT: mov v5.d[1], v0.d[1]
-; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s
+; CHECK-NEXT: mov v4.d[1], v1.d[1]
 ; CHECK-NEXT: rev64 v7.4s, v2.4s
 ; CHECK-NEXT: rev64 v3.4s, v6.4s
-; CHECK-NEXT: rev64 v4.4s, v0.4s
 ; CHECK-NEXT: add v1.4s, v1.4s, v5.4s
+; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s
 ; CHECK-NEXT: sub v7.4s, v2.4s, v7.4s
 ; CHECK-NEXT: addp v5.4s, v1.4s, v6.4s
 ; CHECK-NEXT: addp v2.4s, v0.4s, v2.4s
 ; CHECK-NEXT: sub v3.4s, v6.4s, v3.4s
+; CHECK-NEXT: rev64 v4.4s, v0.4s
 ; CHECK-NEXT: rev64 v6.4s, v1.4s
-; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s
 ; CHECK-NEXT: zip1 v16.4s, v5.4s, v5.4s
 ; CHECK-NEXT: ext v17.16b, v2.16b, v7.16b, #4
 ; CHECK-NEXT: ext v18.16b, v5.16b, v3.16b, #4
-; CHECK-NEXT: ext v4.16b, v0.16b, v2.16b, #8
+; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s
 ; CHECK-NEXT: sub v1.4s, v1.4s, v6.4s
+; CHECK-NEXT: ext v4.16b, v0.16b, v2.16b, #8
 ; CHECK-NEXT: ext v6.16b, v1.16b, v5.16b, #4
 ; CHECK-NEXT: trn2 v1.4s, v16.4s, v1.4s
 ; CHECK-NEXT: zip2 v16.4s, v17.4s, v2.4s
@@ -91,41 +91,41 @@
 ; CHECK-NEXT: ext v6.16b, v6.16b, v6.16b, #4
 ; CHECK-NEXT: ext v16.16b, v7.16b, v16.16b, #12
 ; CHECK-NEXT: ext v17.16b, v3.16b, v17.16b, #12
-; CHECK-NEXT: mov v0.s[2], v2.s[1]
-; CHECK-NEXT: uzp2 v4.4s, v4.4s, v18.4s
 ; CHECK-NEXT: mov v3.s[2], v5.s[3]
 ; CHECK-NEXT: mov v7.s[2], v2.s[3]
-; CHECK-NEXT: sub v18.4s, v1.4s, v6.4s
-; CHECK-NEXT: mov v6.s[0], v5.s[1]
-; CHECK-NEXT: sub v19.4s, v0.4s, v4.4s
+; CHECK-NEXT: mov v0.s[2], v2.s[1]
+; CHECK-NEXT: uzp2 v4.4s, v4.4s, v18.4s
 ; CHECK-NEXT: sub v20.4s, v3.4s, v17.4s
 ; CHECK-NEXT: sub v21.4s, v7.4s, v16.4s
-; CHECK-NEXT: mov v0.s[1], v2.s[0]
 ; CHECK-NEXT: mov v3.s[1], v5.s[2]
 ; CHECK-NEXT: mov v7.s[1], v2.s[2]
+; CHECK-NEXT: sub v18.4s, v1.4s, v6.4s
+; CHECK-NEXT: mov v6.s[0], v5.s[1]
+; CHECK-NEXT: sub v19.4s, v0.4s, v4.4s
+; CHECK-NEXT: mov v0.s[1], v2.s[0]
 ; CHECK-NEXT: add v2.4s, v3.4s, v17.4s
 ; CHECK-NEXT: add v3.4s, v7.4s, v16.4s
-; CHECK-NEXT: mov v1.d[1], v18.d[1]
-; CHECK-NEXT: mov v0.d[1], v19.d[1]
+; CHECK-NEXT: add v1.4s, v1.4s, v6.4s
 ; CHECK-NEXT: mov v3.d[1], v21.d[1]
 ; CHECK-NEXT: mov v2.d[1], v20.d[1]
-; CHECK-NEXT: cmlt v4.8h, v1.8h, #0
-; CHECK-NEXT: cmlt v5.8h, v0.8h, #0
+; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
+; CHECK-NEXT: mov v1.d[1], v18.d[1]
+; CHECK-NEXT: mov v0.d[1], v19.d[1]
 ; CHECK-NEXT: cmlt v6.8h, v3.8h, #0
 ; CHECK-NEXT: cmlt v7.8h, v2.8h, #0
+; CHECK-NEXT: cmlt v4.8h, v1.8h, #0
 ; CHECK-NEXT: add v3.4s, v6.4s, v3.4s
 ; CHECK-NEXT: add v2.4s, v7.4s, v2.4s
+; CHECK-NEXT: cmlt v5.8h, v0.8h, #0
 ; CHECK-NEXT: add v1.4s, v4.4s, v1.4s
-; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
-; CHECK-NEXT: eor v1.16b, v1.16b, v4.16b
-; CHECK-NEXT: eor v0.16b, v0.16b, v5.16b
 ; CHECK-NEXT: eor v2.16b, v2.16b, v7.16b
 ; CHECK-NEXT: eor v3.16b, v3.16b, v6.16b
 ; CHECK-NEXT: add v2.4s, v2.4s, v3.4s
-; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
+; CHECK-NEXT: eor v1.16b, v1.16b, v4.16b
+; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: eor v0.16b, v0.16b, v5.16b
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w8, s0
 ; CHECK-NEXT: lsr w9, w8, #16
@@ -321,25 +321,25 @@
 ; CHECK-NEXT: ext v0.16b, v4.16b, v0.16b, #8
 ; CHECK-NEXT: ext v3.16b, v16.16b, v3.16b, #8
 ; CHECK-NEXT: add v1.4s, v5.4s, v1.4s
-; CHECK-NEXT: sub v5.4s, v6.4s, v17.4s
+; CHECK-NEXT: sub v2.4s, v7.4s, v2.4s
 ; CHECK-NEXT: ext v0.16b, v0.16b, v4.16b, #4
 ; CHECK-NEXT: ext v3.16b, v3.16b, v16.16b, #4
-; CHECK-NEXT: cmlt v6.8h, v5.8h, #0
-; CHECK-NEXT: sub v2.4s, v7.4s, v2.4s
-; CHECK-NEXT: add v4.4s, v6.4s, v5.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT: sub v5.4s, v6.4s, v17.4s
 ; CHECK-NEXT: cmlt v7.8h, v2.8h, #0
 ; CHECK-NEXT: cmlt v17.8h, v1.8h, #0
-; CHECK-NEXT: eor v3.16b, v4.16b, v6.16b
-; CHECK-NEXT: cmlt v4.8h, v0.8h, #0
+; CHECK-NEXT: cmlt v6.8h, v5.8h, #0
 ; CHECK-NEXT: add v1.4s, v17.4s, v1.4s
 ; CHECK-NEXT: add v2.4s, v7.4s, v2.4s
-; CHECK-NEXT: add v0.4s, v4.4s, v0.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT: add v4.4s, v6.4s, v5.4s
 ; CHECK-NEXT: eor v2.16b, v2.16b, v7.16b
 ; CHECK-NEXT: eor v1.16b, v1.16b, v17.16b
-; CHECK-NEXT: eor v0.16b, v0.16b, v4.16b
+; CHECK-NEXT: cmlt v3.8h, v0.8h, #0
 ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
+; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
+; CHECK-NEXT: eor v2.16b, v4.16b, v6.16b
+; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w8, s0
@@ -545,17 +545,17 @@
 ; CHECK-NEXT: cmlt v6.8h, v3.8h, #0
 ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT: cmlt v2.8h, v1.8h, #0
-; CHECK-NEXT: cmlt v7.8h, v0.8h, #0
-; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
 ; CHECK-NEXT: add v3.4s, v6.4s, v3.4s
+; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: cmlt v7.8h, v0.8h, #0
 ; CHECK-NEXT: add v4.4s, v5.4s, v4.4s
-; CHECK-NEXT: add v0.4s, v7.4s, v0.4s
-; CHECK-NEXT: eor v4.16b, v4.16b, v5.16b
-; CHECK-NEXT: eor v0.16b, v0.16b, v7.16b
 ; CHECK-NEXT: eor v3.16b, v3.16b, v6.16b
 ; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b
 ; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
+; CHECK-NEXT: add v0.4s, v7.4s, v0.4s
+; CHECK-NEXT: eor v2.16b, v4.16b, v5.16b
+; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: eor v0.16b, v0.16b, v7.16b
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w8, s0
Index: llvm/test/CodeGen/AArch64/reduce-xor.ll
===================================================================
--- llvm/test/CodeGen/AArch64/reduce-xor.ll
+++ llvm/test/CodeGen/AArch64/reduce-xor.ll
@@ -262,13 +262,13 @@
 ; CHECK-LABEL: test_redxor_v4i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.h[1]
-; CHECK-NEXT: umov w9, v0.h[0]
-; CHECK-NEXT: umov w10, v0.h[2]
-; CHECK-NEXT: umov w11, v0.h[3]
+; CHECK-NEXT: umov w8, v0.h[3]
+; CHECK-NEXT: umov w9, v0.h[2]
+; CHECK-NEXT: umov w10, v0.h[1]
+; CHECK-NEXT: umov w11, v0.h[0]
 ; CHECK-NEXT: eor w8, w9, w8
-; CHECK-NEXT: eor w8, w8, w10
-; CHECK-NEXT: eor w0, w8, w11
+; CHECK-NEXT: eor w10, w11, w10
+; CHECK-NEXT: eor w0, w10, w8
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redxor_v4i8:
@@ -293,21 +293,21 @@
 ; CHECK-LABEL: test_redxor_v8i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.b[1]
-; CHECK-NEXT: umov w9, v0.b[0]
-; CHECK-NEXT: umov w10, v0.b[2]
-; CHECK-NEXT: umov w11, v0.b[3]
-; CHECK-NEXT: umov w12, v0.b[4]
-; CHECK-NEXT: umov w13, v0.b[5]
+; CHECK-NEXT: umov w8, v0.b[5]
+; CHECK-NEXT: umov w9, v0.b[4]
+; CHECK-NEXT: umov w10, v0.b[1]
+; CHECK-NEXT: umov w11, v0.b[0]
+; CHECK-NEXT: umov w12, v0.b[3]
+; CHECK-NEXT: umov w13, v0.b[2]
+; CHECK-NEXT: umov w14, v0.b[6]
+; CHECK-NEXT: umov w15, v0.b[7]
 ; CHECK-NEXT: eor w8, w9, w8
-; CHECK-NEXT: umov w9, v0.b[6]
-; CHECK-NEXT: eor w8, w8, w10
-; CHECK-NEXT: umov w10, v0.b[7]
-; CHECK-NEXT: eor w8, w8, w11
-; CHECK-NEXT: eor w8, w8, w12
-; CHECK-NEXT: eor w8, w8, w13
-; CHECK-NEXT: eor w8, w8, w9
-; CHECK-NEXT: eor w0, w8, w10
+; CHECK-NEXT: eor w10, w11, w10
+; CHECK-NEXT: eor w11, w13, w12
+; CHECK-NEXT: eor w9, w10, w11
+; CHECK-NEXT: eor w8, w8, w14
+; CHECK-NEXT: eor w8, w9, w8
+; CHECK-NEXT: eor w0, w8, w15
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redxor_v8i8:
@@ -350,16 +350,16 @@
 ; CHECK-NEXT: umov w10, v0.b[2]
 ; CHECK-NEXT: umov w11, v0.b[3]
 ; CHECK-NEXT: umov w12, v0.b[4]
+; CHECK-NEXT: umov w13, v0.b[5]
+; CHECK-NEXT: umov w14, v0.b[6]
 ; CHECK-NEXT: eor w8, w9, w8
-; CHECK-NEXT: umov w9, v0.b[5]
+; CHECK-NEXT: umov w9, v0.b[7]
+; CHECK-NEXT: eor w10, w10, w11
+; CHECK-NEXT: eor w11, w12, w13
 ; CHECK-NEXT: eor w8, w8, w10
-; CHECK-NEXT: umov w10, v0.b[6]
-; CHECK-NEXT: eor w8, w8, w11
-; CHECK-NEXT: umov w11, v0.b[7]
-; CHECK-NEXT: eor w8, w8, w12
-; CHECK-NEXT: eor w8, w8, w9
+; CHECK-NEXT: eor w10, w11, w14
 ; CHECK-NEXT: eor w8, w8, w10
-; CHECK-NEXT: eor w0, w8, w11
+; CHECK-NEXT: eor w0, w8, w9
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redxor_v16i8:
@@ -404,16 +404,16 @@
 ; CHECK-NEXT: umov w10, v0.b[2]
 ; CHECK-NEXT: umov w11, v0.b[3]
 ; CHECK-NEXT: umov w12, v0.b[4]
+; CHECK-NEXT: umov w13, v0.b[5]
+; CHECK-NEXT: umov w14, v0.b[6]
 ; CHECK-NEXT: eor w8, w9, w8
-; CHECK-NEXT: umov w9, v0.b[5]
+; CHECK-NEXT: umov w9, v0.b[7]
+; CHECK-NEXT: eor w10, w10, w11
+; CHECK-NEXT: eor w11, w12, w13
 ; CHECK-NEXT: eor w8, w8, w10
-; CHECK-NEXT: umov w10, v0.b[6]
-; CHECK-NEXT: eor w8, w8, w11
-; CHECK-NEXT: umov w11, v0.b[7]
-; CHECK-NEXT: eor w8, w8, w12
-; CHECK-NEXT: eor w8, w8, w9
+; CHECK-NEXT: eor w10, w11, w14
 ; CHECK-NEXT: eor w8, w8, w10
-; CHECK-NEXT: eor w0, w8, w11
+; CHECK-NEXT: eor w0, w8, w9
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redxor_v32i8:
@@ -452,13 +452,13 @@
 ; CHECK-LABEL: test_redxor_v4i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.h[1]
-; CHECK-NEXT: umov w9, v0.h[0]
-; CHECK-NEXT: umov w10, v0.h[2]
-; CHECK-NEXT: umov w11, v0.h[3]
+; CHECK-NEXT: umov w8, v0.h[3]
+; CHECK-NEXT: umov w9, v0.h[2]
+; CHECK-NEXT: umov w10, v0.h[1]
+; CHECK-NEXT: umov w11, v0.h[0]
 ; CHECK-NEXT: eor w8, w9, w8
-; CHECK-NEXT: eor w8, w8, w10
-; CHECK-NEXT: eor w0, w8, w11
+; CHECK-NEXT: eor w10, w11, w10
+; CHECK-NEXT: eor w0, w10, w8
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redxor_v4i16:
@@ -489,8 +489,8 @@
 ; CHECK-NEXT: umov w10, v0.h[2]
 ; CHECK-NEXT: umov w11, v0.h[3]
 ; CHECK-NEXT: eor w8, w9, w8
-; CHECK-NEXT: eor w8, w8, w10
-; CHECK-NEXT: eor w0, w8, w11
+; CHECK-NEXT: eor w9, w10, w11
+; CHECK-NEXT: eor w0, w8, w9
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redxor_v8i16:
@@ -523,8 +523,8 @@
 ; CHECK-NEXT: umov w10, v0.h[2]
 ; CHECK-NEXT: umov w11, v0.h[3]
 ; CHECK-NEXT: eor w8, w9, w8
-; CHECK-NEXT: eor w8, w8, w10
-; CHECK-NEXT: eor w0, w8, w11
+; CHECK-NEXT: eor w9, w10, w11
+; CHECK-NEXT: eor w0, w8, w9
 ; CHECK-NEXT: ret
 ;
 ; GISEL-LABEL: test_redxor_v16i16:
Index: llvm/test/CodeGen/AArch64/swift-return.ll
===================================================================
--- llvm/test/CodeGen/AArch64/swift-return.ll
+++ llvm/test/CodeGen/AArch64/swift-return.ll
@@ -27,10 +27,10 @@
 ; CHECK-LABEL: test2
 ; CHECK: bl _gen2
-; CHECK: add [[TMP:x.*]], x0, x1
-; CHECK: add [[TMP]], [[TMP]], x2
-; CHECK: add [[TMP]], [[TMP]], x3
-; CHECK: add x0, [[TMP]], x4
+; CHECK: add [[TMP1:x.*]], x0, x1
+; CHECK: add [[TMP2:x.*]], x2, x3
+; CHECK: add [[TMP3:x.*]], [[TMP1]], [[TMP2]]
+; CHECK: add x0, [[TMP3]], x4
 ; CHECK-O0-LABEL: test2
 ; CHECK-O0: bl _gen2
 ; CHECK-O0: add [[TMP:x.*]], x0, x1
@@ -74,9 +74,9 @@
 ; CHECK-LABEL: test3
 ; CHECK: bl _gen3
-; CHECK: add [[TMP:w.*]], w0, w1
-; CHECK: add [[TMP]], [[TMP]], w2
-; CHECK: add w0, [[TMP]], w3
+; CHECK: add [[TMP1:w.*]], w0, w1
+; CHECK: add [[TMP2:w.*]], w2, w3
+; CHECK: add [[TMP3:w.*]], [[TMP1]], [[TMP2]]
 ; CHECK-O0-LABEL: test3
 ; CHECK-O0: bl _gen3
 ; CHECK-O0: add [[TMP:w.*]], w0, w1
@@ -158,9 +158,9 @@
 ; CHECK-DAG: fadd d0, d0, d1
 ; CHECK-DAG: fadd d0, d0, d2
 ; CHECK-DAG: fadd d0, d0, d3
-; CHECK-DAG: add [[TMP:w.*]], w0, w1
-; CHECK-DAG: add [[TMP]], [[TMP]], w2
-; CHECK-DAG: add w0, [[TMP]], w3
+; CHECK-DAG: add [[TMP1:w.*]], w0, w1
+; CHECK-DAG: add [[TMP2:w.*]], w2, w3
+; CHECK-DAG: add [[TMP3:w.*]], [[TMP1]], [[TMP2]]
 ; CHECK-O0-LABEL: test6
 ; CHECK-O0: bl _gen6
 ; CHECK-O0-DAG: fadd d0, d0, d1
Index: llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
===================================================================
--- llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -97,28 +97,28 @@
 ; CHECK-LABEL: test_v9i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #-1
-; CHECK-NEXT: umov w12, v0.b[4]
+; CHECK-NEXT: umov w9, v0.b[5]
 ; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: umov w10, v0.b[6]
+; CHECK-NEXT: umov w15, v0.b[7]
 ; CHECK-NEXT: mov v1.b[9], w8
 ; CHECK-NEXT: mov v1.b[10], w8
 ; CHECK-NEXT: mov v1.b[11], w8
 ; CHECK-NEXT: mov v1.b[13], w8
+; CHECK-NEXT: umov w8, v0.b[4]
 ; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: and v1.8b, v0.8b, v1.8b
-; CHECK-NEXT: umov w8, v1.b[1]
-; CHECK-NEXT: umov w9, v1.b[0]
-; CHECK-NEXT: umov w10, v1.b[2]
-; CHECK-NEXT: umov w11, v1.b[3]
-; CHECK-NEXT: and w8, w9, w8
-; CHECK-NEXT: umov w9, v0.b[5]
-; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: umov w10, v0.b[6]
-; CHECK-NEXT: and w8, w8, w11
-; CHECK-NEXT: umov w11, v0.b[7]
-; CHECK-NEXT: and w8, w8, w12
 ; CHECK-NEXT: and w8, w8, w9
 ; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: and w0, w8, w11
+; CHECK-NEXT: and w8, w8, w15
+; CHECK-NEXT: and v1.8b, v0.8b, v1.8b
+; CHECK-NEXT: umov w11, v1.b[1]
+; CHECK-NEXT: umov w12, v1.b[0]
+; CHECK-NEXT: umov w13, v1.b[2]
+; CHECK-NEXT: umov w14, v1.b[3]
+; CHECK-NEXT: and w9, w12, w11
+; CHECK-NEXT: and w11, w13, w14
+; CHECK-NEXT: and w9, w9, w11
+; CHECK-NEXT: and w0, w9, w8
 ; CHECK-NEXT: ret
 %b = call i8 @llvm.vector.reduce.and.v9i8(<9 x i8> %a)
 ret i8 %b