Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -85,6 +85,7 @@
   // Do some preprocessing of G_PHIs before we begin selection.
   void processPHIs(MachineFunction &MF);
 
+  bool earlySelectAND(MachineInstr &MI, MachineRegisterInfo &MRI) const;
   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
 
   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
@@ -1666,6 +1667,76 @@
   return true;
 }
 
+bool AArch64InstructionSelector::earlySelectAND(
+    MachineInstr &MI, MachineRegisterInfo &MRI) const {
+  assert(MI.getOpcode() == TargetOpcode::G_AND);
+  // Look for the following:
+  // %low_bit_mask_cst = G_CONSTANT iN low_bit_mask
+  // %immr_cst = G_CONSTANT iN immr
+  // %lshr = G_LSHR %something, %immr_cst
+  // %dst = G_AND %lshr, %low_bit_mask_cst
+  //
+  // And produce
+  //
+  // %dst = UBFM %something, immr, imms
+  //
+  // Where imms = immr + trailing_ones(low_bit_mask) - 1
+  //
+  // When both immr and imms are in the range [0, size of dst in bits)
+  //
+  // TODO: Handle other cases from isBitfieldExtractOpFromAnd in
+  // AArch64ISelDAGToDAG.
+  Register Dst = MI.getOperand(0).getReg();
+  auto DstTy = MRI.getType(Dst);
+  if (DstTy.isVector())
+    return false;
+
+  // UBFM only supports 32-bit and 64-bit registers.
+  unsigned DstSize = DstTy.getSizeInBits();
+  if (DstSize != 32 && DstSize != 64)
+    return false;
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  // Look for a mask on the G_AND's RHS. The immediate must be a non-zero
+  // mask of the low bits, i.e. imm != 0 and imm & (imm + 1) == 0.
+  auto MaybeLowBitMask = getConstantVRegValWithLookThrough(RHS, MRI);
+  if (!MaybeLowBitMask)
+    return false;
+  uint64_t LowBitMask = MaybeLowBitMask->Value;
+  if (!LowBitMask || (LowBitMask & (LowBitMask + 1)))
+    return false;
+
+  // Look for %lshr = G_LSHR %something, %constant.
+  MachineInstr *Lshr = getOpcodeDef(TargetOpcode::G_LSHR, LHS, MRI);
+  if (!Lshr)
+    return false;
+
+  auto MaybeLshrImm =
+      getConstantVRegValWithLookThrough(Lshr->getOperand(2).getReg(), MRI);
+  if (!MaybeLshrImm)
+    return false;
+
+  // Check that the immediates we want to pass to the UBFM are legal. Both must
+  // be in the range [0, DstSize).
+  uint64_t ImmR = MaybeLshrImm->Value;
+  if (ImmR >= DstSize)
+    return false;
+  // imms indexes the highest bit of the extracted field:
+  //
+  //   imms = immr + trailing_ones(low_bit_mask) - 1
+  uint64_t ImmS = ImmR + countTrailingOnes(LowBitMask) - 1;
+  if (ImmS >= DstSize)
+    return false;
+
+  MachineIRBuilder MIB(MI);
+  unsigned Opc = DstSize == 64 ?
AArch64::UBFMXri : AArch64::UBFMWri; + auto UBFM = + MIB.buildInstr(Opc, {Dst}, {Lshr->getOperand(1).getReg(), ImmR, ImmS}); + MI.eraseFromParent(); + return constrainSelectedInstRegOperands(*UBFM, TII, TRI, RBI); +} + bool AArch64InstructionSelector::earlySelectSHL( MachineInstr &I, MachineRegisterInfo &MRI) const { // We try to match the immediate variant of LSL, which is actually an alias @@ -1752,6 +1823,8 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); switch (I.getOpcode()) { + case TargetOpcode::G_AND: + return earlySelectAND(I, MRI); case TargetOpcode::G_SHL: return earlySelectSHL(I, MRI); case TargetOpcode::G_CONSTANT: { Index: llvm/test/CodeGen/AArch64/GlobalISel/select-and-to-ubfm.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/select-and-to-ubfm.mir @@ -0,0 +1,300 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +# +# Check that we can recognize a G_AND and a G_LSHR which can be combined into +# a UBFM. + +... +--- +name: and_to_ubfm_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + ; CHECK-LABEL: name: and_to_ubfm_s32 + ; CHECK: liveins: $w0 + ; CHECK: %copy:gpr32 = COPY $w0 + ; CHECK: %res:gpr32 = UBFMWri %copy, 22, 22 + ; CHECK: $w0 = COPY %res + ; CHECK: RET_ReallyLR implicit $w0 + %copy:gpr(s32) = COPY $w0 + %immr:gpr(s64) = G_CONSTANT i64 22 + %lshr:gpr(s32) = G_LSHR %copy, %immr(s64) + %mask:gpr(s32) = G_CONSTANT i32 1 + %res:gpr(s32) = G_AND %lshr, %mask + $w0 = COPY %res(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: and_to_ubfm_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: and_to_ubfm_s64 + ; CHECK: liveins: $x0 + ; CHECK: %copy:gpr64 = COPY $x0 + ; CHECK: %res:gpr64 = UBFMXri %copy, 22, 22 + ; CHECK: $x0 = COPY %res + ; CHECK: RET_ReallyLR implicit $x0 + %copy:gpr(s64) = COPY $x0 + %immr:gpr(s64) = G_CONSTANT i64 22 + %lshr:gpr(s64) = G_LSHR %copy, %immr(s64) + %mask:gpr(s64) = G_CONSTANT i64 1 + %res:gpr(s64) = G_AND %lshr, %mask + $x0 = COPY %res(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: too_large_immr_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + ; We can't combine here because both of the immediates passed to UBFM must + ; be smaller than the size of the register. + ; + ; In this case, immr is too large. + ; + ; CHECK-LABEL: name: too_large_immr_s32 + ; CHECK: liveins: $w0 + ; CHECK: %copy:gpr32 = COPY $w0 + ; CHECK: %immr:gpr32 = MOVi32imm 40 + ; CHECK: %lshr:gpr32 = LSRVWr %copy, %immr + ; CHECK: %res:gpr32sp = ANDWri %lshr, 0 + ; CHECK: $w0 = COPY %res + ; CHECK: RET_ReallyLR implicit $w0 + %copy:gpr(s32) = COPY $w0 + %immr:gpr(s32) = G_CONSTANT i32 40 + %lshr:gpr(s32) = G_LSHR %copy, %immr(s32) + %mask:gpr(s32) = G_CONSTANT i32 1 + %res:gpr(s32) = G_AND %lshr, %mask + $w0 = COPY %res(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: too_large_imms_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + ; We can't combine here because both of the immediates passed to UBFM must + ; be smaller than the size of the register. + ; + ; Trailing ones of the mask: 2 + ; immr = 31 + ; + ; -> imms = 31 + 2 - 1 = 32, which is too large. 
+    ;
+    ; CHECK-LABEL: name: too_large_imms_s32
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %lshr:gpr32 = UBFMWri %copy, 31, 31
+    ; CHECK: %res:gpr32sp = ANDWri %lshr, 1
+    ; CHECK: $w0 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:gpr(s32) = COPY $w0
+    %immr:gpr(s64) = G_CONSTANT i64 31
+    %lshr:gpr(s32) = G_LSHR %copy, %immr(s64)
+    %mask:gpr(s32) = G_CONSTANT i32 3
+    %res:gpr(s32) = G_AND %lshr, %mask
+    $w0 = COPY %res(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: too_large_immr_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: too_large_immr_s64
+    ; CHECK: liveins: $x0
+    ; CHECK: %copy:gpr64 = COPY $x0
+    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 64
+    ; CHECK: %immr:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+    ; CHECK: %lshr:gpr64 = LSRVXr %copy, %immr
+    ; CHECK: %res:gpr64sp = ANDXri %lshr, 4096
+    ; CHECK: $x0 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:gpr(s64) = COPY $x0
+    %immr:gpr(s64) = G_CONSTANT i64 64
+    %lshr:gpr(s64) = G_LSHR %copy, %immr(s64)
+    %mask:gpr(s64) = G_CONSTANT i64 1
+    %res:gpr(s64) = G_AND %lshr, %mask
+    $x0 = COPY %res(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: too_large_imms_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; We can't combine here because both of the immediates passed to UBFM must
+    ; be smaller than the size of the register.
+    ;
+    ; Trailing ones of the mask: 3
+    ; immr = 62
+    ;
+    ; -> imms = 62 + 3 - 1 = 64, which is too large.
+    ;
+    ; CHECK-LABEL: name: too_large_imms_s64
+    ; CHECK: liveins: $x0
+    ; CHECK: %copy:gpr64 = COPY $x0
+    ; CHECK: %lshr:gpr64 = UBFMXri %copy, 62, 63
+    ; CHECK: %res:gpr64sp = ANDXri %lshr, 4098
+    ; CHECK: $x0 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:gpr(s64) = COPY $x0
+    %immr:gpr(s64) = G_CONSTANT i64 62
+    %lshr:gpr(s64) = G_LSHR %copy, %immr(s64)
+    %mask:gpr(s64) = G_CONSTANT i64 7
+    %res:gpr(s64) = G_AND %lshr, %mask
+    $x0 = COPY %res(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: bad_low_bit_mask
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; The mask value has to be a valid low-bit mask.
+    ;
+    ; That is, mask & (mask + 1) == 0.
+    ;
+    ; mask = 2
+    ; 2 & (2 + 1) != 0
+    ;
+    ; CHECK-LABEL: name: bad_low_bit_mask
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %lshr:gpr32 = UBFMWri %copy, 20, 31
+    ; CHECK: %res:gpr32sp = ANDWri %lshr, 1984
+    ; CHECK: $w0 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:gpr(s32) = COPY $w0
+    %immr:gpr(s64) = G_CONSTANT i64 20
+    %lshr:gpr(s32) = G_LSHR %copy, %immr(s64)
+    %mask:gpr(s32) = G_CONSTANT i32 2
+    %res:gpr(s32) = G_AND %lshr, %mask
+    $w0 = COPY %res(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_fold_negative_immr
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; Both immediates must be in [0, size of register).
+    ;
+    ; immr = -10, which is not allowed for a ubfm.
+ ; + ; CHECK-LABEL: name: dont_fold_negative_immr + ; CHECK: liveins: $w0 + ; CHECK: %copy:gpr32 = COPY $w0 + ; CHECK: %immr:gpr32 = MOVi32imm -10 + ; CHECK: %lshr:gpr32 = LSRVWr %copy, %immr + ; CHECK: %res:gpr32sp = ANDWri %lshr, 0 + ; CHECK: $w0 = COPY %res + ; CHECK: RET_ReallyLR implicit $w0 + %copy:gpr(s32) = COPY $w0 + %immr:gpr(s32) = G_CONSTANT i32 -10 + %lshr:gpr(s32) = G_LSHR %copy, %immr(s32) + %mask:gpr(s32) = G_CONSTANT i32 1 + %res:gpr(s32) = G_AND %lshr, %mask + $w0 = COPY %res(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_fold_negative_imms +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + ; Both immediates must be in [0, size of register). + ; + ; imms = 0 + trailing ones(0) - 1 = -1, which is not allowed for a ubfm. + ; + ; CHECK-LABEL: name: dont_fold_negative_imms + ; CHECK: liveins: $w0 + ; CHECK: %copy:gpr32 = COPY $w0 + ; CHECK: %mask:gpr32 = COPY $wzr + ; CHECK: %res:gpr32 = ANDWrs %mask, %copy, 64 + ; CHECK: $w0 = COPY %res + ; CHECK: RET_ReallyLR implicit $w0 + %copy:gpr(s32) = COPY $w0 + %immr:gpr(s32) = G_CONSTANT i32 0 + %lshr:gpr(s32) = G_LSHR %copy, %immr(s32) + %mask:gpr(s32) = G_CONSTANT i32 0 + %res:gpr(s32) = G_AND %lshr, %mask + $w0 = COPY %res(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: zero +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + ; 0 is a valid value for immr and imms. + ; immr = 0 + ; imms = 0 + trailing ones(1) - 1 = 0 + ; + ; CHECK-LABEL: name: zero + ; CHECK: liveins: $w0 + ; CHECK: %copy:gpr32 = COPY $w0 + ; CHECK: %res:gpr32 = UBFMWri %copy, 0, 0 + ; CHECK: $w0 = COPY %res + ; CHECK: RET_ReallyLR implicit $w0 + %copy:gpr(s32) = COPY $w0 + %immr:gpr(s32) = G_CONSTANT i32 0 + %lshr:gpr(s32) = G_LSHR %copy, %immr(s32) + %mask:gpr(s32) = G_CONSTANT i32 1 + %res:gpr(s32) = G_AND %lshr, %mask + $w0 = COPY %res(s32) + RET_ReallyLR implicit $w0 + +... 
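A note on the arithmetic the negative tests above exercise: the legality check boils down to computing imms = immr + trailing_ones(mask) - 1 and requiring both immediates to lie in [0, register size). The standalone C++20 sketch below is illustrative only; computeUbfmImms and the main driver are made-up names, not part of this patch or of LLVM. It simply mirrors the checks in earlySelectAND and reproduces the values quoted in the test comments, assuming std::countr_one from <bit>.

// Standalone illustration (not LLVM code) of the UBFM immediate checks.
// Build with a C++20 compiler, e.g. clang++ -std=c++20 ubfm_imms.cpp
#include <bit>
#include <cstdint>
#include <cstdio>
#include <optional>
#include <utility>

// Returns (immr, imms) if `(x >> Immr) & Mask` can be selected as a single
// unsigned bitfield extract of a DstSize-bit register, otherwise nullopt.
static std::optional<std::pair<uint64_t, uint64_t>>
computeUbfmImms(uint64_t Mask, uint64_t Immr, unsigned DstSize) {
  // The mask must be a non-zero mask of the low bits.
  if (Mask == 0 || (Mask & (Mask + 1)) != 0)
    return std::nullopt;
  // Both immediates must be in [0, DstSize).
  if (Immr >= DstSize)
    return std::nullopt;
  uint64_t Imms = Immr + std::countr_one(Mask) - 1;
  if (Imms >= DstSize)
    return std::nullopt;
  return std::make_pair(Immr, Imms);
}

int main() {
  // and_to_ubfm_s32: (x >> 22) & 0x1 -> UBFMWri %copy, 22, 22.
  if (auto R = computeUbfmImms(0x1, 22, 32))
    std::printf("s32 fold: immr=%llu imms=%llu\n",
                (unsigned long long)R->first, (unsigned long long)R->second);
  // too_large_imms_s32: imms = 31 + 2 - 1 = 32 is out of range, no fold.
  if (!computeUbfmImms(0x3, 31, 32))
    std::printf("s32: mask=0x3, immr=31 rejected\n");
  // too_large_imms_s64: imms = 62 + 3 - 1 = 64 is out of range, no fold.
  if (!computeUbfmImms(0x7, 62, 64))
    std::printf("s64: mask=0x7, immr=62 rejected\n");
  return 0;
}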
Index: llvm/test/CodeGen/AArch64/arm64-rev.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-rev.ll +++ llvm/test/CodeGen/AArch64/arm64-rev.ll @@ -1,34 +1,34 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple | FileCheck %s -; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=aarch64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=FALLBACK,GISEL +; RUN: llc < %s -global-isel -global-isel-abort=2 -mtriple=aarch64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=GISEL -; FALLBACK-NOT: remark{{.*}}test_rev_w +; GISEL-NOT: remark{{.*}}test_rev_w define i32 @test_rev_w(i32 %a) nounwind { ; CHECK-LABEL: test_rev_w: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: rev w0, w0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_w: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: rev w0, w0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_w: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: rev w0, w0 +; GISEL-NEXT: ret entry: %0 = tail call i32 @llvm.bswap.i32(i32 %a) ret i32 %0 } -; FALLBACK-NOT: remark{{.*}}test_rev_x +; GISEL-NOT: remark{{.*}}test_rev_x define i64 @test_rev_x(i64 %a) nounwind { ; CHECK-LABEL: test_rev_x: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: rev x0, x0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_x: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: rev x0, x0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_x: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: rev x0, x0 +; GISEL-NEXT: ret entry: %0 = tail call i64 @llvm.bswap.i64(i64 %a) ret i64 %0 @@ -43,12 +43,12 @@ ; CHECK-NEXT: lsr w0, w8, #16 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_w_srl16: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: and w8, w0, #0xffff -; FALLBACK-NEXT: rev w8, w8 -; FALLBACK-NEXT: lsr w0, w8, #16 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_w_srl16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: and w8, w0, #0xffff +; GISEL-NEXT: rev w8, w8 +; GISEL-NEXT: lsr w0, w8, #16 +; GISEL-NEXT: ret entry: %0 = zext i16 %a to i32 %1 = tail call i32 @llvm.bswap.i32(i32 %0) @@ -64,12 +64,12 @@ ; CHECK-NEXT: lsr w0, w8, #16 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_w_srl16_load: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: ldrh w8, [x0] -; FALLBACK-NEXT: rev w8, w8 -; FALLBACK-NEXT: lsr w0, w8, #16 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_w_srl16_load: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: ldrh w8, [x0] +; GISEL-NEXT: rev w8, w8 +; GISEL-NEXT: lsr w0, w8, #16 +; GISEL-NEXT: ret entry: %0 = load i16, i16 *%a %1 = zext i16 %0 to i32 @@ -86,13 +86,13 @@ ; CHECK-NEXT: rev16 w0, w8 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_w_srl16_add: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: and w8, w1, #0xff -; FALLBACK-NEXT: add w8, w8, w0, uxtb -; FALLBACK-NEXT: rev w8, w8 -; FALLBACK-NEXT: lsr w0, w8, #16 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_w_srl16_add: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: and w8, w1, #0xff +; GISEL-NEXT: add w8, w8, w0, uxtb +; GISEL-NEXT: rev w8, w8 +; GISEL-NEXT: lsr w0, w8, #16 +; GISEL-NEXT: ret entry: %0 = zext i8 %a to i32 %1 = zext i8 %b to i32 @@ -112,13 +112,13 @@ ; CHECK-NEXT: lsr x0, x8, #32 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_x_srl32: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: // kill: def $w0 killed $w0 def $x0 -; FALLBACK-NEXT: ubfx x8, x0, #0, #32 -; FALLBACK-NEXT: rev x8, x8 -; FALLBACK-NEXT: lsr x0, x8, 
#32 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_x_srl32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: // kill: def $w0 killed $w0 def $x0 +; GISEL-NEXT: ubfx x8, x0, #0, #32 +; GISEL-NEXT: rev x8, x8 +; GISEL-NEXT: lsr x0, x8, #32 +; GISEL-NEXT: ret entry: %0 = zext i32 %a to i64 %1 = tail call i64 @llvm.bswap.i64(i64 %0) @@ -134,12 +134,12 @@ ; CHECK-NEXT: lsr x0, x8, #32 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_x_srl32_load: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: ldr w8, [x0] -; FALLBACK-NEXT: rev x8, x8 -; FALLBACK-NEXT: lsr x0, x8, #32 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_x_srl32_load: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: ldr w8, [x0] +; GISEL-NEXT: rev x8, x8 +; GISEL-NEXT: lsr x0, x8, #32 +; GISEL-NEXT: ret entry: %0 = load i32, i32 *%a %1 = zext i32 %0 to i64 @@ -155,13 +155,13 @@ ; CHECK-NEXT: rev32 x0, x8 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_x_srl32_shift: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: lsl x8, x0, #33 -; FALLBACK-NEXT: lsr x8, x8, #35 -; FALLBACK-NEXT: rev x8, x8 -; FALLBACK-NEXT: lsr x0, x8, #32 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_x_srl32_shift: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: lsl x8, x0, #33 +; GISEL-NEXT: lsr x8, x8, #35 +; GISEL-NEXT: rev x8, x8 +; GISEL-NEXT: lsr x0, x8, #32 +; GISEL-NEXT: ret entry: %0 = shl i64 %a, 33 %1 = lshr i64 %0, 35 @@ -179,18 +179,18 @@ ; CHECK-NEXT: rev16 w0, w0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev16_w: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: lsr w8, w0, #8 -; FALLBACK-NEXT: lsl w9, w0, #8 -; FALLBACK-NEXT: and w10, w8, #0xff0000 -; FALLBACK-NEXT: and w11, w9, #0xff000000 -; FALLBACK-NEXT: and w9, w9, #0xff00 -; FALLBACK-NEXT: orr w10, w11, w10 -; FALLBACK-NEXT: and w8, w8, #0xff -; FALLBACK-NEXT: orr w9, w10, w9 -; FALLBACK-NEXT: orr w0, w9, w8 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev16_w: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: lsr w8, w0, #8 +; GISEL-NEXT: lsl w9, w0, #8 +; GISEL-NEXT: and w8, w8, #0xff0000 +; GISEL-NEXT: and w11, w9, #0xff000000 +; GISEL-NEXT: and w9, w9, #0xff00 +; GISEL-NEXT: orr w8, w11, w8 +; GISEL-NEXT: ubfx w10, w0, #8, #8 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w0, w8, w10 +; GISEL-NEXT: ret entry: %tmp1 = lshr i32 %X, 8 %X15 = bitcast i32 %X to i32 @@ -215,12 +215,12 @@ ; CHECK-NEXT: ror x0, x8, #16 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev16_x: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: rev x8, x0 -; FALLBACK-NEXT: lsl x9, x8, #48 -; FALLBACK-NEXT: orr x0, x9, x8, lsr #16 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev16_x: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: rev x8, x0 +; GISEL-NEXT: lsl x9, x8, #48 +; GISEL-NEXT: orr x0, x9, x8, lsr #16 +; GISEL-NEXT: ret entry: %0 = tail call i64 @llvm.bswap.i64(i64 %a) %1 = lshr i64 %0, 16 @@ -235,12 +235,12 @@ ; CHECK-NEXT: rev32 x0, x0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev32_x: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: rev x8, x0 -; FALLBACK-NEXT: lsl x9, x8, #32 -; FALLBACK-NEXT: orr x0, x9, x8, lsr #32 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev32_x: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: rev x8, x0 +; GISEL-NEXT: lsl x9, x8, #32 +; GISEL-NEXT: orr x0, x9, x8, lsr #32 +; GISEL-NEXT: ret entry: %0 = tail call i64 @llvm.bswap.i64(i64 %a) %1 = lshr i64 %0, 32 @@ -256,11 +256,11 @@ ; CHECK-NEXT: rev64.8b v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64D8: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev64.8b v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: 
test_vrev64D8: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev64.8b v0, v0 +; GISEL-NEXT: ret %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> ret <8 x i8> %tmp2 @@ -273,11 +273,11 @@ ; CHECK-NEXT: rev64.4h v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64D16: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev64.4h v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64D16: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev64.4h v0, v0 +; GISEL-NEXT: ret %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> ret <4 x i16> %tmp2 @@ -290,11 +290,11 @@ ; CHECK-NEXT: rev64.2s v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64D32: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev64.2s v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64D32: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev64.2s v0, v0 +; GISEL-NEXT: ret %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> ret <2 x i32> %tmp2 @@ -307,11 +307,11 @@ ; CHECK-NEXT: rev64.2s v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64Df: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev64.2s v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64Df: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev64.2s v0, v0 +; GISEL-NEXT: ret %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> ret <2 x float> %tmp2 @@ -324,11 +324,11 @@ ; CHECK-NEXT: rev64.16b v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64Q8: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr q0, [x0] -; FALLBACK-NEXT: rev64.16b v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64Q8: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: rev64.16b v0, v0 +; GISEL-NEXT: ret %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> ret <16 x i8> %tmp2 @@ -341,11 +341,11 @@ ; CHECK-NEXT: rev64.8h v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64Q16: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr q0, [x0] -; FALLBACK-NEXT: rev64.8h v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64Q16: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: rev64.8h v0, v0 +; GISEL-NEXT: ret %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> ret <8 x i16> %tmp2 @@ -358,11 +358,11 @@ ; CHECK-NEXT: rev64.4s v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64Q32: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr q0, [x0] -; FALLBACK-NEXT: rev64.4s v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64Q32: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: rev64.4s v0, v0 +; GISEL-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> ret <4 x i32> %tmp2 @@ -375,11 +375,11 @@ ; CHECK-NEXT: rev64.4s v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64Qf: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr q0, [x0] -; FALLBACK-NEXT: rev64.4s v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64Qf: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: rev64.4s v0, v0 +; GISEL-NEXT: ret %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> ret <4 x float> %tmp2 @@ 
-392,11 +392,11 @@ ; CHECK-NEXT: rev32.8b v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev32D8: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev32.8b v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev32D8: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev32.8b v0, v0 +; GISEL-NEXT: ret %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> ret <8 x i8> %tmp2 @@ -409,11 +409,11 @@ ; CHECK-NEXT: rev32.4h v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev32D16: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev32.4h v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev32D16: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev32.4h v0, v0 +; GISEL-NEXT: ret %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> ret <4 x i16> %tmp2 @@ -428,8 +428,11 @@ ; ; GISEL-LABEL: test_vrev32Q8: ; GISEL: // %bb.0: -; GISEL: tbl.16b v0, { v0, v1 }, v2 -; GISEL: ret +; GISEL-NEXT: adrp x8, .LCPI21_0 +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI21_0] +; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2 +; GISEL-NEXT: ret %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> ret <16 x i8> %tmp2 @@ -444,8 +447,11 @@ ; ; GISEL-LABEL: test_vrev32Q16: ; GISEL: // %bb.0: -; GISEL: tbl.16b v0, { v0, v1 }, v2 -; GISEL: ret +; GISEL-NEXT: adrp x8, .LCPI22_0 +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI22_0] +; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2 +; GISEL-NEXT: ret %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> ret <8 x i16> %tmp2 @@ -458,11 +464,11 @@ ; CHECK-NEXT: rev16.8b v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev16D8: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev16.8b v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev16D8: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev16.8b v0, v0 +; GISEL-NEXT: ret %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> ret <8 x i8> %tmp2 @@ -477,8 +483,11 @@ ; ; GISEL-LABEL: test_vrev16Q8: ; GISEL: // %bb.0: -; GISEL: tbl.16b v0, { v0, v1 }, v2 -; GISEL: ret +; GISEL-NEXT: adrp x8, .LCPI24_0 +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI24_0] +; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2 +; GISEL-NEXT: ret %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> ret <16 x i8> %tmp2 @@ -493,11 +502,11 @@ ; CHECK-NEXT: rev64.8b v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64D8_undef: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev64.8b v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64D8_undef: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev64.8b v0, v0 +; GISEL-NEXT: ret %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> ret <8 x i8> %tmp2 @@ -512,8 +521,11 @@ ; ; GISEL-LABEL: test_vrev32Q16_undef: ; GISEL: // %bb.0: -; GISEL: tbl.16b v0, { v0, v1 }, v2 -; GISEL: ret +; GISEL-NEXT: adrp x8, .LCPI26_0 +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI26_0] +; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2 +; GISEL-NEXT: ret %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> ret <8 x i16> %tmp2 @@ -529,13 
+541,13 @@ ; CHECK-NEXT: st1.h { v0 }[6], [x1] ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: ldr q0, [x0] -; FALLBACK-NEXT: add x8, x1, #2 // =2 -; FALLBACK-NEXT: st1.h { v0 }[5], [x8] -; FALLBACK-NEXT: st1.h { v0 }[6], [x1] -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: add x8, x1, #2 // =2 +; GISEL-NEXT: st1.h { v0 }[5], [x8] +; GISEL-NEXT: st1.h { v0 }[6], [x1] +; GISEL-NEXT: ret entry: %0 = bitcast <4 x i16>* %source to <8 x i16>* %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4 @@ -559,18 +571,18 @@ ; CHECK-NEXT: str q0, [x1, #176] ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: float_vrev64: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: fmov s0, wzr -; FALLBACK-NEXT: mov.s v0[1], v0[0] -; FALLBACK-NEXT: mov.s v0[2], v0[0] -; FALLBACK-NEXT: adrp x8, .LCPI28_0 -; FALLBACK-NEXT: mov.s v0[3], v0[0] -; FALLBACK-NEXT: ldr q1, [x0] -; FALLBACK-NEXT: ldr q2, [x8, :lo12:.LCPI28_0] -; FALLBACK-NEXT: tbl.16b v0, { v0, v1 }, v2 -; FALLBACK-NEXT: str q0, [x1, #176] -; FALLBACK-NEXT: ret +; GISEL-LABEL: float_vrev64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: fmov s0, wzr +; GISEL-NEXT: mov.s v0[1], v0[0] +; GISEL-NEXT: mov.s v0[2], v0[0] +; GISEL-NEXT: adrp x8, .LCPI28_0 +; GISEL-NEXT: mov.s v0[3], v0[0] +; GISEL-NEXT: ldr q1, [x0] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI28_0] +; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2 +; GISEL-NEXT: str q0, [x1, #176] +; GISEL-NEXT: ret entry: %0 = bitcast float* %source to <4 x float>* %tmp2 = load <4 x float>, <4 x float>* %0, align 4 @@ -587,10 +599,10 @@ ; CHECK-NEXT: rev32.16b v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev32_bswap: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: rev32.16b v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev32_bswap: +; GISEL: // %bb.0: +; GISEL-NEXT: rev32.16b v0, v0 +; GISEL-NEXT: ret %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source) ret <4 x i32> %bswap }
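For context, the source-level pattern this selection targets looks like the sketch below. It is illustrative C++ only, not taken from this patch; bit22 and bit22_64 are made-up names, and the exact registers depend on the calling convention and surrounding code. With this change applied, GlobalISel should select the same single unsigned bitfield extract that SelectionDAG already forms via isBitfieldExtractOpFromAnd.

// 32-bit case, mirroring and_to_ubfm_s32: expected to select
// UBFMWri %copy, 22, 22, which disassembles as  ubfx w0, w0, #22, #1.
unsigned bit22(unsigned x) { return (x >> 22) & 1u; }

// 64-bit case, mirroring and_to_ubfm_s64: expected to select
// UBFMXri %copy, 22, 22, i.e.  ubfx x0, x0, #22, #1.
unsigned long long bit22_64(unsigned long long x) { return (x >> 22) & 1u; }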