Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2964,34 +2964,46 @@
         return selectCopy(I, TII, MRI, TRI, RBI);
       }
 
+      // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
+      // + SUBREG_TO_REG.
+      //
       // If we are zero extending from 32 bits to 64 bits, it's possible that
       // the instruction implicitly does the zero extend for us. In that case,
-      // we can just emit a SUBREG_TO_REG.
+      // we only need the SUBREG_TO_REG.
       if (IsGPR && SrcSize == 32 && DstSize == 64) {
         // Unlike with the G_LOAD case, we don't want to look through copies
-        // here.
+        // here. (See isDef32.)
         MachineInstr *Def = MRI.getVRegDef(SrcReg);
-        if (Def && isDef32(*Def)) {
-          MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
-              .addImm(0)
-              .addUse(SrcReg)
-              .addImm(AArch64::sub_32);
-
-          if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
-                                            MRI)) {
-            LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
-            return false;
-          }
-
-          if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
-                                            MRI)) {
-            LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
-            return false;
-          }
-
-          I.eraseFromParent();
-          return true;
+        Register SubregToRegSrc = SrcReg;
+
+        // Does the instruction implicitly zero extend?
+        if (!Def || !isDef32(*Def)) {
+          // No. Zero out using an OR.
+          Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+          const Register ZReg = AArch64::WZR;
+          MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
+          SubregToRegSrc = OrDst;
+        }
+
+        MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
+            .addImm(0)
+            .addUse(SubregToRegSrc)
+            .addImm(AArch64::sub_32);
+
+        if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
+                                          MRI)) {
+          LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
+          return false;
         }
+
+        if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
+                                          MRI)) {
+          LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
+          return false;
+        }
+
+        I.eraseFromParent();
+        return true;
       }
     }
 
Index: llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
@@ -112,8 +112,8 @@
     ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
     ; CHECK: liveins: $x0
     ; CHECK: %copy:gpr32 = COPY $w0
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %copy, %subreg.sub_32
-    ; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %copy, 0
+    ; CHECK: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
     ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %zext.sub_32
     ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
     ; CHECK: TBNZW [[COPY1]], 3, %bb.1
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
@@ -254,8 +254,8 @@
     ; CHECK: liveins: $w0, $x1
     ; CHECK: %reg0:gpr32 = COPY $w0
     ; CHECK: %reg1:gpr64 = COPY $x1
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg0, %subreg.sub_32
-    ; CHECK: %ext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %reg0, 0
+    ; CHECK: %ext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
     ; CHECK: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv
     ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
     ; CHECK: $w0 = COPY %cmp
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir
@@ -167,9 +167,9 @@
 
     ; CHECK-LABEL: name: zext_s64_from_s32
     ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32
-    ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
-    ; CHECK: $x0 = COPY [[UBFMXri]]
+    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[COPY]], 0
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
     %0(s32) = COPY $w0
     %1(s64) = G_ZEXT %0
     $x0 = COPY %1(s64)
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-redundant-zext.mir
@@ -64,13 +64,13 @@
   bb.0:
     liveins: $w0
 
-    ; We should have a UBFMXri here, because isDef32 disallows copies.
+    ; We should have an ORRWrs here, because isDef32 disallows copies.
 
     ; CHECK-LABEL: name: dont_fold_copy
     ; CHECK: liveins: $w0
     ; CHECK: %copy:gpr32 = COPY $w0
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %copy, %subreg.sub_32
-    ; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %copy, 0
+    ; CHECK: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
     ; CHECK: $x0 = COPY %zext
     ; CHECK: RET_ReallyLR implicit $x0
     %copy:gpr(s32) = COPY $w0
@@ -88,14 +88,14 @@
   bb.0:
     liveins: $w0
 
-    ; We should have a UBFMXri here, because isDef32 disallows bitcasts.
+    ; We should have an ORRWrs here, because isDef32 disallows bitcasts.
 
     ; CHECK-LABEL: name: dont_fold_bitcast
     ; CHECK: liveins: $w0
     ; CHECK: %copy:gpr32all = COPY $w0
     ; CHECK: %bitcast1:gpr32 = COPY %copy
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %bitcast1, %subreg.sub_32
-    ; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %bitcast1, 0
+    ; CHECK: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
     ; CHECK: $x0 = COPY %zext
     ; CHECK: RET_ReallyLR implicit $x0
     %copy:gpr(s32) = COPY $w0
@@ -115,14 +115,14 @@
   bb.0:
     liveins: $x0
 
-    ; We should have a UBFMXri here, because isDef32 disallows truncs.
+    ; We should have an ORRWrs here, because isDef32 disallows truncs.
 
     ; CHECK-LABEL: name: dont_fold_trunc
     ; CHECK: liveins: $x0
     ; CHECK: %copy:gpr64sp = COPY $x0
     ; CHECK: %trunc:gpr32common = COPY %copy.sub_32
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %trunc, %subreg.sub_32
-    ; CHECK: %zext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %trunc, 0
+    ; CHECK: %zext:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
     ; CHECK: $x0 = COPY %zext
     ; CHECK: RET_ReallyLR implicit $x0
     %copy:gpr(s64) = COPY $x0
@@ -151,11 +151,11 @@
     ; CHECK: successors: %bb.2(0x80000000)
     ; CHECK: bb.2:
     ; CHECK: %phi:gpr32 = PHI %copy1, %bb.0, %copy2, %bb.1
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %phi, %subreg.sub_32
-    ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
-    ; CHECK: $x0 = COPY [[UBFMXri]]
+    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, %phi, 0
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
     ; CHECK: RET_ReallyLR implicit $x0
-    ; We should have a UBFMXri here, because isDef32 disallows phis.
+    ; We should have an ORRWrs here, because isDef32 disallows phis.
 
   bb.0:
     liveins: $w0, $w1, $w2
@@ -192,9 +192,9 @@
     ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
     ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
     ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY1]], [[COPY]]
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ADDWrr]], %subreg.sub_32
-    ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
-    ; CHECK: $x0 = COPY [[UBFMXri]]
+    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ADDWrr]], 0
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrs]], %subreg.sub_32
+    ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
     ; CHECK: RET_ReallyLR implicit $x0
     %0:gpr(s32) = COPY $w0
     %1:gpr(s32) = COPY $w1
Index: llvm/test/CodeGen/AArch64/arm64-rev.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -1,34 +1,34 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple | FileCheck %s
-; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=aarch64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=FALLBACK,GISEL
+; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=aarch64-eabi -aarch64-neon-syntax=apple 2>&1 | FileCheck %s --check-prefixes=GISEL
 
-; FALLBACK-NOT: remark{{.*}}test_rev_w
+; GISEL-NOT: remark{{.*}}test_rev_w
 define i32 @test_rev_w(i32 %a) nounwind {
 ; CHECK-LABEL: test_rev_w:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    rev w0, w0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_rev_w:
-; FALLBACK:       // %bb.0: // %entry
-; FALLBACK-NEXT:    rev w0, w0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_rev_w:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    rev w0, w0
+; GISEL-NEXT:    ret
 entry:
   %0 = tail call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %0
 }
 
-; FALLBACK-NOT: remark{{.*}}test_rev_x
+; GISEL-NOT: remark{{.*}}test_rev_x
 define i64 @test_rev_x(i64 %a) nounwind {
 ; CHECK-LABEL: test_rev_x:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    rev x0, x0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_rev_x:
-; FALLBACK:       // %bb.0: // %entry
-; FALLBACK-NEXT:    rev x0, x0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_rev_x:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    rev x0, x0
+; GISEL-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %0
@@ -43,12 +43,12 @@
 ; CHECK-NEXT:    lsr w0, w8, #16
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_rev_w_srl16:
-; FALLBACK:       // %bb.0: // %entry
-; FALLBACK-NEXT:    and w8, w0, #0xffff
-; FALLBACK-NEXT:    rev w8, w8
-; FALLBACK-NEXT:    lsr w0, w8, #16
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_rev_w_srl16:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    and w8, w0, #0xffff
+; GISEL-NEXT:    rev w8, w8
+; GISEL-NEXT:    lsr w0, w8, #16
+; GISEL-NEXT:    ret
 entry:
   %0 = zext i16 %a to i32
   %1 = tail call i32 @llvm.bswap.i32(i32 %0)
@@ -64,12 +64,12 @@
 ; CHECK-NEXT:    lsr w0, w8, #16
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_rev_w_srl16_load:
-; FALLBACK:       // %bb.0: // %entry
-; FALLBACK-NEXT:    ldrh w8, [x0]
-; FALLBACK-NEXT:    rev w8, w8
-; FALLBACK-NEXT:    lsr w0, w8, #16
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_rev_w_srl16_load:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    ldrh w8, [x0]
+; GISEL-NEXT:    rev w8, w8
+; GISEL-NEXT:    lsr w0, w8, #16
+; GISEL-NEXT:    ret
 entry:
   %0 = load i16, i16 *%a
   %1 = zext i16 %0 to i32
@@ -86,13 +86,13 @@
 ; CHECK-NEXT:    rev16 w0, w8
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_rev_w_srl16_add:
-; FALLBACK:       // %bb.0: // %entry
-; FALLBACK-NEXT:    and w8, w1, #0xff
-; FALLBACK-NEXT:    add w8, w8, w0, uxtb
-; FALLBACK-NEXT:    rev w8, w8
-; FALLBACK-NEXT:    lsr w0, w8, #16
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_rev_w_srl16_add:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    and w8, w1, #0xff
+; GISEL-NEXT:    add w8, w8, w0, uxtb
+; GISEL-NEXT:    rev w8, w8
+; GISEL-NEXT:    lsr w0, w8, #16
+; GISEL-NEXT:    ret
 entry:
   %0 = zext i8 %a to i32
   %1 = zext i8 %b to i32
@@ -112,13 +112,12 @@
 ; CHECK-NEXT:    lsr x0, x8, #32
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_rev_x_srl32:
-; FALLBACK:       // %bb.0: // %entry
-; FALLBACK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; FALLBACK-NEXT:    ubfx x8, x0, #0, #32
-; FALLBACK-NEXT:    rev x8, x8
-; FALLBACK-NEXT:    lsr x0, x8, #32
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_rev_x_srl32:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    mov w8, w0
+; GISEL-NEXT:    rev x8, x8
+; GISEL-NEXT:    lsr x0, x8, #32
+; GISEL-NEXT:    ret
 entry:
   %0 = zext i32 %a to i64
   %1 = tail call i64 @llvm.bswap.i64(i64 %0)
@@ -134,12 +133,12 @@
 ; CHECK-NEXT:    lsr x0, x8, #32
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_rev_x_srl32_load:
-; FALLBACK:       // %bb.0: // %entry
-; FALLBACK-NEXT:    ldr w8, [x0]
-; FALLBACK-NEXT:    rev x8, x8
-; FALLBACK-NEXT:    lsr x0, x8, #32
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_rev_x_srl32_load:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    ldr w8, [x0]
+; GISEL-NEXT:    rev x8, x8
+; GISEL-NEXT:    lsr x0, x8, #32
+; GISEL-NEXT:    ret
 entry:
   %0 = load i32, i32 *%a
   %1 = zext i32 %0 to i64
@@ -155,13 +154,13 @@
 ; CHECK-NEXT:    rev32 x0, x8
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_rev_x_srl32_shift:
-; FALLBACK:       // %bb.0: // %entry
-; FALLBACK-NEXT:    lsl x8, x0, #33
-; FALLBACK-NEXT:    lsr x8, x8, #35
-; FALLBACK-NEXT:    rev x8, x8
-; FALLBACK-NEXT:    lsr x0, x8, #32
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_rev_x_srl32_shift:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    lsl x8, x0, #33
+; GISEL-NEXT:    lsr x8, x8, #35
+; GISEL-NEXT:    rev x8, x8
+; GISEL-NEXT:    lsr x0, x8, #32
+; GISEL-NEXT:    ret
 entry:
   %0 = shl i64 %a, 33
   %1 = lshr i64 %0, 35
@@ -179,18 +178,18 @@
 ; CHECK-NEXT:    rev16 w0, w0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_rev16_w:
-; FALLBACK:       // %bb.0: // %entry
-; FALLBACK-NEXT:    lsr w8, w0, #8
-; FALLBACK-NEXT:    lsl w9, w0, #8
-; FALLBACK-NEXT:    and w10, w8, #0xff0000
-; FALLBACK-NEXT:    and w11, w9, #0xff000000
-; FALLBACK-NEXT:    and w9, w9, #0xff00
-; FALLBACK-NEXT:    orr w10, w11, w10
-; FALLBACK-NEXT:    and w8, w8, #0xff
-; FALLBACK-NEXT:    orr w9, w10, w9
-; FALLBACK-NEXT:    orr w0, w9, w8
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_rev16_w:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    lsr w8, w0, #8
+; GISEL-NEXT:    lsl w9, w0, #8
+; GISEL-NEXT:    and w10, w8, #0xff0000
+; GISEL-NEXT:    and w11, w9, #0xff000000
+; GISEL-NEXT:    and w9, w9, #0xff00
+; GISEL-NEXT:    orr w10, w11, w10
+; GISEL-NEXT:    and w8, w8, #0xff
+; GISEL-NEXT:    orr w9, w10, w9
+; GISEL-NEXT:    orr w0, w9, w8
+; GISEL-NEXT:    ret
 entry:
   %tmp1 = lshr i32 %X, 8
   %X15 = bitcast i32 %X to i32
@@ -215,12 +214,12 @@
 ; CHECK-NEXT:    ror x0, x8, #16
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_rev16_x:
-; FALLBACK:       // %bb.0: // %entry
-; FALLBACK-NEXT:    rev x8, x0
-; FALLBACK-NEXT:    lsl x9, x8, #48
-; FALLBACK-NEXT:    orr x0, x9, x8, lsr #16
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_rev16_x:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    rev x8, x0
+; GISEL-NEXT:    lsl x9, x8, #48
+; GISEL-NEXT:    orr x0, x9, x8, lsr #16
+; GISEL-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.bswap.i64(i64 %a)
   %1 = lshr i64 %0, 16
@@ -235,12 +234,12 @@
 ; CHECK-NEXT:    rev32 x0, x0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_rev32_x:
-; FALLBACK:       // %bb.0: // %entry
-; FALLBACK-NEXT:    rev x8, x0
-; FALLBACK-NEXT:    lsl x9, x8, #32
-; FALLBACK-NEXT:    orr x0, x9, x8, lsr #32
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_rev32_x:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    rev x8, x0
+; GISEL-NEXT:    lsl x9, x8, #32
+; GISEL-NEXT:    orr x0, x9, x8, lsr #32
+; GISEL-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.bswap.i64(i64 %a)
   %1 = lshr i64 %0, 32
@@ -256,11 +255,11 @@
 ; CHECK-NEXT:    rev64.8b v0, v0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev64D8:
-; FALLBACK:       // %bb.0:
-; FALLBACK-NEXT:    ldr d0, [x0]
-; FALLBACK-NEXT:    rev64.8b v0, v0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev64D8:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ldr d0, [x0]
+; GISEL-NEXT:    rev64.8b v0, v0
+; GISEL-NEXT:    ret
   %tmp1 = load <8 x i8>, <8 x i8>* %A
   %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32>
   ret <8 x i8> %tmp2
@@ -273,11 +272,11 @@
 ; CHECK-NEXT:    rev64.4h v0, v0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev64D16:
-; FALLBACK:       // %bb.0:
-; FALLBACK-NEXT:    ldr d0, [x0]
-; FALLBACK-NEXT:    rev64.4h v0, v0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev64D16:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ldr d0, [x0]
+; GISEL-NEXT:    rev64.4h v0, v0
+; GISEL-NEXT:    ret
   %tmp1 = load <4 x i16>, <4 x i16>* %A
   %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32>
   ret <4 x i16> %tmp2
@@ -290,11 +289,11 @@
 ; CHECK-NEXT:    rev64.2s v0, v0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev64D32:
-; FALLBACK:       // %bb.0:
-; FALLBACK-NEXT:    ldr d0, [x0]
-; FALLBACK-NEXT:    rev64.2s v0, v0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev64D32:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ldr d0, [x0]
+; GISEL-NEXT:    rev64.2s v0, v0
+; GISEL-NEXT:    ret
   %tmp1 = load <2 x i32>, <2 x i32>* %A
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32>
   ret <2 x i32> %tmp2
@@ -307,11 +306,11 @@
 ; CHECK-NEXT:    rev64.2s v0, v0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev64Df:
-; FALLBACK:       // %bb.0:
-; FALLBACK-NEXT:    ldr d0, [x0]
-; FALLBACK-NEXT:    rev64.2s v0, v0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev64Df:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ldr d0, [x0]
+; GISEL-NEXT:    rev64.2s v0, v0
+; GISEL-NEXT:    ret
   %tmp1 = load <2 x float>, <2 x float>* %A
   %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32>
   ret <2 x float> %tmp2
@@ -324,11 +323,11 @@
 ; CHECK-NEXT:    rev64.16b v0, v0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev64Q8:
-; FALLBACK:       // %bb.0:
-; FALLBACK-NEXT:    ldr q0, [x0]
-; FALLBACK-NEXT:    rev64.16b v0, v0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev64Q8:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ldr q0, [x0]
+; GISEL-NEXT:    rev64.16b v0, v0
+; GISEL-NEXT:    ret
   %tmp1 = load <16 x i8>, <16 x i8>* %A
   %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32>
   ret <16 x i8> %tmp2
@@ -341,11 +340,11 @@
 ; CHECK-NEXT:    rev64.8h v0, v0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev64Q16:
-; FALLBACK:       // %bb.0:
-; FALLBACK-NEXT:    ldr q0, [x0]
-; FALLBACK-NEXT:    rev64.8h v0, v0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev64Q16:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ldr q0, [x0]
+; GISEL-NEXT:    rev64.8h v0, v0
+; GISEL-NEXT:    ret
   %tmp1 = load <8 x i16>, <8 x i16>* %A
   %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32>
   ret <8 x i16> %tmp2
@@ -358,11 +357,11 @@
 ; CHECK-NEXT:    rev64.4s v0, v0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev64Q32:
-; FALLBACK:       // %bb.0:
-; FALLBACK-NEXT:    ldr q0, [x0]
-; FALLBACK-NEXT:    rev64.4s v0, v0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev64Q32:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ldr q0, [x0]
+; GISEL-NEXT:    rev64.4s v0, v0
+; GISEL-NEXT:    ret
   %tmp1 = load <4 x i32>, <4 x i32>* %A
   %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32>
   ret <4 x i32> %tmp2
@@ -375,11 +374,11 @@
 ; CHECK-NEXT:    rev64.4s v0, v0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev64Qf:
-; FALLBACK:       // %bb.0:
-; FALLBACK-NEXT:    ldr q0, [x0]
-; FALLBACK-NEXT:    rev64.4s v0, v0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev64Qf:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ldr q0, [x0]
+; GISEL-NEXT:    rev64.4s v0, v0
+; GISEL-NEXT:    ret
   %tmp1 = load <4 x float>, <4 x float>* %A
   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32>
   ret <4 x float> %tmp2
@@ -392,11 +391,11 @@
 ; CHECK-NEXT:    rev32.8b v0, v0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev32D8:
-; FALLBACK:       // %bb.0:
-; FALLBACK-NEXT:    ldr d0, [x0]
-; FALLBACK-NEXT:    rev32.8b v0, v0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev32D8:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ldr d0, [x0]
+; GISEL-NEXT:    rev32.8b v0, v0
+; GISEL-NEXT:    ret
   %tmp1 = load <8 x i8>, <8 x i8>* %A
   %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32>
   ret <8 x i8> %tmp2
@@ -409,11 +408,11 @@
 ; CHECK-NEXT:    rev32.4h v0, v0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev32D16:
-; FALLBACK:       // %bb.0:
-; FALLBACK-NEXT:    ldr d0, [x0]
-; FALLBACK-NEXT:    rev32.4h v0, v0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev32D16:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ldr d0, [x0]
+; GISEL-NEXT:    rev32.4h v0, v0
+; GISEL-NEXT:    ret
   %tmp1 = load <4 x i16>, <4 x i16>* %A
   %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32>
   ret <4 x i16> %tmp2
@@ -428,8 +427,11 @@
 ;
 ; GISEL-LABEL: test_vrev32Q8:
 ; GISEL:       // %bb.0:
-; GISEL: tbl.16b v0, { v0, v1 }, v2
-; GISEL: ret
+; GISEL-NEXT:    adrp x8, .LCPI21_0
+; GISEL-NEXT:    ldr q0, [x0]
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI21_0]
+; GISEL-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; GISEL-NEXT:    ret
   %tmp1 = load <16 x i8>, <16 x i8>* %A
   %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32>
   ret <16 x i8> %tmp2
@@ -444,8 +446,11 @@
 ;
 ; GISEL-LABEL: test_vrev32Q16:
 ; GISEL:       // %bb.0:
-; GISEL: tbl.16b v0, { v0, v1 }, v2
-; GISEL: ret
+; GISEL-NEXT:    adrp x8, .LCPI22_0
+; GISEL-NEXT:    ldr q0, [x0]
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI22_0]
+; GISEL-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; GISEL-NEXT:    ret
   %tmp1 = load <8 x i16>, <8 x i16>* %A
   %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32>
   ret <8 x i16> %tmp2
@@ -458,11 +463,11 @@
 ; CHECK-NEXT:    rev16.8b v0, v0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev16D8:
-; FALLBACK:       // %bb.0:
-; FALLBACK-NEXT:    ldr d0, [x0]
-; FALLBACK-NEXT:    rev16.8b v0, v0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev16D8:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ldr d0, [x0]
+; GISEL-NEXT:    rev16.8b v0, v0
+; GISEL-NEXT:    ret
   %tmp1 = load <8 x i8>, <8 x i8>* %A
   %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32>
   ret <8 x i8> %tmp2
@@ -477,8 +482,11 @@
 ;
 ; GISEL-LABEL: test_vrev16Q8:
 ; GISEL:       // %bb.0:
-; GISEL: tbl.16b v0, { v0, v1 }, v2
-; GISEL: ret
+; GISEL-NEXT:    adrp x8, .LCPI24_0
+; GISEL-NEXT:    ldr q0, [x0]
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI24_0]
+; GISEL-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; GISEL-NEXT:    ret
   %tmp1 = load <16 x i8>, <16 x i8>* %A
   %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32>
   ret <16 x i8> %tmp2
@@ -493,11 +501,11 @@
 ; CHECK-NEXT:    rev64.8b v0, v0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev64D8_undef:
-; FALLBACK:       // %bb.0:
-; FALLBACK-NEXT:    ldr d0, [x0]
-; FALLBACK-NEXT:    rev64.8b v0, v0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev64D8_undef:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    ldr d0, [x0]
+; GISEL-NEXT:    rev64.8b v0, v0
+; GISEL-NEXT:    ret
   %tmp1 = load <8 x i8>, <8 x i8>* %A
   %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32>
   ret <8 x i8> %tmp2
@@ -512,8 +520,11 @@
 ;
 ; GISEL-LABEL: test_vrev32Q16_undef:
 ; GISEL:       // %bb.0:
-; GISEL: tbl.16b v0, { v0, v1 }, v2
-; GISEL: ret
+; GISEL-NEXT:    adrp x8, .LCPI26_0
+; GISEL-NEXT:    ldr q0, [x0]
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI26_0]
+; GISEL-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; GISEL-NEXT:    ret
   %tmp1 = load <8 x i16>, <8 x i16>* %A
   %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32>
   ret <8 x i16> %tmp2
@@ -529,13 +540,13 @@
 ; CHECK-NEXT:    st1.h { v0 }[6], [x1]
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev64:
-; FALLBACK:       // %bb.0: // %entry
-; FALLBACK-NEXT:    ldr q0, [x0]
-; FALLBACK-NEXT:    add x8, x1, #2 // =2
-; FALLBACK-NEXT:    st1.h { v0 }[5], [x8]
-; FALLBACK-NEXT:    st1.h { v0 }[6], [x1]
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev64:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    ldr q0, [x0]
+; GISEL-NEXT:    add x8, x1, #2 // =2
+; GISEL-NEXT:    st1.h { v0 }[5], [x8]
+; GISEL-NEXT:    st1.h { v0 }[6], [x1]
+; GISEL-NEXT:    ret
 entry:
   %0 = bitcast <4 x i16>* %source to <8 x i16>*
   %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4
@@ -559,18 +570,18 @@
 ; CHECK-NEXT:    str q0, [x1, #176]
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: float_vrev64:
-; FALLBACK:       // %bb.0: // %entry
-; FALLBACK-NEXT:    movi d0, #0000000000000000
-; FALLBACK-NEXT:    mov.s v0[1], v0[0]
-; FALLBACK-NEXT:    mov.s v0[2], v0[0]
-; FALLBACK-NEXT:    adrp x8, .LCPI28_0
-; FALLBACK-NEXT:    mov.s v0[3], v0[0]
-; FALLBACK-NEXT:    ldr q1, [x0]
-; FALLBACK-NEXT:    ldr q2, [x8, :lo12:.LCPI28_0]
-; FALLBACK-NEXT:    tbl.16b v0, { v0, v1 }, v2
-; FALLBACK-NEXT:    str q0, [x1, #176]
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: float_vrev64:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    movi d0, #0000000000000000
+; GISEL-NEXT:    mov.s v0[1], v0[0]
+; GISEL-NEXT:    mov.s v0[2], v0[0]
+; GISEL-NEXT:    adrp x8, .LCPI28_0
+; GISEL-NEXT:    mov.s v0[3], v0[0]
+; GISEL-NEXT:    ldr q1, [x0]
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI28_0]
+; GISEL-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; GISEL-NEXT:    str q0, [x1, #176]
+; GISEL-NEXT:    ret
 entry:
   %0 = bitcast float* %source to <4 x float>*
   %tmp2 = load <4 x float>, <4 x float>* %0, align 4
@@ -587,10 +598,10 @@
 ; CHECK-NEXT:    rev32.16b v0, v0
 ; CHECK-NEXT:    ret
 ;
-; FALLBACK-LABEL: test_vrev32_bswap:
-; FALLBACK:       // %bb.0:
-; FALLBACK-NEXT:    rev32.16b v0, v0
-; FALLBACK-NEXT:    ret
+; GISEL-LABEL: test_vrev32_bswap:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    rev32.16b v0, v0
+; GISEL-NEXT:    ret
   %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
   ret <4 x i32> %bswap
 }