diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11459,12 +11459,20 @@ assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 && "Unexpected extension factor."); unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits(); - // FIXME: support multi-step zipping? - if (Scale != 2) + // Only support simple power-of-2 extensions. + if (!isPowerOf2_32(Scale)) return SDValue(); - SDValue Zeros = DAG.getConstant(0, dl, SrcVT); - return DAG.getBitcast(VT, - DAG.getNode(AArch64ISD::ZIP1, dl, SrcVT, SrcOp, Zeros)); + for (unsigned Step = 0, NumSteps = Log2_32(Scale); Step != NumSteps; ++Step) { + SDValue Zeros = DAG.getConstant(0, dl, SrcVT); + EVT NewVT = EVT::getVectorVT( + *DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), 2 * SrcVT.getScalarSizeInBits()), + SrcVT.getVectorNumElements() / 2); + SrcOp = DAG.getNode(AArch64ISD::ZIP1, dl, SrcVT, SrcOp, Zeros); + SrcOp = DAG.getBitcast(NewVT, SrcOp); + SrcVT = NewVT; + } + return SrcOp; } SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll --- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=arm64-apple-ios -mattr=+sve -o - %s | FileCheck %s -; RUN: llc -mtriple=aarch64_be-unknown-linux -mattr=+sve -o - %s | FileCheck --check-prefix=CHECK-BE %s -; RUN: llc -mtriple=arm64-apple-ios -mattr=+global-isel -mattr=+sve -o - %s | FileCheck %s -; RUN: llc -mtriple=aarch64_be-unknown-linux -mattr=+global-isel -mattr=+sve -o - %s | FileCheck --check-prefix=CHECK-BE %s +; RUN: llc -mtriple=arm64-apple-ios -mattr=+sve -o - %s | FileCheck --implicit-check-not=LCPI --implicit-check-not=lCPI %s +; RUN: llc -mtriple=aarch64_be-unknown-linux -mattr=+sve -o - %s | FileCheck --implicit-check-not=LCPI --implicit-check-not=lCPI --check-prefix=CHECK-BE %s +; RUN: llc -mtriple=arm64-apple-ios -mattr=+global-isel -mattr=+sve -o - %s | FileCheck --implicit-check-not=LCPI --implicit-check-not=lCPI %s +; RUN: llc -mtriple=aarch64_be-unknown-linux -mattr=+global-isel -mattr=+sve -o - %s | FileCheck --implicit-check-not=LCPI --implicit-check-not=lCPI --check-prefix=CHECK-BE %s ; CHECK-LABEL: lCPI0_0: ; CHECK-NEXT: .byte 0 ; 0x0 @@ -993,24 +993,6 @@ ret void } -; CHECK-LABEL: lCPI11_0: -; CHECK-NEXT: .byte 0 ; 0x0 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 1 ; 0x1 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 2 ; 0x2 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 3 ; 0x3 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff - ; CHECK-BE-LABEL: .LCPI11_0: ; CHECK-BE-NEXT: .byte 255 // 0xff ; CHECK-BE-NEXT: .byte 255 // 0xff @@ -1032,22 +1014,19 @@ define void @zext_v4i8_to_v4i32_in_loop(ptr %src, ptr %dst) { ; CHECK-LABEL: zext_v4i8_to_v4i32_in_loop: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: Lloh12: -; CHECK-NEXT: adrp x9, lCPI11_0@PAGE +; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: Lloh13: -; CHECK-NEXT: ldr q0, [x9, lCPI11_0@PAGEOFF] ; CHECK-NEXT: LBB11_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr s1, [x0, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #128 -; CHECK-NEXT: tbl.16b v1, { v1 }, v0 +; CHECK-NEXT: zip1.16b v1, v1, v0 +; CHECK-NEXT: zip1.8h v1, v1, v0 ; CHECK-NEXT: str q1, [x1], #64 ; CHECK-NEXT: b.ne LBB11_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret -; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh13 ; ; CHECK-BE-LABEL: zext_v4i8_to_v4i32_in_loop: ; CHECK-BE: // %bb.0: // %entry @@ -1194,18 +1173,18 @@ define void @zext_v12i8_to_v12i32_in_loop(ptr %src, ptr %dst) { ; CHECK-LABEL: zext_v12i8_to_v12i32_in_loop: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: Lloh14: +; CHECK-NEXT: Lloh12: ; CHECK-NEXT: adrp x9, lCPI12_0@PAGE -; CHECK-NEXT: Lloh15: +; CHECK-NEXT: Lloh13: ; CHECK-NEXT: adrp x10, lCPI12_1@PAGE -; CHECK-NEXT: Lloh16: +; CHECK-NEXT: Lloh14: ; CHECK-NEXT: adrp x11, lCPI12_2@PAGE ; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: Lloh17: +; CHECK-NEXT: Lloh15: ; CHECK-NEXT: ldr q0, [x9, lCPI12_0@PAGEOFF] -; CHECK-NEXT: Lloh18: +; CHECK-NEXT: Lloh16: ; CHECK-NEXT: ldr q1, [x10, lCPI12_1@PAGEOFF] -; CHECK-NEXT: Lloh19: +; CHECK-NEXT: Lloh17: ; CHECK-NEXT: ldr q2, [x11, lCPI12_2@PAGEOFF] ; CHECK-NEXT: LBB12_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -1220,9 +1199,9 @@ ; CHECK-NEXT: b.ne LBB12_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret -; CHECK-NEXT: .loh AdrpLdr Lloh16, Lloh19 -; CHECK-NEXT: .loh AdrpLdr Lloh15, Lloh18 ; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh17 +; CHECK-NEXT: .loh AdrpLdr Lloh13, Lloh16 +; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh15 ; ; CHECK-BE-LABEL: zext_v12i8_to_v12i32_in_loop: ; CHECK-BE: // %bb.0: // %entry @@ -2192,22 +2171,22 @@ define void @zext_v20i8_to_v20i24_in_loop(ptr %src, ptr %dst) { ; CHECK-LABEL: zext_v20i8_to_v20i24_in_loop: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: Lloh20: +; CHECK-NEXT: Lloh18: ; CHECK-NEXT: adrp x9, lCPI20_0@PAGE -; CHECK-NEXT: Lloh21: +; CHECK-NEXT: Lloh19: ; CHECK-NEXT: adrp x10, lCPI20_1@PAGE -; CHECK-NEXT: Lloh22: +; CHECK-NEXT: Lloh20: ; CHECK-NEXT: adrp x11, lCPI20_2@PAGE -; CHECK-NEXT: Lloh23: +; CHECK-NEXT: Lloh21: ; CHECK-NEXT: adrp x12, lCPI20_3@PAGE ; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: Lloh24: +; CHECK-NEXT: Lloh22: ; CHECK-NEXT: ldr q0, [x9, lCPI20_0@PAGEOFF] -; CHECK-NEXT: Lloh25: +; CHECK-NEXT: Lloh23: ; CHECK-NEXT: ldr q1, [x10, lCPI20_1@PAGEOFF] -; CHECK-NEXT: Lloh26: +; CHECK-NEXT: Lloh24: ; CHECK-NEXT: ldr q2, [x11, lCPI20_2@PAGEOFF] -; CHECK-NEXT: Lloh27: +; CHECK-NEXT: Lloh25: ; CHECK-NEXT: ldr q3, [x12, lCPI20_3@PAGEOFF] ; CHECK-NEXT: LBB20_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -2228,10 +2207,10 @@ ; CHECK-NEXT: b.ne LBB20_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret -; CHECK-NEXT: .loh AdrpLdr Lloh23, Lloh27 -; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh26 ; CHECK-NEXT: .loh AdrpLdr Lloh21, Lloh25 ; CHECK-NEXT: .loh AdrpLdr Lloh20, Lloh24 +; CHECK-NEXT: .loh AdrpLdr Lloh19, Lloh23 +; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh22 ; ; CHECK-BE-LABEL: zext_v20i8_to_v20i24_in_loop: ; CHECK-BE: // %bb.0: // %entry @@ -2519,30 +2498,30 @@ define void @zext_v23i8_to_v23i48_in_loop(ptr %src, ptr %dst) { ; CHECK-LABEL: zext_v23i8_to_v23i48_in_loop: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: Lloh28: +; CHECK-NEXT: Lloh26: ; CHECK-NEXT: adrp x9, lCPI21_0@PAGE -; CHECK-NEXT: Lloh29: +; CHECK-NEXT: Lloh27: ; CHECK-NEXT: adrp x10, lCPI21_1@PAGE -; CHECK-NEXT: Lloh30: +; CHECK-NEXT: Lloh28: ; CHECK-NEXT: adrp x11, lCPI21_2@PAGE ; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: Lloh31: +; CHECK-NEXT: Lloh29: ; CHECK-NEXT: ldr q0, [x9, lCPI21_0@PAGEOFF] -; CHECK-NEXT: Lloh32: +; CHECK-NEXT: Lloh30: ; CHECK-NEXT: adrp x9, lCPI21_3@PAGE -; CHECK-NEXT: Lloh33: +; CHECK-NEXT: Lloh31: ; CHECK-NEXT: ldr q1, [x10, lCPI21_1@PAGEOFF] -; CHECK-NEXT: Lloh34: +; CHECK-NEXT: Lloh32: ; CHECK-NEXT: adrp x10, lCPI21_4@PAGE -; CHECK-NEXT: Lloh35: +; CHECK-NEXT: Lloh33: ; CHECK-NEXT: ldr q2, [x11, lCPI21_2@PAGEOFF] -; CHECK-NEXT: Lloh36: +; CHECK-NEXT: Lloh34: ; CHECK-NEXT: adrp x11, lCPI21_5@PAGE -; CHECK-NEXT: Lloh37: +; CHECK-NEXT: Lloh35: ; CHECK-NEXT: ldr q3, [x9, lCPI21_3@PAGEOFF] -; CHECK-NEXT: Lloh38: +; CHECK-NEXT: Lloh36: ; CHECK-NEXT: ldr q4, [x10, lCPI21_4@PAGEOFF] -; CHECK-NEXT: Lloh39: +; CHECK-NEXT: Lloh37: ; CHECK-NEXT: ldr q5, [x11, lCPI21_5@PAGEOFF] ; CHECK-NEXT: LBB21_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -2570,15 +2549,15 @@ ; CHECK-NEXT: b.ne LBB21_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret -; CHECK-NEXT: .loh AdrpLdr Lloh36, Lloh39 -; CHECK-NEXT: .loh AdrpLdr Lloh34, Lloh38 -; CHECK-NEXT: .loh AdrpLdr Lloh32, Lloh37 -; CHECK-NEXT: .loh AdrpAdrp Lloh30, Lloh36 +; CHECK-NEXT: .loh AdrpLdr Lloh34, Lloh37 +; CHECK-NEXT: .loh AdrpLdr Lloh32, Lloh36 ; CHECK-NEXT: .loh AdrpLdr Lloh30, Lloh35 -; CHECK-NEXT: .loh AdrpAdrp Lloh29, Lloh34 -; CHECK-NEXT: .loh AdrpLdr Lloh29, Lloh33 -; CHECK-NEXT: .loh AdrpAdrp Lloh28, Lloh32 -; CHECK-NEXT: .loh AdrpLdr Lloh28, Lloh31 +; CHECK-NEXT: .loh AdrpAdrp Lloh28, Lloh34 +; CHECK-NEXT: .loh AdrpLdr Lloh28, Lloh33 +; CHECK-NEXT: .loh AdrpAdrp Lloh27, Lloh32 +; CHECK-NEXT: .loh AdrpLdr Lloh27, Lloh31 +; CHECK-NEXT: .loh AdrpAdrp Lloh26, Lloh30 +; CHECK-NEXT: .loh AdrpLdr Lloh26, Lloh29 ; ; CHECK-BE-LABEL: zext_v23i8_to_v23i48_in_loop: ; CHECK-BE: // %bb.0: // %entry