Details
Diff Detail
- Repository
- rG LLVM Github Monorepo
Event Timeline
Marked as "WIP" because the GlobalISel output still contains a few extraneous copies:
```
$ cat bswap.ll
; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
define void @test1(<2 x i16>* %p) {
  %in = load <2 x i16>, <2 x i16>* %p
  %out = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %in)
  store <2 x i16> %out, <2 x i16>* %p
  ret void
}
declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) nounwind readnone
```
GISel:
```
$ ./bin/llc -global-isel=1 -global-isel-abort=1 bswap.ll -mtriple=arm64-apple-ios -o -
        .section __TEXT,__text,regular,pure_instructions
        .globl _test1                   ; -- Begin function test1
        .p2align 2
_test1:                                 ; @test1
        .cfi_startproc
; %bb.0:
        ldr h0, [x0]
        ldr h1, [x0, #2]
        mov.h v0[1], v1[0]
        fmov w8, s0
        mov.s v0[0], w8
        rev32.8b v0, v0
        ushr.2s v0, v0, #16
        fmov s0, s0
        mov h1, v0[1]
        str h0, [x0]
        str h1, [x0, #2]
        ret
        .cfi_endproc
                                        ; -- End function
.subsections_via_symbols
```
SDAG:
```
$ ./bin/llc -global-isel=0 -global-isel-abort=1 bswap.ll -mtriple=arm64-apple-ios -o -
        .section __TEXT,__text,regular,pure_instructions
        .globl _test1                   ; -- Begin function test1
        .p2align 2
_test1:                                 ; @test1
        .cfi_startproc
; %bb.0:
        ld1.h { v0 }[0], [x0]
        add x8, x0, #2                  ; =2
        ld1.h { v0 }[2], [x8]
        rev32.8b v0, v0
        ushr.2s v0, v0, #16
        mov.s w8, v0[1]
        fmov w9, s0
        strh w9, [x0]
        strh w8, [x0, #2]
        ret
        .cfi_endproc
                                        ; -- End function
.subsections_via_symbols
```
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | ||
---|---|---|
3999 ↗ | (On Diff #358487) | This looks similar to getSubRegForClass? Maybe it's possible to share some code there? |
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | ||
---|---|---|
3999 ↗ | (On Diff #358487) | Also, it'd be nice to have a specific test for G_UNMERGE_VALUES selection. I think that this part + that test could be moved into an independent patch. |
Getting closer:
SDAG:
```
$ ./bin/llc -global-isel=0 -global-isel-abort=1 bswap.ll -mtriple=arm64-apple-ios -o -
        .section __TEXT,__text,regular,pure_instructions
        .globl _test1                   ; -- Begin function test1
        .p2align 2
_test1:                                 ; @test1
        .cfi_startproc
; %bb.0:
        ld1.h { v0 }[0], [x0]
        add x8, x0, #2                  ; =2
        ld1.h { v0 }[2], [x8]
        rev32.8b v0, v0
        ushr.2s v0, v0, #16
        mov.s w8, v0[1]
        fmov w9, s0
        strh w9, [x0]
        strh w8, [x0, #2]
        ret
        .cfi_endproc
                                        ; -- End function
.subsections_via_symbols
```
GISel:
```
$ ./bin/llc -global-isel=1 -global-isel-abort=1 bswap.ll -mtriple=arm64-apple-ios -o -
        .section __TEXT,__text,regular,pure_instructions
        .globl _test1                   ; -- Begin function test1
        .p2align 2
_test1:                                 ; @test1
        .cfi_startproc
; %bb.0:
        ldr h0, [x0]
        ldr h1, [x0, #2]
        mov.h v0[1], v1[0]
        mov.s v0[0], v0[0]
        rev32.8b v0, v0
        ushr.2s v0, v0, #16
        mov h1, v0[1]
        str h0, [x0]
        str h1, [x0, #2]
        ret
        .cfi_endproc
                                        ; -- End function
.subsections_via_symbols
```
SDAG:
```
$ ./bin/llc -global-isel=0 -global-isel-abort=1 bswap.ll -mtriple=arm64-apple-ios -o -
        .section __TEXT,__text,regular,pure_instructions
        .globl _test1                   ; -- Begin function test1
        .p2align 2
_test1:                                 ; @test1
        .cfi_startproc
; %bb.0:
        ld1.h { v0 }[0], [x0]
        add x8, x0, #2                  ; =2
        ld1.h { v0 }[2], [x8]
        rev32.8b v0, v0
        ushr.2s v0, v0, #16
        mov.s w8, v0[1]
        fmov w9, s0
        strh w9, [x0]
        strh w8, [x0, #2]
        ret
        .cfi_endproc
                                        ; -- End function
.subsections_via_symbols
```
GISel:
```
$ ./bin/llc -global-isel=1 -global-isel-abort=1 bswap.ll -mtriple=arm64-apple-ios -o -
        .section __TEXT,__text,regular,pure_instructions
        .globl _test1                   ; -- Begin function test1
        .p2align 2
_test1:                                 ; @test1
        .cfi_startproc
; %bb.0:
        ldr h0, [x0]
        ldr h1, [x0, #2]
        mov.h v0[1], v1[0]
        rev32.8b v0, v0
        ushr.2s v0, v0, #16
        mov h1, v0[1]
        str h0, [x0]
        str h1, [x0, #2]
        ret
        .cfi_endproc
                                        ; -- End function
.subsections_via_symbols
```
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | ||
---|---|---|
1016 | Do you think you could add an explanation somewhere for why the v2s16 case is special? |
Aside from the comment, I think this looks pretty good at this point? The codegen differences between SDAG and GISel seem unrelated to the bswap at this point.
Explain why <2 x half> is weird, and why we're not directly selecting the instructions we want during legalization.
Do you think you could add an explanation somewhere for why the v2s16 case is special?