Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -4810,11 +4810,11 @@ }]>; def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (pre_store node:$val, node:$ptr, node:$offset), [{ - return cast<StoreSDNode>(N)->getAlignment() == 2; + return cast<StoreSDNode>(N)->getAlignment() >= 2; }]>; def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (post_store node:$val, node:$ptr, node:$offset), [{ - return cast<StoreSDNode>(N)->getAlignment() == 2; + return cast<StoreSDNode>(N)->getAlignment() >= 2; }]>; def alignedmaskedload32 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), Index: llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll +++ llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE define i8* @ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_4: @@ -720,11 +721,18 @@ } define i8* @ldrwi32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, 
#3] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i32>* @@ -735,11 +743,18 @@ } define i8* @ldrhi16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i16>* @@ -772,12 +787,19 @@ ret i8* %x } -define i8* @ldrwf32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +define i8* @ldrf32_align1(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x float>* @@ -787,12 +809,19 @@ ret i8* %x } -define i8* @ldrwf16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +define i8* @ldrf16_align1(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-LE-NEXT: 
vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x half>* @@ -802,6 +831,27 @@ ret i8* %x } +define i8* @ldrh16_align8(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrh16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4] +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrh16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4] +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + @@ -1294,11 +1344,18 @@ } define i8* @strwi32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -1309,11 +1366,18 @@ } define i8* @strhi16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; 
CHECK-LE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1347,11 +1411,18 @@ } define i8* @strf32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x float>* @@ -1362,11 +1433,18 @@ } define i8* @strf16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x half>* @@ -1375,3 +1453,24 @@ store <8 x half> %1, <8 x half>* %2, align 1 ret i8* %y } + +define i8* @strf16_align8(i8* %y, i8* %x) { +; CHECK-LE-LABEL: strf16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrw.32 q0, [r0, #16] +; CHECK-LE-NEXT: bx lr +; 
+; CHECK-BE-LABEL: strf16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 16 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 8 + ret i8* %y +} Index: llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll +++ llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE define i8* @ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_4: @@ -17,11 +18,18 @@ } define i8* @ldrwu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwu32_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwu32_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwu32_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -291,11 +299,18 @@ } define i8* @ldrhu16_3(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhu16_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 
-; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhu16_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhu16_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -708,11 +723,19 @@ } define i8* @ldrwi32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -723,11 +746,19 @@ } define i8* @ldrhi16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -762,11 +793,19 @@ } define i8* @ldrf32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrf32_align1: -; 
CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <4 x float>* @@ -777,11 +816,19 @@ } define i8* @ldrf16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0], #3 -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3 +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %x to <8 x half>* @@ -791,16 +838,43 @@ ret i8* %z } +define i8* @ldrh16_align8(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrh16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r0], #4 +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrh16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r0], #4 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + define i8* @strw32_4(i8* %y, i8* %x) { -; 
CHECK-LABEL: strw32_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i32>* @@ -811,11 +885,18 @@ } define i8* @strw32_3(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -826,11 +907,17 @@ } define i8* @strw32_m4(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_m4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #-4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_m4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #-4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_m4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0], #-4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -4 %0 = bitcast i8* %x to <4 x i32>* @@ -982,11 +1069,17 @@ define i8* @strh16_4(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 
q0, [r0], #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x i16>* @@ -997,11 +1090,18 @@ } define i8* @strh16_3(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1012,11 +1112,17 @@ } define i8* @strh16_2(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_2: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #2 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_2: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #2 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_2: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0], #2 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 %0 = bitcast i8* %x to <8 x i16>* @@ -1244,11 +1350,17 @@ } define i8* @strf32_4(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: 
vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x float>* @@ -1259,11 +1371,17 @@ } define i8* @strf16_4(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #4 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x half>* @@ -1274,11 +1392,19 @@ } define i8* @strwi32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -1289,11 +1415,19 @@ } define i8* @strhi16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; 
CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1328,11 +1462,19 @@ } define i8* @strf32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x float>* @@ -1343,11 +1485,19 @@ } define i8* @strf16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0], #3 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x half>* @@ -1356,3 +1506,24 @@ store <8 x half> %1, <8 x half>* %2, align 1 ret i8* %z } + +define i8* @strf16_align8(i8* %y, i8* %x) { +; CHECK-LE-LABEL: strf16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 
q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0], #16 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0], #16 +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 16 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 8 + ret i8* %z +} Index: llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll +++ llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE define i8* @ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_4: @@ -17,11 +18,18 @@ } define i8* @ldrwu32_3(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwu32_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwu32_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! 
+; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwu32_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vldrw.u32 q0, [r0] +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i32>* @@ -291,11 +299,18 @@ } define i8* @ldrhu16_3(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhu16_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhu16_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhu16_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vldrh.u16 q0, [r0] +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i16>* @@ -708,11 +723,19 @@ } define i8* @ldrwi32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x i32>* @@ -723,11 +746,19 @@ } define i8* @ldrhi16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! 
-; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x i16>* @@ -762,11 +793,19 @@ } define i8* @ldrf32_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrw.32 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! +; CHECK-LE-NEXT: vstrw.32 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrw.32 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <4 x float>* @@ -777,11 +816,19 @@ } define i8* @ldrf16_align1(i8* %x, i8* %y) { -; CHECK-LABEL: ldrf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! -; CHECK-NEXT: vstrh.16 q0, [r1] -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: ldrf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]! 
+; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %x, i32 3 %0 = bitcast i8* %z to <8 x half>* @@ -791,16 +838,43 @@ ret i8* %z } +define i8* @ldrh16_align8(i8* %x, i8* %y) { +; CHECK-LE-LABEL: ldrh16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4]! +; CHECK-LE-NEXT: vstrh.16 q0, [r1] +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: ldrh16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4]! +; CHECK-BE-NEXT: vstrh.16 q0, [r1] +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 8 + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + define i8* @strw32_4(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #4]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x i32>* @@ -811,11 +885,18 @@ } define i8* @strw32_3(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! 
+; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -826,11 +907,17 @@ } define i8* @strw32_m4(i8* %y, i8* %x) { -; CHECK-LABEL: strw32_m4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #-4]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strw32_m4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #-4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strw32_m4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0, #-4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -4 %0 = bitcast i8* %x to <4 x i32>* @@ -982,11 +1069,17 @@ define i8* @strh16_4(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #4]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x i16>* @@ -997,11 +1090,18 @@ } define i8* @strh16_3(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_3: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_3: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! 
+; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_3: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0] +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1012,11 +1112,17 @@ } define i8* @strh16_2(i8* %y, i8* %x) { -; CHECK-LABEL: strh16_2: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #2]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strh16_2: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #2]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strh16_2: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #2]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 %0 = bitcast i8* %x to <8 x i16>* @@ -1244,11 +1350,17 @@ } define i8* @strf32_4(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #4]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <4 x float>* @@ -1259,11 +1371,17 @@ } define i8* @strf16_4(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_4: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #4]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_4: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]! 
+; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]! +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 %0 = bitcast i8* %x to <8 x half>* @@ -1274,11 +1392,19 @@ } define i8* @strwi32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strwi32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strwi32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strwi32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x i32>* @@ -1289,11 +1415,19 @@ } define i8* @strhi16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strhi16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strhi16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strhi16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x i16>* @@ -1327,11 +1461,19 @@ } define i8* @strf32_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf32_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! 
-; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf32_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrw.u32 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf32_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrw.u32 q0, [r1] +; CHECK-BE-NEXT: vrev32.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <4 x float>* @@ -1342,11 +1484,19 @@ } define i8* @strf16_align1(i8* %y, i8* %x) { -; CHECK-LABEL: strf16_align1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3]! -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: strf16_align1: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vrev16.8 q0, q0 +; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3] +; CHECK-BE-NEXT: adds r0, #3 +; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 %0 = bitcast i8* %x to <8 x half>* @@ -1355,3 +1505,24 @@ store <8 x half> %1, <8 x half>* %2, align 1 ret i8* %z } + +define i8* @strf16_align8(i8* %y, i8* %x) { +; CHECK-LE-LABEL: strf16_align8: +; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: vldrh.u16 q0, [r1] +; CHECK-LE-NEXT: vstrb.8 q0, [r0, #16]! +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: strf16_align8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: vldrh.u16 q0, [r1] +; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16]! +; CHECK-BE-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 16 + %0 = bitcast i8* %x to <8 x i16>* + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 8 + ret i8* %z +}