Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2477,6 +2477,22 @@ def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; +// Match stores from lane 0 to the appropriate subreg's store. +multiclass VecStoreULane0Pat { + defm : VecStoreLane0Pat; +} + +let AddedComplexity = 19 in { + defm : VecStoreULane0Pat; + defm : VecStoreULane0Pat; + defm : VecStoreULane0Pat; + defm : VecStoreULane0Pat; + defm : VecStoreULane0Pat; + defm : VecStoreULane0Pat; +} + //--- // STR mnemonics fall back to STUR for negative or unaligned offsets. def : InstAlias<"str $Rt, [$Rn, $offset]", Index: llvm/test/CodeGen/AArch64/arm64-st1.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-st1.ll +++ llvm/test/CodeGen/AArch64/arm64-st1.ll @@ -4,24 +4,26 @@ define void @st1lane_16b(<16 x i8> %A, i8* %D) { ; CHECK-LABEL: st1lane_16b -; CHECK: st1.b +; CHECK: st1.b { v{{[0-9]+}} }[1], [x{{[0-9]+}}] + %ptr = getelementptr i8, i8* %D, i64 1 %tmp = extractelement <16 x i8> %A, i32 1 - store i8 %tmp, i8* %D + store i8 %tmp, i8* %ptr ret void } define void @st1lane0_16b(<16 x i8> %A, i8* %D) { ; CHECK-LABEL: st1lane0_16b -; CHECK: st1.b +; CHECK: st1.b { v{{[0-9]+}} }[0], [x{{[0-9]+}}] + %ptr = getelementptr i8, i8* %D, i64 1 %tmp = extractelement <16 x i8> %A, i32 0 - store i8 %tmp, i8* %D + store i8 %tmp, i8* %ptr ret void } define void @st1lane0u_16b(<16 x i8> %A, i8* %D) { ; CHECK-LABEL: st1lane0u_16b -; CHECK: st1.b +; CHECK: st1.b { v{{[0-9]+}} }[0], [x{{[0-9]+}}] %ptr = getelementptr i8, i8* %D, i64 -1 %tmp = extractelement <16 x i8> %A, i32 0 store i8 %tmp, i8* %ptr ret void @@ -29,8 +31,8 @@ define void @st1lane_ro_16b(<16 x i8> 
%A, i8* %D, i64 %offset) { ; CHECK-LABEL: st1lane_ro_16b -; CHECK: add x[[XREG:[0-9]+]], x0, x1 -; CHECK: st1.b { v0 }[1], [x[[XREG]]] +; CHECK: add x[[XREG:[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: st1.b { v{{[0-9]+}} }[1], [x[[XREG]]] %ptr = getelementptr i8, i8* %D, i64 %offset %tmp = extractelement <16 x i8> %A, i32 1 store i8 %tmp, i8* %ptr @@ -39,8 +41,8 @@ define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) { ; CHECK-LABEL: st1lane0_ro_16b -; CHECK: add x[[XREG:[0-9]+]], x0, x1 -; CHECK: st1.b { v0 }[0], [x[[XREG]]] +; CHECK: add x[[XREG:[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: st1.b { v{{[0-9]+}} }[0], [x[[XREG]]] %ptr = getelementptr i8, i8* %D, i64 %offset %tmp = extractelement <16 x i8> %A, i32 0 store i8 %tmp, i8* %ptr @@ -49,23 +51,25 @@ define void @st1lane_8h(<8 x i16> %A, i16* %D) { ; CHECK-LABEL: st1lane_8h -; CHECK: st1.h +; CHECK: st1.h { v{{[0-9]+}} }[1], [x{{[0-9]+}}] + %ptr = getelementptr i16, i16* %D, i64 1 %tmp = extractelement <8 x i16> %A, i32 1 - store i16 %tmp, i16* %D + store i16 %tmp, i16* %ptr ret void } define void @st1lane0_8h(<8 x i16> %A, i16* %D) { ; CHECK-LABEL: st1lane0_8h -; CHECK: str +; CHECK: str h{{[0-9]+}}, [x{{[0-9]+}}, #{{[0-9]+}}] + %ptr = getelementptr i16, i16* %D, i64 1 %tmp = extractelement <8 x i16> %A, i32 0 - store i16 %tmp, i16* %D + store i16 %tmp, i16* %ptr ret void } define void @st1lane0u_8h(<8 x i16> %A, i16* %D) { ; CHECK-LABEL: st1lane0u_8h -; CHECK: st1.h +; CHECK: stur h{{[0-9]+}}, [x{{[0-9]+}}, #-{{[0-9]+}}] %ptr = getelementptr i16, i16* %D, i64 -1 %tmp = extractelement <8 x i16> %A, i32 0 store i16 %tmp, i16* %ptr @@ -74,8 +78,8 @@ define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) { ; CHECK-LABEL: st1lane_ro_8h -; CHECK: add x[[XREG:[0-9]+]], x0, x1 -; CHECK: st1.h { v0 }[1], [x[[XREG]]] +; CHECK: add x[[XREG:[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: st1.h { v{{[0-9]+}} }[1], [x[[XREG]]] %ptr = getelementptr i16, i16* %D, i64 %offset %tmp = extractelement <8 x 
i16> %A, i32 1 store i16 %tmp, i16* %ptr @@ -84,7 +88,7 @@ define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) { ; CHECK-LABEL: st1lane0_ro_8h -; CHECK: str h0, [x0, x1, lsl #1] +; CHECK: str h{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #1] %ptr = getelementptr i16, i16* %D, i64 %offset %tmp = extractelement <8 x i16> %A, i32 0 store i16 %tmp, i16* %ptr @@ -93,23 +97,25 @@ define void @st1lane_4s(<4 x i32> %A, i32* %D) { ; CHECK-LABEL: st1lane_4s -; CHECK: st1.s +; CHECK: st1.s { v{{[0-9]+}} }[1], [x{{[0-9]+}}] + %ptr = getelementptr i32, i32* %D, i64 1 %tmp = extractelement <4 x i32> %A, i32 1 - store i32 %tmp, i32* %D + store i32 %tmp, i32* %ptr ret void } define void @st1lane0_4s(<4 x i32> %A, i32* %D) { ; CHECK-LABEL: st1lane0_4s -; CHECK: str +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #{{[0-9]+}}] + %ptr = getelementptr i32, i32* %D, i64 1 %tmp = extractelement <4 x i32> %A, i32 0 - store i32 %tmp, i32* %D + store i32 %tmp, i32* %ptr ret void } define void @st1lane0u_4s(<4 x i32> %A, i32* %D) { ; CHECK-LABEL: st1lane0u_4s -; CHECK: st1.s +; CHECK: stur s{{[0-9]+}}, [x{{[0-9]+}}, #-{{[0-9]+}}] %ptr = getelementptr i32, i32* %D, i64 -1 %tmp = extractelement <4 x i32> %A, i32 0 store i32 %tmp, i32* %ptr @@ -118,8 +124,8 @@ define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) { ; CHECK-LABEL: st1lane_ro_4s -; CHECK: add x[[XREG:[0-9]+]], x0, x1 -; CHECK: st1.s { v0 }[1], [x[[XREG]]] +; CHECK: add x[[XREG:[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: st1.s { v{{[0-9]+}} }[1], [x[[XREG]]] %ptr = getelementptr i32, i32* %D, i64 %offset %tmp = extractelement <4 x i32> %A, i32 1 store i32 %tmp, i32* %ptr @@ -128,7 +134,7 @@ define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) { ; CHECK-LABEL: st1lane0_ro_4s -; CHECK: str s0, [x0, x1, lsl #2] +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #2] %ptr = getelementptr i32, i32* %D, i64 %offset %tmp = extractelement <4 x i32> %A, i32 0 store i32 %tmp, i32* %ptr @@ -137,23 +143,25 @@ 
define void @st1lane_4s_float(<4 x float> %A, float* %D) { ; CHECK-LABEL: st1lane_4s_float -; CHECK: st1.s +; CHECK: st1.s { v{{[0-9]+}} }[1], [x{{[0-9]+}}] + %ptr = getelementptr float, float* %D, i64 1 %tmp = extractelement <4 x float> %A, i32 1 - store float %tmp, float* %D + store float %tmp, float* %ptr ret void } define void @st1lane0_4s_float(<4 x float> %A, float* %D) { ; CHECK-LABEL: st1lane0_4s_float -; CHECK: str +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #{{[0-9]+}}] + %ptr = getelementptr float, float* %D, i64 1 %tmp = extractelement <4 x float> %A, i32 0 - store float %tmp, float* %D + store float %tmp, float* %ptr ret void } define void @st1lane0u_4s_float(<4 x float> %A, float* %D) { ; CHECK-LABEL: st1lane0u_4s_float -; CHECK: st1.s +; CHECK: stur s{{[0-9]+}}, [x{{[0-9]+}}, #-{{[0-9]+}}] %ptr = getelementptr float, float* %D, i64 -1 %tmp = extractelement <4 x float> %A, i32 0 store float %tmp, float* %ptr @@ -162,8 +170,8 @@ define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) { ; CHECK-LABEL: st1lane_ro_4s_float -; CHECK: add x[[XREG:[0-9]+]], x0, x1 -; CHECK: st1.s { v0 }[1], [x[[XREG]]] +; CHECK: add x[[XREG:[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: st1.s { v{{[0-9]+}} }[1], [x[[XREG]]] %ptr = getelementptr float, float* %D, i64 %offset %tmp = extractelement <4 x float> %A, i32 1 store float %tmp, float* %ptr @@ -172,7 +180,7 @@ define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) { ; CHECK-LABEL: st1lane0_ro_4s_float -; CHECK: str s0, [x0, x1, lsl #2] +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #2] %ptr = getelementptr float, float* %D, i64 %offset %tmp = extractelement <4 x float> %A, i32 0 store float %tmp, float* %ptr @@ -181,23 +189,25 @@ define void @st1lane_2d(<2 x i64> %A, i64* %D) { ; CHECK-LABEL: st1lane_2d -; CHECK: st1.d +; CHECK: st1.d { v{{[0-9]+}} }[1], [x{{[0-9]+}}] + %ptr = getelementptr i64, i64* %D, i64 1 %tmp = extractelement <2 x i64> %A, i32 1 - store i64 %tmp, i64* %D 
+ store i64 %tmp, i64* %ptr ret void } define void @st1lane0_2d(<2 x i64> %A, i64* %D) { ; CHECK-LABEL: st1lane0_2d -; CHECK: str +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #{{[0-9]+}}] + %ptr = getelementptr i64, i64* %D, i64 1 %tmp = extractelement <2 x i64> %A, i32 0 - store i64 %tmp, i64* %D + store i64 %tmp, i64* %ptr ret void } define void @st1lane0u_2d(<2 x i64> %A, i64* %D) { ; CHECK-LABEL: st1lane0u_2d -; CHECK: st1.d +; CHECK: stur d{{[0-9]+}}, [x{{[0-9]+}}, #-{{[0-9]+}}] %ptr = getelementptr i64, i64* %D, i64 -1 %tmp = extractelement <2 x i64> %A, i32 0 store i64 %tmp, i64* %ptr @@ -206,8 +216,8 @@ define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) { ; CHECK-LABEL: st1lane_ro_2d -; CHECK: add x[[XREG:[0-9]+]], x0, x1 -; CHECK: st1.d { v0 }[1], [x[[XREG]]] +; CHECK: add x[[XREG:[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: st1.d { v{{[0-9]+}} }[1], [x[[XREG]]] %ptr = getelementptr i64, i64* %D, i64 %offset %tmp = extractelement <2 x i64> %A, i32 1 store i64 %tmp, i64* %ptr @@ -216,7 +226,7 @@ define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) { ; CHECK-LABEL: st1lane0_ro_2d -; CHECK: str d0, [x0, x1, lsl #3] +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #3] %ptr = getelementptr i64, i64* %D, i64 %offset %tmp = extractelement <2 x i64> %A, i32 0 store i64 %tmp, i64* %ptr @@ -225,23 +235,25 @@ define void @st1lane_2d_double(<2 x double> %A, double* %D) { ; CHECK-LABEL: st1lane_2d_double -; CHECK: st1.d +; CHECK: st1.d { v{{[0-9]+}} }[1], [x{{[0-9]+}}] + %ptr = getelementptr double, double* %D, i64 1 %tmp = extractelement <2 x double> %A, i32 1 - store double %tmp, double* %D + store double %tmp, double* %ptr ret void } define void @st1lane0_2d_double(<2 x double> %A, double* %D) { ; CHECK-LABEL: st1lane0_2d_double -; CHECK: str +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #{{[0-9]+}}] + %ptr = getelementptr double, double* %D, i64 1 %tmp = extractelement <2 x double> %A, i32 0 - store double %tmp, double* %D + store double 
%tmp, double* %ptr ret void } define void @st1lane0u_2d_double(<2 x double> %A, double* %D) { ; CHECK-LABEL: st1lane0u_2d_double -; CHECK: st1.d +; CHECK: stur d{{[0-9]+}}, [x{{[0-9]+}}, #-{{[0-9]+}}] %ptr = getelementptr double, double* %D, i64 -1 %tmp = extractelement <2 x double> %A, i32 0 store double %tmp, double* %ptr @@ -250,8 +262,8 @@ define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) { ; CHECK-LABEL: st1lane_ro_2d_double -; CHECK: add x[[XREG:[0-9]+]], x0, x1 -; CHECK: st1.d { v0 }[1], [x[[XREG]]] +; CHECK: add x[[XREG:[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: st1.d { v{{[0-9]+}} }[1], [x[[XREG]]] %ptr = getelementptr double, double* %D, i64 %offset %tmp = extractelement <2 x double> %A, i32 1 store double %tmp, double* %ptr @@ -260,7 +272,7 @@ define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) { ; CHECK-LABEL: st1lane0_ro_2d_double -; CHECK: str d0, [x0, x1, lsl #3] +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #3] %ptr = getelementptr double, double* %D, i64 %offset %tmp = extractelement <2 x double> %A, i32 0 store double %tmp, double* %ptr @@ -269,16 +281,17 @@ define void @st1lane_8b(<8 x i8> %A, i8* %D) { ; CHECK-LABEL: st1lane_8b -; CHECK: st1.b +; CHECK: st1.b { v{{[0-9]+}} }[1], [x{{[0-9]+}}] + %ptr = getelementptr i8, i8* %D, i64 1 %tmp = extractelement <8 x i8> %A, i32 1 - store i8 %tmp, i8* %D + store i8 %tmp, i8* %ptr ret void } define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) { ; CHECK-LABEL: st1lane_ro_8b -; CHECK: add x[[XREG:[0-9]+]], x0, x1 -; CHECK: st1.b { v0 }[1], [x[[XREG]]] +; CHECK: add x[[XREG:[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: st1.b { v{{[0-9]+}} }[1], [x[[XREG]]] %ptr = getelementptr i8, i8* %D, i64 %offset %tmp = extractelement <8 x i8> %A, i32 1 store i8 %tmp, i8* %ptr @@ -287,8 +300,8 @@ define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) { ; CHECK-LABEL: st1lane0_ro_8b -; CHECK: add x[[XREG:[0-9]+]], x0, x1 -; CHECK: st1.b { v0 
}[0], [x[[XREG]]] +; CHECK: add x[[XREG:[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: st1.b { v{{[0-9]+}} }[0], [x[[XREG]]] %ptr = getelementptr i8, i8* %D, i64 %offset %tmp = extractelement <8 x i8> %A, i32 0 store i8 %tmp, i8* %ptr @@ -297,23 +310,25 @@ define void @st1lane_4h(<4 x i16> %A, i16* %D) { ; CHECK-LABEL: st1lane_4h -; CHECK: st1.h +; CHECK: st1.h { v{{[0-9]+}} }[1], [x{{[0-9]+}}] + %ptr = getelementptr i16, i16* %D, i64 1 %tmp = extractelement <4 x i16> %A, i32 1 - store i16 %tmp, i16* %D + store i16 %tmp, i16* %ptr ret void } define void @st1lane0_4h(<4 x i16> %A, i16* %D) { ; CHECK-LABEL: st1lane0_4h -; CHECK: str +; CHECK: str h{{[0-9]+}}, [x{{[0-9]+}}, #{{[0-9]+}}] + %ptr = getelementptr i16, i16* %D, i64 1 %tmp = extractelement <4 x i16> %A, i32 0 - store i16 %tmp, i16* %D + store i16 %tmp, i16* %ptr ret void } define void @st1lane0u_4h(<4 x i16> %A, i16* %D) { ; CHECK-LABEL: st1lane0u_4h -; CHECK: st1.h +; CHECK: stur h{{[0-9]+}}, [x{{[0-9]+}}, #-{{[0-9]+}}] %ptr = getelementptr i16, i16* %D, i64 -1 %tmp = extractelement <4 x i16> %A, i32 0 store i16 %tmp, i16* %ptr @@ -322,8 +337,8 @@ define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) { ; CHECK-LABEL: st1lane_ro_4h -; CHECK: add x[[XREG:[0-9]+]], x0, x1 -; CHECK: st1.h { v0 }[1], [x[[XREG]]] +; CHECK: add x[[XREG:[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: st1.h { v{{[0-9]+}} }[1], [x[[XREG]]] %ptr = getelementptr i16, i16* %D, i64 %offset %tmp = extractelement <4 x i16> %A, i32 1 store i16 %tmp, i16* %ptr @@ -332,7 +347,7 @@ define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) { ; CHECK-LABEL: st1lane0_ro_4h -; CHECK: str h0, [x0, x1, lsl #1] +; CHECK: str h{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #1] %ptr = getelementptr i16, i16* %D, i64 %offset %tmp = extractelement <4 x i16> %A, i32 0 store i16 %tmp, i16* %ptr @@ -341,23 +356,25 @@ define void @st1lane_2s(<2 x i32> %A, i32* %D) { ; CHECK-LABEL: st1lane_2s -; CHECK: st1.s +; CHECK: st1.s { v{{[0-9]+}} }[1], 
[x{{[0-9]+}}] + %ptr = getelementptr i32, i32* %D, i64 1 %tmp = extractelement <2 x i32> %A, i32 1 - store i32 %tmp, i32* %D + store i32 %tmp, i32* %ptr ret void } define void @st1lane0_2s(<2 x i32> %A, i32* %D) { ; CHECK-LABEL: st1lane0_2s -; CHECK: str +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #{{[0-9]+}}] + %ptr = getelementptr i32, i32* %D, i64 1 %tmp = extractelement <2 x i32> %A, i32 0 - store i32 %tmp, i32* %D + store i32 %tmp, i32* %ptr ret void } define void @st1lane0u_2s(<2 x i32> %A, i32* %D) { ; CHECK-LABEL: st1lane0u_2s -; CHECK: st1.s +; CHECK: stur s{{[0-9]+}}, [x{{[0-9]+}}, #-{{[0-9]+}}] %ptr = getelementptr i32, i32* %D, i64 -1 %tmp = extractelement <2 x i32> %A, i32 0 store i32 %tmp, i32* %ptr @@ -366,8 +383,8 @@ define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) { ; CHECK-LABEL: st1lane_ro_2s -; CHECK: add x[[XREG:[0-9]+]], x0, x1 -; CHECK: st1.s { v0 }[1], [x[[XREG]]] +; CHECK: add x[[XREG:[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: st1.s { v{{[0-9]+}} }[1], [x[[XREG]]] %ptr = getelementptr i32, i32* %D, i64 %offset %tmp = extractelement <2 x i32> %A, i32 1 store i32 %tmp, i32* %ptr @@ -376,7 +393,7 @@ define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) { ; CHECK-LABEL: st1lane0_ro_2s -; CHECK: str s0, [x0, x1, lsl #2] +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #2] %ptr = getelementptr i32, i32* %D, i64 %offset %tmp = extractelement <2 x i32> %A, i32 0 store i32 %tmp, i32* %ptr @@ -385,23 +402,25 @@ define void @st1lane_2s_float(<2 x float> %A, float* %D) { ; CHECK-LABEL: st1lane_2s_float -; CHECK: st1.s +; CHECK: st1.s { v{{[0-9]+}} }[1], [x{{[0-9]+}}] + %ptr = getelementptr float, float* %D, i64 1 %tmp = extractelement <2 x float> %A, i32 1 - store float %tmp, float* %D + store float %tmp, float* %ptr ret void } define void @st1lane0_2s_float(<2 x float> %A, float* %D) { ; CHECK-LABEL: st1lane0_2s_float -; CHECK: str +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #{{[0-9]+}}] + %ptr = getelementptr float, 
float* %D, i64 1 %tmp = extractelement <2 x float> %A, i32 0 - store float %tmp, float* %D + store float %tmp, float* %ptr ret void } define void @st1lane0u_2s_float(<2 x float> %A, float* %D) { ; CHECK-LABEL: st1lane0u_2s_float -; CHECK: st1.s +; CHECK: stur s{{[0-9]+}}, [x{{[0-9]+}}, #-{{[0-9]+}}] %ptr = getelementptr float, float* %D, i64 -1 %tmp = extractelement <2 x float> %A, i32 0 store float %tmp, float* %ptr @@ -410,8 +429,8 @@ define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) { ; CHECK-LABEL: st1lane_ro_2s_float -; CHECK: add x[[XREG:[0-9]+]], x0, x1 -; CHECK: st1.s { v0 }[1], [x[[XREG]]] +; CHECK: add x[[XREG:[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}} +; CHECK: st1.s { v{{[0-9]+}} }[1], [x[[XREG]]] %ptr = getelementptr float, float* %D, i64 %offset %tmp = extractelement <2 x float> %A, i32 1 store float %tmp, float* %ptr @@ -420,7 +439,7 @@ define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) { ; CHECK-LABEL: st1lane0_ro_2s_float -; CHECK: str s0, [x0, x1, lsl #2] +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #2] %ptr = getelementptr float, float* %D, i64 %offset %tmp = extractelement <2 x float> %A, i32 0 store float %tmp, float* %ptr @@ -429,15 +448,16 @@ define void @st1lane0_1d(<1 x i64> %A, i64* %D) { ; CHECK-LABEL: st1lane0_1d -; CHECK: str +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #{{[0-9]+}}] + %ptr = getelementptr i64, i64* %D, i64 1 %tmp = extractelement <1 x i64> %A, i32 0 - store i64 %tmp, i64* %D + store i64 %tmp, i64* %ptr ret void } define void @st1lane0u_1d(<1 x i64> %A, i64* %D) { ; CHECK-LABEL: st1lane0u_1d -; CHECK: st1.d +; CHECK: stur d{{[0-9]+}}, [x{{[0-9]+}}, #-{{[0-9]+}}] %ptr = getelementptr i64, i64* %D, i64 -1 %tmp = extractelement <1 x i64> %A, i32 0 store i64 %tmp, i64* %ptr @@ -446,7 +466,7 @@ define void @st1lane0_ro_1d(<1 x i64> %A, i64* %D, i64 %offset) { ; CHECK-LABEL: st1lane0_ro_1d -; CHECK: str d0, [x0, x1, lsl #3] +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}, lsl 
#3] %ptr = getelementptr i64, i64* %D, i64 %offset %tmp = extractelement <1 x i64> %A, i32 0 store i64 %tmp, i64* %ptr @@ -455,15 +475,16 @@ define void @st1lane0_1d_double(<1 x double> %A, double* %D) { ; CHECK-LABEL: st1lane0_1d_double -; CHECK: str +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #{{[0-9]+}}] + %ptr = getelementptr double, double* %D, i64 1 %tmp = extractelement <1 x double> %A, i32 0 - store double %tmp, double* %D + store double %tmp, double* %ptr ret void } define void @st1lane0u_1d_double(<1 x double> %A, double* %D) { ; CHECK-LABEL: st1lane0u_1d_double -; CHECK: stur +; CHECK: stur d{{[0-9]+}}, [x{{[0-9]+}}, #-{{[0-9]+}}] %ptr = getelementptr double, double* %D, i64 -1 %tmp = extractelement <1 x double> %A, i32 0 store double %tmp, double* %ptr @@ -472,7 +493,7 @@ define void @st1lane0_ro_1d_double(<1 x double> %A, double* %D, i64 %offset) { ; CHECK-LABEL: st1lane0_ro_1d_double -; CHECK: str d0, [x0, x1, lsl #3] +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #3] %ptr = getelementptr double, double* %D, i64 %offset %tmp = extractelement <1 x double> %A, i32 0 store double %tmp, double* %ptr Index: llvm/test/CodeGen/AArch64/fp16-vector-load-store.ll =================================================================== --- llvm/test/CodeGen/AArch64/fp16-vector-load-store.ll +++ llvm/test/CodeGen/AArch64/fp16-vector-load-store.ll @@ -99,7 +99,7 @@ define void @storeu_lane0_64(half* nocapture %a, <4 x half> %b) #1 { ; CHECK-LABEL: storeu_lane0_64: -; CHECK: st1 { v0.h }[0], [x{{[0-9]+}}] +; CHECK: stur h0, [x{{[0-9]+}}, #-2] entry: %0 = getelementptr half, half* %a, i64 -1 %1 = extractelement <4 x half> %b, i32 0 @@ -148,7 +148,7 @@ define void @storeu_lane0_128(half* nocapture %a, <8 x half> %b) #1 { ; CHECK-LABEL: storeu_lane0_128: -; CHECK: st1 { v0.h }[0], [x{{[0-9]+}}] +; CHECK: stur h0, [x{{[0-9]+}}, #-2] entry: %0 = getelementptr half, half* %a, i64 -1 %1 = extractelement <8 x half> %b, i32 0