Index: llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
===================================================================
--- llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
+++ llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
@@ -13,6 +13,18 @@
   ret i32 %r
 }
 
+; Scalable-vector variant: the load/extractelement pair is expected unchanged.
+define i32 @load_extract_idx_0_vscale(ptr %x) {
+; CHECK-LABEL: @load_extract_idx_0_vscale(
+; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i32 3
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %lv = load <vscale x 4 x i32>, ptr %x
+  %r = extractelement <vscale x 4 x i32> %lv, i32 3
+  ret i32 %r
+}
+
 ; If the original load had a smaller alignment than the scalar type, the
 ; smaller alignment should be used.
 define i32 @load_extract_idx_0_small_alignment(ptr %x) {
@@ -48,6 +60,18 @@
   ret i32 %r
 }
 
+; Scalable-vector variant: the load/extractelement pair is expected unchanged.
+define i32 @load_extract_idx_2_vscale(ptr %x) {
+; CHECK-LABEL: @load_extract_idx_2_vscale(
+; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i32 2
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %lv = load <vscale x 4 x i32>, ptr %x
+  %r = extractelement <vscale x 4 x i32> %lv, i32 2
+  ret i32 %r
+}
+
 define i32 @load_extract_idx_3(ptr %x) {
 ; CHECK-LABEL: @load_extract_idx_3(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3
@@ -72,6 +96,19 @@
   ret i32 %r
 }
 
+; This is not optimized because the index value 4 is out of range for the
+; minimum number of elements (4) of the scalable vector.
+define i32 @load_extract_idx_4_vscale(ptr %x) {
+; CHECK-LABEL: @load_extract_idx_4_vscale(
+; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i32 4
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %lv = load <vscale x 4 x i32>, ptr %x
+  %r = extractelement <vscale x 4 x i32> %lv, i32 4
+  ret i32 %r
+}
+
 define i32 @load_extract_idx_var_i64(ptr %x, i64 %idx) {
 ; CHECK-LABEL: @load_extract_idx_var_i64(
 ; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16