Test-only patch for the VectorCombine AArch64 load/extractelement tests.

Adds `vscale_*` counterparts of the existing fixed-width tests, operating on
<vscale x 4 x i32> instead of <4 x i32>. In every added test the CHECK lines
repeat the input load and extractelement, i.e. the expected output keeps both
instructions exactly as written.

diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
--- a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll
@@ -13,6 +13,17 @@
   ret i32 %r
 }
 
+define i32 @vscale_load_extract_idx_0(ptr %x) {
+; CHECK-LABEL: @vscale_load_extract_idx_0(
+; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i32 0
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %lv = load <vscale x 4 x i32>, ptr %x
+  %r = extractelement <vscale x 4 x i32> %lv, i32 0
+  ret i32 %r
+}
+
 ; If the original load had a smaller alignment than the scalar type, the
 ; smaller alignment should be used.
 define i32 @load_extract_idx_0_small_alignment(ptr %x) {
@@ -48,6 +59,17 @@
   ret i32 %r
 }
 
+define i32 @vscale_load_extract_idx_2(ptr %x) {
+; CHECK-LABEL: @vscale_load_extract_idx_2(
+; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i32 2
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %lv = load <vscale x 4 x i32>, ptr %x
+  %r = extractelement <vscale x 4 x i32> %lv, i32 2
+  ret i32 %r
+}
+
 define i32 @load_extract_idx_3(ptr %x) {
 ; CHECK-LABEL: @load_extract_idx_3(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3
@@ -72,6 +94,17 @@
   ret i32 %r
 }
 
+define i32 @vscale_load_extract_idx_4(ptr %x) {
+; CHECK-LABEL: @vscale_load_extract_idx_4(
+; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i32 4
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %lv = load <vscale x 4 x i32>, ptr %x
+  %r = extractelement <vscale x 4 x i32> %lv, i32 4
+  ret i32 %r
+}
+
 define i32 @load_extract_idx_var_i64(ptr %x, i64 %idx) {
 ; CHECK-LABEL: @load_extract_idx_var_i64(
 ; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
@@ -104,6 +137,25 @@
   ret i32 %r
 }
 
+define i32 @vscale_load_extract_idx_var_i64_known_valid_by_assume(ptr %x, i64 %idx) {
+; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_assume(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    call void @maythrow()
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %cmp = icmp ult i64 %idx, 4
+  call void @llvm.assume(i1 %cmp)
+  %lv = load <vscale x 4 x i32>, ptr %x
+  call void @maythrow()
+  %r = extractelement <vscale x 4 x i32> %lv, i64 %idx
+  ret i32 %r
+}
+
 declare i1 @cond()
 
 define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block(ptr %x, i64 %idx, i1 %c.1) {
@@ -213,6 +265,23 @@
   ret i32 %r
 }
 
+define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_assume(ptr %x, i64 %idx) {
+; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_assume(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 5
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %cmp = icmp ult i64 %idx, 5
+  call void @llvm.assume(i1 %cmp)
+  %lv = load <vscale x 4 x i32>, ptr %x
+  %r = extractelement <vscale x 4 x i32> %lv, i64 %idx
+  ret i32 %r
+}
+
 declare void @llvm.assume(i1)
 
 define i32 @load_extract_idx_var_i64_known_valid_by_and(ptr %x, i64 %idx) {
@@ -230,6 +299,21 @@
   ret i32 %r
 }
 
+define i32 @vscale_load_extract_idx_var_i64_known_valid_by_and(ptr %x, i64 %idx) {
+; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_and(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 3
+; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %idx.clamped = and i64 %idx, 3
+  %lv = load <vscale x 4 x i32>, ptr %x
+  %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped
+  ret i32 %r
+}
+
 define i32 @load_extract_idx_var_i64_known_valid_by_and_noundef(ptr %x, i64 noundef %idx) {
 ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and_noundef(
 ; CHECK-NEXT:  entry:
@@ -260,6 +344,21 @@
   ret i32 %r
 }
 
+define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_and(ptr %x, i64 %idx) {
+; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_and(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 4
+; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %idx.clamped = and i64 %idx, 4
+  %lv = load <vscale x 4 x i32>, ptr %x
+  %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped
+  ret i32 %r
+}
+
 define i32 @load_extract_idx_var_i64_known_valid_by_urem(ptr %x, i64 %idx) {
 ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem(
 ; CHECK-NEXT:  entry:
@@ -275,6 +374,21 @@
   ret i32 %r
 }
 
+define i32 @vscale_load_extract_idx_var_i64_known_valid_by_urem(ptr %x, i64 %idx) {
+; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_urem(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 4
+; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %idx.clamped = urem i64 %idx, 4
+  %lv = load <vscale x 4 x i32>, ptr %x
+  %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped
+  ret i32 %r
+}
+
 define i32 @load_extract_idx_var_i64_known_valid_by_urem_noundef(ptr %x, i64 noundef %idx) {
 ; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem_noundef(
 ; CHECK-NEXT:  entry:
@@ -305,6 +419,21 @@
   ret i32 %r
 }
 
+define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_urem(ptr %x, i64 %idx) {
+; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_urem(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 5
+; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
+; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %idx.clamped = urem i64 %idx, 5
+  %lv = load <vscale x 4 x i32>, ptr %x
+  %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped
+  ret i32 %r
+}
+
 define i32 @load_extract_idx_var_i32(ptr %x, i32 %idx) {
 ; CHECK-LABEL: @load_extract_idx_var_i32(
 ; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16