diff --git a/llvm/test/Analysis/CostModel/AArch64/kryo.ll b/llvm/test/Analysis/CostModel/AArch64/kryo.ll
--- a/llvm/test/Analysis/CostModel/AArch64/kryo.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/kryo.ll
@@ -24,3 +24,27 @@
 
   ret void
 }
+
+; CHECK-LABEL: vectorInstrExtractCost
+define i64 @vectorInstrExtractCost(<4 x i64> %vecreg) {
+
+  ; Vector extracts - extracting the element at index 0 is considered
+  ; free in the current implementation. Index 2 is rounded to 0, so
+  ; extracting the element at index 2 has cost 0 as well. Extracting
+  ; the elements at indices 1 and 3 is expected to cost 2 each.
+  ;
+  ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 1
+  ; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 2
+  %t1 = extractelement <4 x i64> %vecreg, i32 1
+  %t2 = extractelement <4 x i64> %vecreg, i32 2
+  %ele = add i64 %t2, 1
+  %cond = icmp eq i64 %t1, %ele
+
+  ; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 0
+  ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 3
+  %t0 = extractelement <4 x i64> %vecreg, i32 0
+  %t3 = extractelement <4 x i64> %vecreg, i32 3
+  %val = select i1 %cond, i64 %t0 , i64 %t3
+
+  ret i64 %val
+}
diff --git a/llvm/test/Transforms/LICM/AArch64/extract-element.ll b/llvm/test/Transforms/LICM/AArch64/extract-element.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LICM/AArch64/extract-element.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
+
+define i1 @func(ptr %0, i64 %1) {
+; CHECK-LABEL: @func(
+; CHECK-NEXT:    br label [[TMP3:%.*]]
+; CHECK:       3:
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[TMP12:%.*]], [[TMP11:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[TMP1:%.*]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[DOTSPLIT_LOOP_EXIT2:%.*]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds <1 x i64>, ptr [[TMP0:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <1 x i64> [[TMP8]], i64 0
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[TMP9]], -1
+; CHECK-NEXT:    br i1 [[TMP10]], label [[TMP11]], label [[DOTSPLIT_LOOP_EXIT:%.*]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP12]] = add i64 [[TMP4]], 1
+; CHECK-NEXT:    br label [[TMP3]]
+; CHECK:       .split.loop.exit:
+; CHECK-NEXT:    [[DOTLCSSA7:%.*]] = phi <1 x i64> [ [[TMP8]], [[TMP6]] ]
+; CHECK-NEXT:    [[DOTLCSSA6:%.*]] = phi i64 [ [[TMP4]], [[TMP6]] ]
+; CHECK-NEXT:    [[DOTPH:%.*]] = phi i1 [ [[TMP5]], [[TMP6]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[DOTLCSSA7]], i64 0
+; CHECK-NEXT:    [[TMP14:%.*]] = xor i64 [[TMP13]], -1
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[DOTLCSSA6]]
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp uge i64 [[TMP15]], [[TMP1]]
+; CHECK-NEXT:    br label [[TMP17:%.*]]
+; CHECK:       .split.loop.exit2:
+; CHECK-NEXT:    [[DOTPH3:%.*]] = phi i1 [ [[TMP5]], [[TMP3]] ]
+; CHECK-NEXT:    [[DOTPH4:%.*]] = phi i1 [ undef, [[TMP3]] ]
+; CHECK-NEXT:    br label [[TMP17]]
+; CHECK:       17:
+; CHECK-NEXT:    [[TMP18:%.*]] = phi i1 [ [[DOTPH]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH3]], [[DOTSPLIT_LOOP_EXIT2]] ]
+; CHECK-NEXT:    [[TMP19:%.*]] = phi i1 [ [[TMP16]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH4]], [[DOTSPLIT_LOOP_EXIT2]] ]
+; CHECK-NEXT:    [[TMP20:%.*]] = xor i1 [[TMP18]], true
+; CHECK-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i1 true, i1 [[TMP19]]
+; CHECK-NEXT:    ret i1 [[TMP21]]
+;
+  br label %3
+
+3:                                                ; preds = %14, %2
+  %4 = phi i64 [ 0, %2 ], [ %15, %14 ]
+  %5 = icmp ult i64 %4, %1
+  br i1 %5, label %6, label %16
+
+6:                                                ; preds = %3
+  %7 = getelementptr inbounds <1 x i64>, ptr %0, i64 %4
+  %8 = load <1 x i64>, ptr %7, align 8
+  %9 = extractelement <1 x i64> %8, i64 0
+  %10 = icmp eq i64 %9, -1
+  %11 = xor i64 %9, -1
+  %12 = add i64 %11, %4
+  %13 = icmp uge i64 %12, %1
+  br i1 %10, label %14, label %16
+
+14:                                               ; preds = %6
+  %15 = add i64 %4, 1
+  br label %3
+
+16:                                               ; preds = %3, %6
+  %17 = phi i1 [ %5, %3 ], [ %5, %6 ]
+  %18 = phi i1 [ %13, %6 ], [ undef, %3 ]
+  %19 = xor i1 %17, true
+  %20 = select i1 %19, i1 true, i1 %18
+  ret i1 %20
+}