diff --git a/llvm/test/Analysis/CostModel/AArch64/splat-load.ll b/llvm/test/Analysis/CostModel/AArch64/splat-load.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/splat-load.ll
@@ -0,0 +1,152 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes='print<cost-model>' 2>&1 -disable-output | FileCheck %s
+
+; This test checks that the cost of a splat-load shuffle is correctly detected
+; as 0, because the combined load + broadcast is lowered to a `ld1r` instruction.
+; NOTE(review): the autogenerated checks below currently record a cost of 1 for
+; each `%splat_load` shuffle rather than 0; presumably they are a baseline to be
+; regenerated once the cost model handles this pattern -- confirm.
+;
+; NOTE: The code in this test is a hack. Since TTI cannot currently detect a
+; proper broadcast pattern from a scalar load (like the one that follows),
+; we use a vector load as the shuffle's operand to trigger the pattern.
+;
+; %load = load double, double *%ptr
+; %insert = insertelement <2 x double> poison, double %load, i32 0
+; %bcast = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer
+
+
+define void @splat_load_2xdouble(<2 x double> *%ptr) {
+; CHECK-LABEL: 'splat_load_2xdouble'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, <2 x double>* %ptr, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <2 x double>, <2 x double> *%ptr
+  %splat_load = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+  ret void
+}
+
+define void @splat_load_2xfloat(<2 x float> *%ptr) {
+; CHECK-LABEL: 'splat_load_2xfloat'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x float>, <2 x float>* %ptr, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x float> %load, <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <2 x float>, <2 x float> *%ptr
+  %splat_load = shufflevector <2 x float> %load, <2 x float> poison, <2 x i32> zeroinitializer
+  ret void
+}
+
+define void @splat_load_4xfloat(<4 x float> *%ptr) {
+; CHECK-LABEL: 'splat_load_4xfloat'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x float>, <4 x float>* %ptr, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <4 x float>, <4 x float> *%ptr
+  %splat_load = shufflevector <4 x float> %load, <4 x float> poison, <4 x i32> zeroinitializer
+  ret void
+}
+
+define void @splat_load_2xi32(<2 x i32> *%ptr) {
+; CHECK-LABEL: 'splat_load_2xi32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x i32>, <2 x i32>* %ptr, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x i32> %load, <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <2 x i32>, <2 x i32> *%ptr
+  %splat_load = shufflevector <2 x i32> %load, <2 x i32> poison, <2 x i32> zeroinitializer
+  ret void
+}
+
+define void @splat_load_4xi32(<4 x i32> *%ptr) {
+; CHECK-LABEL: 'splat_load_4xi32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i32>, <4 x i32>* %ptr, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <4 x i32>, <4 x i32> *%ptr
+  %splat_load = shufflevector <4 x i32> %load, <4 x i32> poison, <4 x i32> zeroinitializer
+  ret void
+}
+
+define void @splat_load_4xi16(<4 x i16> *%ptr) {
+; CHECK-LABEL: 'splat_load_4xi16'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <4 x i16>, <4 x i16>* %ptr, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <4 x i16> %load, <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <4 x i16>, <4 x i16> *%ptr
+  %splat_load = shufflevector <4 x i16> %load, <4 x i16> poison, <4 x i32> zeroinitializer
+  ret void
+}
+
+define void @splat_load_8xi16(<8 x i16> *%ptr) {
+; CHECK-LABEL: 'splat_load_8xi16'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i16>, <8 x i16>* %ptr, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <8 x i16>, <8 x i16> *%ptr
+  %splat_load = shufflevector <8 x i16> %load, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret void
+}
+
+define void @splat_load_8xi8(<8 x i8> *%ptr) {
+; CHECK-LABEL: 'splat_load_8xi8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <8 x i8>, <8 x i8>* %ptr, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <8 x i8> %load, <8 x i8> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <8 x i8>, <8 x i8> *%ptr
+  %splat_load = shufflevector <8 x i8> %load, <8 x i8> poison, <8 x i32> zeroinitializer
+  ret void
+}
+
+define void @splat_load_16xi8(<16 x i8> *%ptr) {
+; CHECK-LABEL: 'splat_load_16xi8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <16 x i8>, <16 x i8>* %ptr, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <16 x i8>, <16 x i8> *%ptr
+  %splat_load = shufflevector <16 x i8> %load, <16 x i8> poison, <16 x i32> zeroinitializer
+  ret void
+}
+
+; `<2 x i8>` is not supported by `ld1r`, so the shuffle cost should not be 0.
+define void @splat_load_2xi8(<2 x i8> *%ptr) {
+; CHECK-LABEL: 'splat_load_2xi8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load <2 x i8>, <2 x i8>* %ptr, align 2
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x i8> %load, <2 x i8> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <2 x i8>, <2 x i8> *%ptr
+  %splat_load = shufflevector <2 x i8> %load, <2 x i8> poison, <2 x i32> zeroinitializer
+  ret void
+}
+
+; `<4 x i8>` is not supported by `ld1r`, so the shuffle cost should not be 0.
+define void @splat_load_4xi8(<4 x i8> *%ptr) {
+; CHECK-LABEL: 'splat_load_4xi8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %load = load <4 x i8>, <4 x i8>* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <4 x i8> %load, <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <4 x i8>, <4 x i8> *%ptr
+  %splat_load = shufflevector <4 x i8> %load, <4 x i8> poison, <4 x i32> zeroinitializer
+  ret void
+}
+
+; `<2 x i16>` is not supported by `ld1r`, so the shuffle cost should not be 0.
+define void @splat_load_2xi16(<2 x i16> *%ptr) {
+; CHECK-LABEL: 'splat_load_2xi16'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %load = load <2 x i16>, <2 x i16>* %ptr, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x i16> %load, <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <2 x i16>, <2 x i16> *%ptr
+  %splat_load = shufflevector <2 x i16> %load, <2 x i16> poison, <2 x i32> zeroinitializer
+  ret void
+}
diff --git a/llvm/test/Analysis/CostModel/X86/splat-load.ll b/llvm/test/Analysis/CostModel/X86/splat-load.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/X86/splat-load.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse3 | FileCheck %s -check-prefixes=SSE3
+
+; This test checks that the cost of a splat-load shuffle is correctly detected
+; as 0, because the combined load + broadcast is lowered to a `movddup` instr.
+; NOTE(review): the autogenerated checks below currently record a cost of 1 for
+; each `%splat_load` shuffle (SSE2 and SSE3) rather than 0; presumably they are
+; a baseline to be regenerated once the cost model handles this pattern -- confirm.
+;
+; TODO: AVX `vbroadcast*` seems to support more types than the
+; 2xdouble type of `movddup`:
+; - `vbroadcastss` supports 4xfloat, 8xfloat
+; - `vbroadcastsd` supports 4xdouble
+
+; NOTE: The code in this test is a hack. Since TTI cannot currently detect a
+; proper broadcast pattern from a scalar load (like the one that follows),
+; we use a vector load as the shuffle's operand to trigger the pattern.
+;
+; %load = load double, double *%ptr
+; %insert = insertelement <2 x double> poison, double %load, i32 0
+; %bcast = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer
+
+define void @splat_load_2xdouble(<2 x double> *%ptr) {
+; SSE2-LABEL: 'splat_load_2xdouble'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, <2 x double>* %ptr, align 16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE3-LABEL: 'splat_load_2xdouble'
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x double>, <2 x double>* %ptr, align 16
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <2 x double>, <2 x double> *%ptr
+  %splat_load = shufflevector <2 x double> %load, <2 x double> poison, <2 x i32> zeroinitializer
+  ret void
+}
+
+define void @splat_load_2xfloat(<2 x float> *%ptr) {
+; SSE2-LABEL: 'splat_load_2xfloat'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x float>, <2 x float>* %ptr, align 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x float> %load, <2 x float> poison, <2 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE3-LABEL: 'splat_load_2xfloat'
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %load = load <2 x float>, <2 x float>* %ptr, align 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat_load = shufflevector <2 x float> %load, <2 x float> poison, <2 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %load = load <2 x float>, <2 x float> *%ptr
+  %splat_load = shufflevector <2 x float> %load, <2 x float> poison, <2 x i32> zeroinitializer
+  ret void
+}