# Changeset View

Changeset View

# Standalone View

Standalone View

# llvm/trunk/test/CodeGen/Generic/vector-redux.ll

; RUN: llc < %s -debug-only=isel -o /dev/null 2>&1 | FileCheck %s
; REQUIRES: asserts

; This test pipes llc's instruction-selection debug log into FileCheck and
; looks for the "Detected a reduction operation" messages emitted for the
; vector fadd chain below. The debug log is requested with -debug-only=isel,
; which is why the test is restricted to assertion-enabled builds.

; NOTE(review): @a is not referenced by reduce_add_float in this file;
; presumably kept for other tests -- confirm before removing.
@a = global [1024 x i32] zeroinitializer, align 16

; reduce_add_float: returns the fast-math sum of the floats %a[0] .. %a[999].
;
; Shape of the IR:
;   * vector.body is unrolled five times. Each unrolled step loads two
;     <4 x float> vectors (8 consecutive floats) and adds them into two
;     independent accumulator chains ("lo" = first vector of the pair,
;     "hi" = second vector of the pair).
;   * The induction variable advances by 40 floats per trip and the loop
;     exits when it reaches exactly 1000.
;   * middle.block adds the two accumulators together, then folds the four
;     lanes down to lane 0 with two shufflevector + fadd steps and extracts
;     lane 0 as the result.
;
; The function contains 13 "fadd fast" instructions (10 in the loop, 3 in
; middle.block) while 11 messages are expected below.
; NOTE(review): presumably the 10 loop adds plus the first add of
; middle.block are the ones reported -- confirm against the reduction
; detection code in the SelectionDAG builder.
define float @reduce_add_float(float* nocapture readonly %a) {
; CHECK-LABEL: reduce_add_float
; CHECK: Detected a reduction operation: {{.*}} fadd fast
; CHECK: Detected a reduction operation: {{.*}} fadd fast
; CHECK: Detected a reduction operation: {{.*}} fadd fast
; CHECK: Detected a reduction operation: {{.*}} fadd fast
; CHECK: Detected a reduction operation: {{.*}} fadd fast
; CHECK: Detected a reduction operation: {{.*}} fadd fast
; CHECK: Detected a reduction operation: {{.*}} fadd fast
; CHECK: Detected a reduction operation: {{.*}} fadd fast
; CHECK: Detected a reduction operation: {{.*}} fadd fast
; CHECK: Detected a reduction operation: {{.*}} fadd fast
; CHECK: Detected a reduction operation: {{.*}} fadd fast
;
entry:
  br label %vector.body

vector.body:
  ; Loop-carried state: element index and the two running vector sums.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %acc.lo = phi <4 x float> [ zeroinitializer, %entry ], [ %sum4.lo, %vector.body ]
  %acc.hi = phi <4 x float> [ zeroinitializer, %entry ], [ %sum4.hi, %vector.body ]

  ; Unrolled step 0: floats [iv, iv+8).
  %p0.lo = getelementptr inbounds float, float* %a, i64 %iv
  %vp0.lo = bitcast float* %p0.lo to <4 x float>*
  %v0.lo = load <4 x float>, <4 x float>* %vp0.lo, align 4
  %p0.hi = getelementptr float, float* %p0.lo, i64 4
  %vp0.hi = bitcast float* %p0.hi to <4 x float>*
  %v0.hi = load <4 x float>, <4 x float>* %vp0.hi, align 4
  %sum0.lo = fadd fast <4 x float> %v0.lo, %acc.lo
  %sum0.hi = fadd fast <4 x float> %v0.hi, %acc.hi

  ; Unrolled step 1: floats [iv+8, iv+16).
  %iv.1 = add nuw nsw i64 %iv, 8
  %p1.lo = getelementptr inbounds float, float* %a, i64 %iv.1
  %vp1.lo = bitcast float* %p1.lo to <4 x float>*
  %v1.lo = load <4 x float>, <4 x float>* %vp1.lo, align 4
  %p1.hi = getelementptr float, float* %p1.lo, i64 4
  %vp1.hi = bitcast float* %p1.hi to <4 x float>*
  %v1.hi = load <4 x float>, <4 x float>* %vp1.hi, align 4
  %sum1.lo = fadd fast <4 x float> %v1.lo, %sum0.lo
  %sum1.hi = fadd fast <4 x float> %v1.hi, %sum0.hi

  ; Unrolled step 2: floats [iv+16, iv+24).
  %iv.2 = add nsw i64 %iv, 16
  %p2.lo = getelementptr inbounds float, float* %a, i64 %iv.2
  %vp2.lo = bitcast float* %p2.lo to <4 x float>*
  %v2.lo = load <4 x float>, <4 x float>* %vp2.lo, align 4
  %p2.hi = getelementptr float, float* %p2.lo, i64 4
  %vp2.hi = bitcast float* %p2.hi to <4 x float>*
  %v2.hi = load <4 x float>, <4 x float>* %vp2.hi, align 4
  %sum2.lo = fadd fast <4 x float> %v2.lo, %sum1.lo
  %sum2.hi = fadd fast <4 x float> %v2.hi, %sum1.hi

  ; Unrolled step 3: floats [iv+24, iv+32).
  %iv.3 = add nsw i64 %iv, 24
  %p3.lo = getelementptr inbounds float, float* %a, i64 %iv.3
  %vp3.lo = bitcast float* %p3.lo to <4 x float>*
  %v3.lo = load <4 x float>, <4 x float>* %vp3.lo, align 4
  %p3.hi = getelementptr float, float* %p3.lo, i64 4
  %vp3.hi = bitcast float* %p3.hi to <4 x float>*
  %v3.hi = load <4 x float>, <4 x float>* %vp3.hi, align 4
  %sum3.lo = fadd fast <4 x float> %v3.lo, %sum2.lo
  %sum3.hi = fadd fast <4 x float> %v3.hi, %sum2.hi

  ; Unrolled step 4: floats [iv+32, iv+40).
  %iv.4 = add nsw i64 %iv, 32
  %p4.lo = getelementptr inbounds float, float* %a, i64 %iv.4
  %vp4.lo = bitcast float* %p4.lo to <4 x float>*
  %v4.lo = load <4 x float>, <4 x float>* %vp4.lo, align 4
  %p4.hi = getelementptr float, float* %p4.lo, i64 4
  %vp4.hi = bitcast float* %p4.hi to <4 x float>*
  %v4.hi = load <4 x float>, <4 x float>* %vp4.hi, align 4
  %sum4.lo = fadd fast <4 x float> %v4.lo, %sum3.lo
  %sum4.hi = fadd fast <4 x float> %v4.hi, %sum3.hi

  ; Advance by 40 floats; leave the loop once exactly 1000 were consumed.
  %iv.next = add nsw i64 %iv, 40
  %done = icmp eq i64 %iv.next, 1000
  br i1 %done, label %middle.block, label %vector.body

middle.block:
  ; Final values of the two accumulator chains.
  %total.hi = phi <4 x float> [ %sum4.hi, %vector.body ]
  %total.lo = phi <4 x float> [ %sum4.lo, %vector.body ]
  ; Combine both chains into a single 4-lane partial sum.
  %rdx.4 = fadd fast <4 x float> %total.hi, %total.lo
  ; Fold lanes 2,3 onto lanes 0,1.
  %shuf.hi2 = shufflevector <4 x float> %rdx.4, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %rdx.2 = fadd fast <4 x float> %rdx.4, %shuf.hi2
  ; Fold lane 1 onto lane 0.
  %shuf.hi1 = shufflevector <4 x float> %rdx.2, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %rdx.1 = fadd fast <4 x float> %rdx.2, %shuf.hi1
  ; Lane 0 now holds the complete sum.
  %result = extractelement <4 x float> %rdx.1, i32 0
  ret float %result
}