Index: lib/IR/Instruction.cpp =================================================================== --- lib/IR/Instruction.cpp +++ lib/IR/Instruction.cpp @@ -602,7 +602,8 @@ switch (Opcode) { case FMul: case FAdd: - return cast<FPMathOperator>(this)->isFast(); + return cast<FPMathOperator>(this)->hasAllowReassoc() && + cast<FPMathOperator>(this)->hasNoSignedZeros(); default: return false; } Index: test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll =================================================================== --- test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll +++ test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll @@ -1,10 +1,145 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s -; CHECK: mul -; CHECK: mul -define <4 x float> @test(<4 x float> %V) { - %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1] - %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > ; <<4 x float>> [#uses=1] +; (V * C1) * C2 => V * (C1 * C2) +; Verify this doesn't fold when no fast-math-flags are specified +define <4 x float> @test_fmul(<4 x float> %V) { +; CHECK-LABEL: @test_fmul( +; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00> +; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00> +; CHECK-NEXT: ret <4 x float> [[TMP2]] + %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > ret <4 x float> %Z } +; (V * C1) * C2 => V * (C1 * C2) +; Verify this folds with 'fast' +define <4 x float> @test_fmul_fast(<4 x float> %V) { +; CHECK-LABEL: @test_fmul_fast( +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01> +; CHECK-NEXT: ret <4 x float> [[TMP1]] + %Y = fmul fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + %Z = fmul fast <4 x float> %Y, < float 
1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > + ret <4 x float> %Z +} + +; (V * C1) * C2 => V * (C1 * C2) +; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required) +define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) { +; CHECK-LABEL: @test_fmul_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01> +; CHECK-NEXT: ret <4 x float> [[TMP1]] + %Y = fmul reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + %Z = fmul reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > + ret <4 x float> %Z +} + +; (V * C1) * C2 => V * (C1 * C2) +; TODO: This doesn't require 'nsz'. It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 } +define <4 x float> @test_fmul_reassoc(<4 x float> %V) { +; CHECK-LABEL: @test_fmul_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00> +; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00> +; CHECK-NEXT: ret <4 x float> [[TMP2]] + %Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + %Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 > + ret <4 x float> %Z +} + +; (V + C1) + C2 => V + (C1 + C2) +; Verify this doesn't fold when no fast-math-flags are specified +define <4 x float> @test_fadd(<4 x float> %V) { +; CHECK-LABEL: @test_fadd( +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00> +; CHECK-NEXT: ret <4 x float> [[TMP2]] + %Y = fadd <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + %Z = fadd <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 > + ret <4 x float> %Z +} + +; (V + C1) + C2 => V + (C1 + C2) +; Verify this folds with 'fast' 
+define <4 x float> @test_fadd_fast(<4 x float> %V) { +; CHECK-LABEL: @test_fadd_fast( +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00> +; CHECK-NEXT: ret <4 x float> [[TMP1]] + %Y = fadd fast <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + %Z = fadd fast <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 > + ret <4 x float> %Z +} + +; (V + C1) + C2 => V + (C1 + C2) +; Verify this folds with 'reassoc' and 'nsz' ('nsz' not technically required) +define <4 x float> @test_fadd_reassoc_nsz(<4 x float> %V) { +; CHECK-LABEL: @test_fadd_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[V:%.*]], <float 2.000000e+00, float 4.000000e+00, float 0.000000e+00, float 8.000000e+00> +; CHECK-NEXT: ret <4 x float> [[TMP1]] + %Y = fadd reassoc nsz <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + %Z = fadd reassoc nsz <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 > + ret <4 x float> %Z +} + +; (V + C1) + C2 => V + (C1 + C2) +; TODO: This doesn't require 'nsz'. 
It should fold to V + { 2.0, 4.0, 0.0, 8.0 } +define <4 x float> @test_fadd_reassoc(<4 x float> %V) { +; CHECK-LABEL: @test_fadd_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00> +; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00> +; CHECK-NEXT: ret <4 x float> [[TMP2]] + %Y = fadd reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + %Z = fadd reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+00, float -3.000000e+00, float 4.000000e+00 > + ret <4 x float> %Z +} + +; ( A + C1 ) + ( B + -C1 ) +; Verify this doesn't fold when no fast-math-flags are specified +define <4 x float> @test_fadds_cancel_(<4 x float> %A, <4 x float> %B) { +; CHECK-LABEL: @test_fadds_cancel_( +; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00> +; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00> +; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[TMP3]] + %X = fadd <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + %Y = fadd <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 > + %Z = fadd <4 x float> %X, %Y + ret <4 x float> %Z +} + +; ( A + C1 ) + ( B + -C1 ) +; Verify this folds to 'A + B' with 'fast' +define <4 x float> @test_fadds_cancel_fast(<4 x float> %A, <4 x float> %B) { +; CHECK-LABEL: @test_fadds_cancel_fast( +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <4 x float> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret <4 x float> [[TMP1]] + %X = fadd fast <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + %Y = fadd fast <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 > + %Z = fadd fast <4 x float> %X, %Y + ret <4 x float> %Z +} + +; ( A + C1 ) + ( B + -C1 ) +; Verify this folds to 'A + B' with 'reassoc' and 'nsz' ('nsz' is required) +define 
<4 x float> @test_fadds_cancel_reassoc_nsz(<4 x float> %A, <4 x float> %B) { +; CHECK-LABEL: @test_fadds_cancel_reassoc_nsz( +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz <4 x float> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: ret <4 x float> [[TMP1]] + %X = fadd reassoc nsz <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + %Y = fadd reassoc nsz <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 > + %Z = fadd reassoc nsz <4 x float> %X, %Y + ret <4 x float> %Z +} + +; ( A + C1 ) + ( B + -C1 ) +; Verify the fold is not done with only 'reassoc' ('nsz' is required). +define <4 x float> @test_fadds_cancel_reassoc(<4 x float> %A, <4 x float> %B) { +; CHECK-LABEL: @test_fadds_cancel_reassoc( +; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc <4 x float> [[A:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00> +; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc <4 x float> [[B:%.*]], <float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00> +; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[TMP3]] + %X = fadd reassoc <4 x float> %A, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > + %Y = fadd reassoc <4 x float> %B, < float -1.000000e+00, float -2.000000e+00, float -3.000000e+00, float -4.000000e+00 > + %Z = fadd reassoc <4 x float> %X, %Y + ret <4 x float> %Z +}