Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4767,6 +4767,18 @@ } } + // Any FP binop with an undef operand is folded to NaN. This matches the + // behavior of the IR optimizer. + switch (Opcode) { + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (N1.isUndef() || N2.isUndef()) + return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT); + } + // Canonicalize an UNDEF to the RHS, even over a constant. if (N1.isUndef()) { if (TLI->isCommutativeBinOp(Opcode)) { @@ -4776,9 +4788,6 @@ case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: case ISD::SUB: - case ISD::FSUB: - case ISD::FDIV: - case ISD::FREM: return getUNDEF(VT); // fold op(undef, arg2) -> undef case ISD::UDIV: case ISD::SDIV: @@ -4813,14 +4822,6 @@ case ISD::SRL: case ISD::SHL: return getUNDEF(VT); // fold op(arg1, undef) -> undef - case ISD::FADD: - case ISD::FSUB: - case ISD::FMUL: - case ISD::FDIV: - case ISD::FREM: - if (getTarget().Options.UnsafeFPMath) - return N2; - break; case ISD::MUL: case ISD::AND: return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 Index: llvm/trunk/test/CodeGen/AArch64/fcvt_combine.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fcvt_combine.ll +++ llvm/trunk/test/CodeGen/AArch64/fcvt_combine.ll @@ -100,9 +100,8 @@ ret <2 x i32> %vcvt.i } -; Don't combine all undefs. +; Combine all undefs. ; CHECK-LABEL: test10 -; CHECK: fmul.2s v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; CHECK: fcvtzu.2s v{{[0-9]+}}, v{{[0-9]+}} ; CHECK: ret define <2 x i32> @test10(<2 x float> %f) { Index: llvm/trunk/test/CodeGen/AMDGPU/mad-mix-lo.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/mad-mix-lo.ll +++ llvm/trunk/test/CodeGen/AMDGPU/mad-mix-lo.ll @@ -145,10 +145,13 @@ ; FIXME: Should be packed into 2 registers per argument? ; GCN-LABEL: {{^}}v_mad_mix_v3f32_clamp_postcvt: ; GCN: s_waitcnt -; GFX9-NEXT: v_mad_mixlo_f16 v2, v2, v5, v8 op_sel_hi:[1,1,1] clamp -; GFX9-NEXT: v_mad_mixhi_f16 v2, v0, v0, v0 clamp +; GFX9-NEXT: v_mad_mixlo_f16 v2, v2, v5, v8 op_sel_hi:[1,1,1] ; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v3, v6 op_sel_hi:[1,1,1] clamp +; GFX9-NEXT: s_movk_i32 s6, 0x7e00 +; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GFX9-NEXT: v_lshl_or_b32 v2, s6, 16, v2 ; GFX9-NEXT: v_mad_mixhi_f16 v0, v1, v4, v7 op_sel_hi:[1,1,1] clamp +; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 clamp ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0 ; GFX9-NEXT: s_setpc_b64 define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 { Index: llvm/trunk/test/CodeGen/NVPTX/implicit-def.ll =================================================================== --- llvm/trunk/test/CodeGen/NVPTX/implicit-def.ll +++ llvm/trunk/test/CodeGen/NVPTX/implicit-def.ll @@ -1,9 +0,0 @@ -; RUN: llc < %s -O0 -march=nvptx -mcpu=sm_20 -asm-verbose=1 | FileCheck %s - -; CHECK: // implicit-def: %f[[F0:[0-9]+]] -; CHECK: add.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f[[F0]]; -define float @foo(float %a) { - %ret = fadd float %a, undef - ret float %ret -} - Index: llvm/trunk/test/CodeGen/X86/fp-undef.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fp-undef.ll +++ llvm/trunk/test/CodeGen/X86/fp-undef.ll @@ -6,27 +6,19 @@ ; adding something here, you should probably add it there too. define float @fadd_undef_op0(float %x) { -; STRICT-LABEL: fadd_undef_op0: -; STRICT: # %bb.0: -; STRICT-NEXT: addss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fadd_undef_op0: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fadd_undef_op0: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fadd float undef, %x ret float %r } define float @fadd_undef_op1(float %x) { -; STRICT-LABEL: fadd_undef_op1: -; STRICT: # %bb.0: -; STRICT-NEXT: addss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fadd_undef_op1: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fadd_undef_op1: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fadd float %x, undef ret float %r } @@ -34,46 +26,35 @@ define float @fsub_undef_op0(float %x) { ; ANY-LABEL: fsub_undef_op0: ; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fsub float undef, %x ret float %r } define float @fsub_undef_op1(float %x) { -; STRICT-LABEL: fsub_undef_op1: -; STRICT: # %bb.0: -; STRICT-NEXT: subss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fsub_undef_op1: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fsub_undef_op1: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fsub float %x, undef ret float %r } define float @fmul_undef_op0(float %x) { -; STRICT-LABEL: fmul_undef_op0: -; STRICT: # %bb.0: -; STRICT-NEXT: mulss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fmul_undef_op0: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fmul_undef_op0: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fmul float undef, %x ret float %r } define float @fmul_undef_op1(float %x) { -; STRICT-LABEL: fmul_undef_op1: -; STRICT: # %bb.0: -; STRICT-NEXT: mulss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fmul_undef_op1: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fmul_undef_op1: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fmul float %x, undef ret float %r } @@ -81,20 +62,17 @@ define float @fdiv_undef_op0(float %x) { ; ANY-LABEL: fdiv_undef_op0: ; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fdiv float undef, %x ret float %r } define float @fdiv_undef_op1(float %x) { -; STRICT-LABEL: fdiv_undef_op1: -; STRICT: # %bb.0: -; STRICT-NEXT: divss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fdiv_undef_op1: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fdiv_undef_op1: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fdiv float %x, undef ret float %r } @@ -102,19 +80,17 @@ define float @frem_undef_op0(float %x) { ; ANY-LABEL: frem_undef_op0: ; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = frem float undef, %x ret float %r } define float @frem_undef_op1(float %x) { -; STRICT-LABEL: frem_undef_op1: -; STRICT: # %bb.0: -; STRICT-NEXT: jmp fmodf # TAILCALL -; -; UNSAFE-LABEL: frem_undef_op1: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: frem_undef_op1: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = frem float %x, undef ret float %r } @@ -122,27 +98,19 @@ ; Repeat all tests with fast-math-flags. Alternate 'nnan' and 'fast' for more coverage. define float @fadd_undef_op0_nnan(float %x) { -; STRICT-LABEL: fadd_undef_op0_nnan: -; STRICT: # %bb.0: -; STRICT-NEXT: addss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fadd_undef_op0_nnan: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fadd_undef_op0_nnan: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fadd nnan float undef, %x ret float %r } define float @fadd_undef_op1_fast(float %x) { -; STRICT-LABEL: fadd_undef_op1_fast: -; STRICT: # %bb.0: -; STRICT-NEXT: addss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fadd_undef_op1_fast: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fadd_undef_op1_fast: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fadd fast float %x, undef ret float %r } @@ -150,46 +118,35 @@ define float @fsub_undef_op0_fast(float %x) { ; ANY-LABEL: fsub_undef_op0_fast: ; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fsub fast float undef, %x ret float %r } define float @fsub_undef_op1_nnan(float %x) { -; STRICT-LABEL: fsub_undef_op1_nnan: -; STRICT: # %bb.0: -; STRICT-NEXT: subss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fsub_undef_op1_nnan: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fsub_undef_op1_nnan: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fsub nnan float %x, undef ret float %r } define float @fmul_undef_op0_nnan(float %x) { -; STRICT-LABEL: fmul_undef_op0_nnan: -; STRICT: # %bb.0: -; STRICT-NEXT: mulss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fmul_undef_op0_nnan: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fmul_undef_op0_nnan: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fmul nnan float undef, %x ret float %r } define float @fmul_undef_op1_fast(float %x) { -; STRICT-LABEL: fmul_undef_op1_fast: -; STRICT: # %bb.0: -; STRICT-NEXT: mulss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fmul_undef_op1_fast: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fmul_undef_op1_fast: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fmul fast float %x, undef ret float %r } @@ -197,20 +154,17 @@ define float @fdiv_undef_op0_fast(float %x) { ; ANY-LABEL: fdiv_undef_op0_fast: ; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fdiv fast float undef, %x ret float %r } define float @fdiv_undef_op1_nnan(float %x) { -; STRICT-LABEL: fdiv_undef_op1_nnan: -; STRICT: # %bb.0: -; STRICT-NEXT: divss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fdiv_undef_op1_nnan: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fdiv_undef_op1_nnan: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fdiv nnan float %x, undef ret float %r } @@ -218,19 +172,17 @@ define float @frem_undef_op0_nnan(float %x) { ; ANY-LABEL: frem_undef_op0_nnan: ; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = frem nnan float undef, %x ret float %r } define float @frem_undef_op1_fast(float %x) { -; STRICT-LABEL: frem_undef_op1_fast: -; STRICT: # %bb.0: -; STRICT-NEXT: jmp fmodf # TAILCALL -; -; UNSAFE-LABEL: frem_undef_op1_fast: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: frem_undef_op1_fast: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = frem fast float %x, undef ret float %r } @@ -238,14 +190,10 @@ ; Constant folding - undef undef. define double @fadd_undef_undef(double %x) { -; STRICT-LABEL: fadd_undef_undef: -; STRICT: # %bb.0: -; STRICT-NEXT: addsd %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fadd_undef_undef: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fadd_undef_undef: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fadd double undef, undef ret double %r } @@ -253,20 +201,17 @@ define double @fsub_undef_undef(double %x) { ; ANY-LABEL: fsub_undef_undef: ; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fsub double undef, undef ret double %r } define double @fmul_undef_undef(double %x) { -; STRICT-LABEL: fmul_undef_undef: -; STRICT: # %bb.0: -; STRICT-NEXT: mulsd %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fmul_undef_undef: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fmul_undef_undef: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fmul double undef, undef ret double %r } @@ -274,6 +219,7 @@ define double @fdiv_undef_undef(double %x) { ; ANY-LABEL: fdiv_undef_undef: ; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fdiv double undef, undef ret double %r @@ -282,6 +228,7 @@ define double @frem_undef_undef(double %x) { ; ANY-LABEL: frem_undef_undef: ; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = frem double undef, undef ret double %r @@ -290,27 +237,19 @@ ; Constant folding. define float @fadd_undef_op0_nnan_constant(float %x) { -; STRICT-LABEL: fadd_undef_op0_nnan_constant: -; STRICT: # %bb.0: -; STRICT-NEXT: addss {{.*}}(%rip), %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fadd_undef_op0_nnan_constant: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fadd_undef_op0_nnan_constant: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fadd nnan float undef, 1.0 ret float %r } define float @fadd_undef_op1_constant(float %x) { -; STRICT-LABEL: fadd_undef_op1_constant: -; STRICT: # %bb.0: -; STRICT-NEXT: addss {{.*}}(%rip), %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fadd_undef_op1_constant: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fadd_undef_op1_constant: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fadd float 2.0, undef ret float %r } @@ -318,47 +257,35 @@ define float @fsub_undef_op0_fast_constant(float %x) { ; ANY-LABEL: fsub_undef_op0_fast_constant: ; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fsub fast float undef, 3.0 ret float %r } define float @fsub_undef_op1_constant(float %x) { -; STRICT-LABEL: fsub_undef_op1_constant: -; STRICT: # %bb.0: -; STRICT-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; STRICT-NEXT: subss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fsub_undef_op1_constant: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fsub_undef_op1_constant: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fsub float 4.0, undef ret float %r } define float @fmul_undef_op0_nnan_constant(float %x) { -; STRICT-LABEL: fmul_undef_op0_nnan_constant: -; STRICT: # %bb.0: -; STRICT-NEXT: mulss {{.*}}(%rip), %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fmul_undef_op0_nnan_constant: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fmul_undef_op0_nnan_constant: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fmul nnan float undef, 5.0 ret float %r } define float @fmul_undef_op1_constant(float %x) { -; STRICT-LABEL: fmul_undef_op1_constant: -; STRICT: # %bb.0: -; STRICT-NEXT: mulss {{.*}}(%rip), %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fmul_undef_op1_constant: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fmul_undef_op1_constant: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fmul float 6.0, undef ret float %r } @@ -366,21 +293,17 @@ define float @fdiv_undef_op0_fast_constant(float %x) { ; ANY-LABEL: fdiv_undef_op0_fast_constant: ; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fdiv fast float undef, 7.0 ret float %r } define float @fdiv_undef_op1_constant(float %x) { -; STRICT-LABEL: fdiv_undef_op1_constant: -; STRICT: # %bb.0: -; STRICT-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; STRICT-NEXT: divss %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fdiv_undef_op1_constant: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fdiv_undef_op1_constant: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = fdiv float 8.0, undef ret float %r } @@ -388,20 +311,17 @@ define float @frem_undef_op0_nnan_constant(float %x) { ; ANY-LABEL: frem_undef_op0_nnan_constant: ; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = frem nnan float undef, 9.0 ret float %r } define float @frem_undef_op1_constant(float %x) { -; STRICT-LABEL: frem_undef_op1_constant: -; STRICT: # %bb.0: -; STRICT-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; STRICT-NEXT: jmp fmodf # TAILCALL -; -; UNSAFE-LABEL: frem_undef_op1_constant: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: frem_undef_op1_constant: +; ANY: # %bb.0: +; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ANY-NEXT: retq %r = frem float 10.0, undef ret float %r } @@ -409,27 +329,19 @@ ; Constant folding - special constants: NaN. define double @fadd_undef_op0_constant_nan(double %x) { -; STRICT-LABEL: fadd_undef_op0_constant_nan: -; STRICT: # %bb.0: -; STRICT-NEXT: addsd {{.*}}(%rip), %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fadd_undef_op0_constant_nan: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fadd_undef_op0_constant_nan: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fadd double undef, 0x7FF8000000000000 ret double %r } define double @fadd_undef_op1_fast_constant_nan(double %x) { -; STRICT-LABEL: fadd_undef_op1_fast_constant_nan: -; STRICT: # %bb.0: -; STRICT-NEXT: addsd {{.*}}(%rip), %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fadd_undef_op1_fast_constant_nan: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fadd_undef_op1_fast_constant_nan: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fadd fast double 0xFFF0000000000001, undef ret double %r } @@ -437,47 +349,35 @@ define double @fsub_undef_op0_constant_nan(double %x) { ; ANY-LABEL: fsub_undef_op0_constant_nan: ; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fsub double undef, 0xFFF8000000000010 ret double %r } define double @fsub_undef_op1_nnan_constant_nan(double %x) { -; STRICT-LABEL: fsub_undef_op1_nnan_constant_nan: -; STRICT: # %bb.0: -; STRICT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; STRICT-NEXT: subsd %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fsub_undef_op1_nnan_constant_nan: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fsub_undef_op1_nnan_constant_nan: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fsub nnan double 0x7FF0000000000011, undef ret double %r } define double @fmul_undef_op0_constant_nan(double %x) { -; STRICT-LABEL: fmul_undef_op0_constant_nan: -; STRICT: # %bb.0: -; STRICT-NEXT: mulsd {{.*}}(%rip), %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fmul_undef_op0_constant_nan: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fmul_undef_op0_constant_nan: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fmul double undef, 0x7FF8000000000100 ret double %r } define double @fmul_undef_op1_fast_constant_nan(double %x) { -; STRICT-LABEL: fmul_undef_op1_fast_constant_nan: -; STRICT: # %bb.0: -; STRICT-NEXT: mulsd {{.*}}(%rip), %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fmul_undef_op1_fast_constant_nan: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fmul_undef_op1_fast_constant_nan: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fmul fast double 0xFFF0000000000101, undef ret double %r } @@ -485,21 +385,17 @@ define double @fdiv_undef_op0_constant_nan(double %x) { ; ANY-LABEL: fdiv_undef_op0_constant_nan: ; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fdiv double undef, 0xFFF8000000000110 ret double %r } define double @fdiv_undef_op1_nnan_constant_nan(double %x) { -; STRICT-LABEL: fdiv_undef_op1_nnan_constant_nan: -; STRICT: # %bb.0: -; STRICT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; STRICT-NEXT: divsd %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fdiv_undef_op1_nnan_constant_nan: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fdiv_undef_op1_nnan_constant_nan: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fdiv nnan double 0x7FF0000000000111, undef ret double %r } @@ -507,20 +403,17 @@ define double @frem_undef_op0_constant_nan(double %x) { ; ANY-LABEL: frem_undef_op0_constant_nan: ; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = frem double undef, 0x7FF8000000001000 ret double %r } define double @frem_undef_op1_fast_constant_nan(double %x) { -; STRICT-LABEL: frem_undef_op1_fast_constant_nan: -; STRICT: # %bb.0: -; STRICT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; STRICT-NEXT: jmp fmod # TAILCALL -; -; UNSAFE-LABEL: frem_undef_op1_fast_constant_nan: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: frem_undef_op1_fast_constant_nan: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = frem fast double 0xFFF0000000001001, undef ret double %r } @@ -528,27 +421,19 @@ ; Constant folding - special constants: Inf. define double @fadd_undef_op0_constant_inf(double %x) { -; STRICT-LABEL: fadd_undef_op0_constant_inf: -; STRICT: # %bb.0: -; STRICT-NEXT: addsd {{.*}}(%rip), %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fadd_undef_op0_constant_inf: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fadd_undef_op0_constant_inf: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fadd double undef, 0x7FF0000000000000 ret double %r } define double @fadd_undef_op1_fast_constant_inf(double %x) { -; STRICT-LABEL: fadd_undef_op1_fast_constant_inf: -; STRICT: # %bb.0: -; STRICT-NEXT: addsd {{.*}}(%rip), %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fadd_undef_op1_fast_constant_inf: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fadd_undef_op1_fast_constant_inf: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fadd fast double 0xFFF0000000000000, undef ret double %r } @@ -556,47 +441,35 @@ define double @fsub_undef_op0_constant_inf(double %x) { ; ANY-LABEL: fsub_undef_op0_constant_inf: ; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fsub double undef, 0xFFF0000000000000 ret double %r } define double @fsub_undef_op1_ninf_constant_inf(double %x) { -; STRICT-LABEL: fsub_undef_op1_ninf_constant_inf: -; STRICT: # %bb.0: -; STRICT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; STRICT-NEXT: subsd %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fsub_undef_op1_ninf_constant_inf: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fsub_undef_op1_ninf_constant_inf: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fsub ninf double 0x7FF0000000000000, undef ret double %r } define double @fmul_undef_op0_constant_inf(double %x) { -; STRICT-LABEL: fmul_undef_op0_constant_inf: -; STRICT: # %bb.0: -; STRICT-NEXT: mulsd {{.*}}(%rip), %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fmul_undef_op0_constant_inf: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fmul_undef_op0_constant_inf: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fmul double undef, 0x7FF0000000000000 ret double %r } define double @fmul_undef_op1_fast_constant_inf(double %x) { -; STRICT-LABEL: fmul_undef_op1_fast_constant_inf: -; STRICT: # %bb.0: -; STRICT-NEXT: mulsd {{.*}}(%rip), %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fmul_undef_op1_fast_constant_inf: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fmul_undef_op1_fast_constant_inf: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fmul fast double 0xFFF0000000000000, undef ret double %r } @@ -604,21 +477,17 @@ define double @fdiv_undef_op0_constant_inf(double %x) { ; ANY-LABEL: fdiv_undef_op0_constant_inf: ; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fdiv double undef, 0xFFF0000000000000 ret double %r } define double @fdiv_undef_op1_ninf_constant_inf(double %x) { -; STRICT-LABEL: fdiv_undef_op1_ninf_constant_inf: -; STRICT: # %bb.0: -; STRICT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; STRICT-NEXT: divsd %xmm0, %xmm0 -; STRICT-NEXT: retq -; -; UNSAFE-LABEL: fdiv_undef_op1_ninf_constant_inf: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: fdiv_undef_op1_ninf_constant_inf: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = fdiv ninf double 0x7FF0000000000000, undef ret double %r } @@ -626,20 +495,17 @@ define double @frem_undef_op0_constant_inf(double %x) { ; ANY-LABEL: frem_undef_op0_constant_inf: ; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = frem double undef, 0x7FF0000000000000 ret double %r } define double @frem_undef_op1_fast_constant_inf(double %x) { -; STRICT-LABEL: frem_undef_op1_fast_constant_inf: -; STRICT: # %bb.0: -; STRICT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; STRICT-NEXT: jmp fmod # TAILCALL -; -; UNSAFE-LABEL: frem_undef_op1_fast_constant_inf: -; UNSAFE: # %bb.0: -; UNSAFE-NEXT: retq +; ANY-LABEL: frem_undef_op1_fast_constant_inf: +; ANY: # %bb.0: +; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ANY-NEXT: retq %r = frem fast double 0xFFF0000000000000, undef ret double %r } Index: llvm/trunk/test/CodeGen/X86/pr23103.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr23103.ll +++ llvm/trunk/test/CodeGen/X86/pr23103.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx < %s | FileCheck %s ; When commuting a VADDSDrr instruction, verify that the 'IsUndef' flag is @@ -8,11 +9,15 @@ define <1 x double> @pr23103(<1 x double>* align 8 %Vp) { ; CHECK-LABEL: pr23103: -; CHECK: vmovsd (%rdi), %xmm0 -; CHECK-NEXT: vmovsd %xmm0, {{.*}}(%rsp) {{.*#+}} 8-byte Spill +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: callq foo -; CHECK-NEXT: vaddsd {{.*}}(%rsp), %xmm0, %xmm0 {{.*#+}} 8-byte Folded Reload -; CHECK: retq +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq entry: %V = load <1 x double>, <1 x double>* %Vp, align 8 %call = call zeroext i1 @foo(<1 x double> %V) Index: llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-fadd.ll @@ -755,31 +755,26 @@ define float @test_v2f32_undef(<2 x float> %a0) { ; SSE2-LABEL: test_v2f32_undef: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: addss %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] -; SSE2-NEXT: addss %xmm1, %xmm0 +; SSE2-NEXT: addss {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v2f32_undef: ; SSE41: # %bb.0: -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: addss %xmm0, %xmm0 -; SSE41-NEXT: addss %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE41-NEXT: addss {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f32_undef: ; AVX: # %bb.0: -; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm1 ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f32_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vaddss %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float undef, <2 x float> %a0) ret float %1 @@ -789,23 +784,19 @@ ; SSE2-LABEL: test_v4f32_undef: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: addss %xmm0, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] +; SSE2-NEXT: addss {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] +; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] ; SSE2-NEXT: addss %xmm1, %xmm2 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] -; SSE2-NEXT: addss %xmm2, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; SSE2-NEXT: addss %xmm1, %xmm0 +; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v4f32_undef: ; SSE41: # %bb.0: -; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: addss %xmm0, %xmm1 -; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SSE41-NEXT: addss %xmm2, %xmm1 +; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE41-NEXT: addss {{.*}}(%rip), %xmm1 ; SSE41-NEXT: movaps %xmm0, %xmm2 ; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] ; SSE41-NEXT: addss %xmm1, %xmm2 @@ -815,9 +806,8 @@ ; ; AVX-LABEL: test_v4f32_undef: ; AVX: # %bb.0: -; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm1 -; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] ; AVX-NEXT: vaddss %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] @@ -826,9 +816,8 @@ ; ; AVX512-LABEL: test_v4f32_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vaddss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] @@ -842,15 +831,13 @@ ; SSE2-LABEL: test_v8f32_undef: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: addss %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] +; SSE2-NEXT: addss {{.*}}(%rip), %xmm2 ; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3] +; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] ; SSE2-NEXT: addss %xmm2, %xmm3 -; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] -; SSE2-NEXT: addss %xmm3, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; SSE2-NEXT: addss %xmm2, %xmm0 +; SSE2-NEXT: addss %xmm3, %xmm0 ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] @@ -864,10 +851,8 @@ ; ; SSE41-LABEL: test_v8f32_undef: ; SSE41: # %bb.0: -; SSE41-NEXT: movaps %xmm0, %xmm2 -; SSE41-NEXT: addss %xmm0, %xmm2 -; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] -; SSE41-NEXT: addss %xmm3, %xmm2 +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; SSE41-NEXT: addss {{.*}}(%rip), %xmm2 ; SSE41-NEXT: movaps %xmm0, %xmm3 ; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] ; SSE41-NEXT: addss %xmm2, %xmm3 @@ -885,9 +870,8 @@ ; ; AVX-LABEL: test_v8f32_undef: ; AVX: # %bb.0: -; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm1 -; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] ; AVX-NEXT: vaddss %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] @@ -905,9 +889,8 @@ ; ; AVX512-LABEL: test_v8f32_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vaddss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] @@ -930,15 +913,13 @@ ; SSE2-LABEL: test_v16f32_undef: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: addss %xmm0, %xmm4 +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3] +; SSE2-NEXT: addss {{.*}}(%rip), %xmm4 ; SSE2-NEXT: movaps %xmm0, %xmm5 -; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[2,3] +; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] ; SSE2-NEXT: addss %xmm4, %xmm5 -; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1] -; SSE2-NEXT: addss %xmm5, %xmm4 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; SSE2-NEXT: addss %xmm4, %xmm0 +; SSE2-NEXT: addss %xmm5, %xmm0 ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm4 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3] @@ -970,10 +951,8 @@ ; ; SSE41-LABEL: test_v16f32_undef: ; SSE41: # %bb.0: -; SSE41-NEXT: movaps %xmm0, %xmm4 -; SSE41-NEXT: addss %xmm0, %xmm4 -; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3] -; SSE41-NEXT: addss %xmm5, %xmm4 +; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] +; SSE41-NEXT: addss {{.*}}(%rip), %xmm4 ; SSE41-NEXT: movaps %xmm0, %xmm5 ; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] ; SSE41-NEXT: addss %xmm4, %xmm5 @@ -1007,9 +986,8 @@ ; ; AVX-LABEL: test_v16f32_undef: ; AVX: # %bb.0: -; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] -; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm2 +; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX-NEXT: vaddss {{.*}}(%rip), %xmm2, %xmm2 ; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] ; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm2 ; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3] @@ -1042,9 +1020,8 @@ ; ; AVX512-LABEL: test_v16f32_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vaddss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] ; AVX512-NEXT: vaddss %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] @@ -1629,24 +1606,20 @@ define double @test_v2f64_undef(<2 x double> %a0) { ; SSE-LABEL: test_v2f64_undef: ; SSE: # %bb.0: -; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: addsd %xmm0, %xmm1 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; SSE-NEXT: addsd %xmm1, %xmm0 +; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2f64_undef: ; AVX: # %bb.0: -; AVX-NEXT: vaddsd %xmm0, %xmm0, %xmm1 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f64_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vaddsd %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double undef, <2 x double> %a0) ret double %1 @@ -1655,10 +1628,8 @@ define double @test_v4f64_undef(<4 x double> %a0) { ; SSE-LABEL: test_v4f64_undef: ; SSE: # %bb.0: -; SSE-NEXT: movapd %xmm0, %xmm2 -; SSE-NEXT: addsd %xmm0, %xmm2 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; SSE-NEXT: addsd %xmm2, %xmm0 +; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 @@ -1666,9 +1637,8 @@ ; ; AVX-LABEL: test_v4f64_undef: ; AVX: # %bb.0: -; AVX-NEXT: vaddsd %xmm0, %xmm0, %xmm1 -; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX-NEXT: vaddsd %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm1 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -1678,9 +1648,8 @@ ; ; AVX512-LABEL: test_v4f64_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vaddsd %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm1 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -1694,10 +1663,8 @@ define double @test_v8f64_undef(<8 x double> %a0) { ; SSE-LABEL: test_v8f64_undef: ; SSE: # %bb.0: -; SSE-NEXT: movapd %xmm0, %xmm4 -; SSE-NEXT: addsd %xmm0, %xmm4 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; SSE-NEXT: addsd %xmm4, %xmm0 +; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 @@ -1711,9 +1678,8 @@ ; ; AVX-LABEL: test_v8f64_undef: ; AVX: # %bb.0: -; AVX-NEXT: vaddsd %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX-NEXT: vaddsd %xmm3, %xmm2, %xmm2 +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm2, %xmm2 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX-NEXT: vaddsd %xmm0, %xmm2, %xmm2 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -1730,9 +1696,8 @@ ; ; AVX512-LABEL: test_v8f64_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vaddsd %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX512-NEXT: vaddsd %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] @@ -1754,10 +1719,8 @@ define double @test_v16f64_undef(<16 x double> %a0) { ; SSE-LABEL: test_v16f64_undef: ; SSE: # %bb.0: -; SSE-NEXT: movapd %xmm0, %xmm8 -; SSE-NEXT: addsd %xmm0, %xmm8 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; SSE-NEXT: addsd %xmm8, %xmm0 +; SSE-NEXT: addsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: addsd %xmm1, %xmm0 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm1, %xmm0 @@ -1783,9 +1746,8 @@ ; ; AVX-LABEL: test_v16f64_undef: ; AVX: # %bb.0: -; AVX-NEXT: vaddsd %xmm0, %xmm0, %xmm4 -; AVX-NEXT: vpermilpd {{.*#+}} xmm5 = xmm0[1,0] -; AVX-NEXT: vaddsd %xmm5, %xmm4, %xmm4 +; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0] +; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm4, %xmm4 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX-NEXT: vaddsd %xmm0, %xmm4, %xmm4 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -1816,9 +1778,8 @@ ; ; AVX512-LABEL: test_v16f64_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vaddsd %xmm0, %xmm0, %xmm2 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm2, %xmm2 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX512-NEXT: vaddsd %xmm3, %xmm2, %xmm2 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0] Index: llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-fmul.ll @@ -647,31 +647,26 @@ define float @test_v2f32_undef(<2 x float> %a0) { ; SSE2-LABEL: test_v2f32_undef: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: mulss %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] -; SSE2-NEXT: mulss %xmm1, %xmm0 +; SSE2-NEXT: mulss {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v2f32_undef: ; SSE41: # %bb.0: -; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: mulss %xmm0, %xmm0 -; SSE41-NEXT: mulss %xmm1, %xmm0 +; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE41-NEXT: mulss {{.*}}(%rip), %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f32_undef: ; AVX: # %bb.0: -; AVX-NEXT: vmulss %xmm0, %xmm0, %xmm1 ; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f32_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vmulss %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float undef, <2 x float> %a0) ret float %1 @@ -681,23 +676,19 @@ ; SSE2-LABEL: test_v4f32_undef: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: mulss %xmm0, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] +; SSE2-NEXT: mulss {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] +; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] ; SSE2-NEXT: mulss %xmm1, %xmm2 -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] -; SSE2-NEXT: mulss %xmm2, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; SSE2-NEXT: mulss %xmm1, %xmm0 +; SSE2-NEXT: mulss %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v4f32_undef: ; SSE41: # %bb.0: -; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: mulss %xmm0, %xmm1 -; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SSE41-NEXT: mulss %xmm2, %xmm1 +; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE41-NEXT: mulss {{.*}}(%rip), %xmm1 ; SSE41-NEXT: movaps %xmm0, %xmm2 ; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] ; SSE41-NEXT: mulss %xmm1, %xmm2 @@ -707,9 +698,8 @@ ; ; AVX-LABEL: test_v4f32_undef: ; AVX: # %bb.0: -; AVX-NEXT: vmulss %xmm0, %xmm0, %xmm1 -; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] ; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] @@ -718,9 +708,8 @@ ; ; AVX512-LABEL: test_v4f32_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vmulss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] @@ -734,15 +723,13 @@ ; SSE2-LABEL: test_v8f32_undef: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: mulss %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] +; SSE2-NEXT: mulss {{.*}}(%rip), %xmm2 ; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3] +; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] ; SSE2-NEXT: mulss %xmm2, %xmm3 -; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] -; SSE2-NEXT: mulss %xmm3, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; SSE2-NEXT: mulss %xmm2, %xmm0 +; SSE2-NEXT: mulss %xmm3, %xmm0 ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] @@ -756,10 +743,8 @@ ; ; SSE41-LABEL: test_v8f32_undef: ; SSE41: # %bb.0: -; SSE41-NEXT: movaps %xmm0, %xmm2 -; SSE41-NEXT: mulss %xmm0, %xmm2 -; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] -; SSE41-NEXT: mulss %xmm3, %xmm2 +; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; SSE41-NEXT: mulss {{.*}}(%rip), %xmm2 ; SSE41-NEXT: movaps %xmm0, %xmm3 ; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] ; SSE41-NEXT: mulss %xmm2, %xmm3 @@ -777,9 +762,8 @@ ; ; AVX-LABEL: test_v8f32_undef: ; AVX: # %bb.0: -; AVX-NEXT: vmulss %xmm0, %xmm0, %xmm1 -; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] ; AVX-NEXT: vmulss %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] @@ -797,9 +781,8 @@ ; ; AVX512-LABEL: test_v8f32_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vmulss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] @@ -822,15 +805,13 @@ ; SSE2-LABEL: test_v16f32_undef: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: mulss %xmm0, %xmm4 +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3] +; SSE2-NEXT: mulss {{.*}}(%rip), %xmm4 ; SSE2-NEXT: movaps %xmm0, %xmm5 -; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[2,3] +; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] ; SSE2-NEXT: mulss %xmm4, %xmm5 -; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1] -; SSE2-NEXT: mulss %xmm5, %xmm4 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; SSE2-NEXT: mulss %xmm4, %xmm0 +; SSE2-NEXT: mulss %xmm5, %xmm0 ; SSE2-NEXT: mulss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm4 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3] @@ -862,10 +843,8 @@ ; ; SSE41-LABEL: test_v16f32_undef: ; SSE41: # %bb.0: -; SSE41-NEXT: movaps %xmm0, %xmm4 -; SSE41-NEXT: mulss %xmm0, %xmm4 -; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3] -; SSE41-NEXT: mulss %xmm5, %xmm4 +; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] +; SSE41-NEXT: mulss {{.*}}(%rip), %xmm4 ; SSE41-NEXT: movaps %xmm0, %xmm5 ; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] ; SSE41-NEXT: mulss %xmm4, %xmm5 @@ -899,9 +878,8 @@ ; ; AVX-LABEL: test_v16f32_undef: ; AVX: # %bb.0: -; AVX-NEXT: vmulss %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] -; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2 +; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] +; AVX-NEXT: vmulss {{.*}}(%rip), %xmm2, %xmm2 ; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] ; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm2 ; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3] @@ -934,9 +912,8 @@ ; ; AVX512-LABEL: test_v16f32_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vmulss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] ; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] @@ -1426,24 +1403,20 @@ define double @test_v2f64_undef(<2 x double> %a0) { ; SSE-LABEL: test_v2f64_undef: ; SSE: # %bb.0: -; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: mulsd %xmm0, %xmm1 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; SSE-NEXT: mulsd %xmm1, %xmm0 +; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2f64_undef: ; AVX: # %bb.0: -; AVX-NEXT: vmulsd %xmm0, %xmm0, %xmm1 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f64_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vmulsd %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double undef, <2 x double> %a0) ret double %1 @@ -1452,10 +1425,8 @@ define double @test_v4f64_undef(<4 x double> %a0) { ; SSE-LABEL: test_v4f64_undef: ; SSE: # %bb.0: -; SSE-NEXT: movapd %xmm0, %xmm2 -; SSE-NEXT: mulsd %xmm0, %xmm2 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; SSE-NEXT: mulsd %xmm2, %xmm0 +; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: mulsd %xmm1, %xmm0 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: mulsd %xmm1, %xmm0 @@ -1463,9 +1434,8 @@ ; ; AVX-LABEL: test_v4f64_undef: ; AVX: # %bb.0: -; AVX-NEXT: vmulsd %xmm0, %xmm0, %xmm1 -; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX-NEXT: vmulsd %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm1 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -1475,9 +1445,8 @@ ; ; AVX512-LABEL: test_v4f64_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vmulsd %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm1 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -1491,10 +1460,8 @@ define double @test_v8f64_undef(<8 x double> %a0) { ; SSE-LABEL: test_v8f64_undef: ; SSE: # %bb.0: -; SSE-NEXT: movapd %xmm0, %xmm4 -; SSE-NEXT: mulsd %xmm0, %xmm4 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; SSE-NEXT: mulsd %xmm4, %xmm0 +; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: mulsd %xmm1, %xmm0 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: mulsd %xmm1, %xmm0 @@ -1508,9 +1475,8 @@ ; ; AVX-LABEL: test_v8f64_undef: ; AVX: # %bb.0: -; AVX-NEXT: vmulsd %xmm0, %xmm0, %xmm2 -; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX-NEXT: vmulsd %xmm3, %xmm2, %xmm2 +; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm2, %xmm2 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX-NEXT: vmulsd %xmm0, %xmm2, %xmm2 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -1527,9 +1493,8 @@ ; ; AVX512-LABEL: test_v8f64_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vmulsd %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm1, %xmm1 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX512-NEXT: vmulsd %xmm2, %xmm1, %xmm1 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] @@ -1551,10 +1516,8 @@ define double @test_v16f64_undef(<16 x double> %a0) { ; SSE-LABEL: test_v16f64_undef: ; SSE: # %bb.0: -; SSE-NEXT: movapd %xmm0, %xmm8 -; SSE-NEXT: mulsd %xmm0, %xmm8 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; SSE-NEXT: mulsd %xmm8, %xmm0 +; SSE-NEXT: mulsd {{.*}}(%rip), %xmm0 ; SSE-NEXT: mulsd %xmm1, %xmm0 ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: mulsd %xmm1, %xmm0 @@ -1580,9 +1543,8 @@ ; ; AVX-LABEL: test_v16f64_undef: ; AVX: # %bb.0: -; AVX-NEXT: vmulsd %xmm0, %xmm0, %xmm4 -; AVX-NEXT: vpermilpd {{.*#+}} xmm5 = xmm0[1,0] -; AVX-NEXT: vmulsd %xmm5, %xmm4, %xmm4 +; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0] +; AVX-NEXT: vmulsd {{.*}}(%rip), %xmm4, %xmm4 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX-NEXT: vmulsd %xmm0, %xmm4, %xmm4 ; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] @@ -1613,9 +1575,8 @@ ; ; AVX512-LABEL: test_v16f64_undef: ; AVX512: # %bb.0: -; AVX512-NEXT: vmulsd %xmm0, %xmm0, %xmm2 -; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2 +; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm2, %xmm2 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm3 ; AVX512-NEXT: vmulsd %xmm3, %xmm2, %xmm2 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]