Patch summary (free text ahead of the first file header; patch tools skip it):
  * X86ISelLowering.cpp: inside the IsNOT(Op1) branch shown in the first hunk,
    the all-ones operand is now created in NotOp1's value type and then bitcast
    to OpVT, instead of being created directly in OpVT.
    NOTE(review): presumably needed because OpVT can be a floating-point vector
    type (the new tests use <4 x double> / <8 x float>) - confirm against
    SelectionDAG::getAllOnesConstant.
  * combine-testpd.ll / combine-testps.ll: RUN lines gain per-feature AVX / AVX2
    check prefixes, and one 256-bit test is added per file that feeds the same
    NOT-ed, bitcast vector to both operands of vtestz.

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47349,9 +47349,10 @@
     if (SDValue NotOp1 = IsNOT(Op1, DAG)) {
       if (peekThroughBitcasts(NotOp1) == peekThroughBitcasts(Op0)) {
         SDLoc DL(EFLAGS);
-        return DAG.getNode(EFLAGS.getOpcode(), DL, VT,
-                           DAG.getBitcast(OpVT, NotOp1),
-                           DAG.getAllOnesConstant(DL, OpVT));
+        return DAG.getNode(
+            EFLAGS.getOpcode(), DL, VT, DAG.getBitcast(OpVT, NotOp1),
+            DAG.getBitcast(OpVT,
+                           DAG.getAllOnesConstant(DL, NotOp1.getValueType())));
       }
     }
   }
diff --git a/llvm/test/CodeGen/X86/combine-testpd.ll b/llvm/test/CodeGen/X86/combine-testpd.ll
--- a/llvm/test/CodeGen/X86/combine-testpd.ll
+++ b/llvm/test/CodeGen/X86/combine-testpd.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
 
 ;
 ; testz(~X,Y) -> testc(X,Y)
@@ -226,6 +226,35 @@
   ret i1 %not
 }
 
+define void @combine_testp_v4f64(<4 x i64> %x){
+; AVX-LABEL: combine_testp_v4f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
+; AVX-NEXT:    vtestpd %ymm1, %ymm0
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX2-LABEL: combine_testp_v4f64:
+; AVX2:       # %bb.0: # %entry
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vtestpd %ymm1, %ymm0
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+entry:
+  %xor.i.i.i.i.i.i.i.i.i = xor <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %.cast.i.i.i.i.i.i = bitcast <4 x i64> %xor.i.i.i.i.i.i.i.i.i to <4 x double>
+  %0 = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %.cast.i.i.i.i.i.i, <4 x double> %.cast.i.i.i.i.i.i)
+  %cmp.i.not.i.i.i.i.i.i = icmp eq i32 %0, 0
+  br i1 %cmp.i.not.i.i.i.i.i.i, label %if.end3.i.i.i.i.i.i, label %end
+
+if.end3.i.i.i.i.i.i: ; preds = %entry
+  ret void
+
+end: ; preds = %entry
+  ret void
+}
+
 declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
 declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/combine-testps.ll b/llvm/test/CodeGen/X86/combine-testps.ll
--- a/llvm/test/CodeGen/X86/combine-testps.ll
+++ b/llvm/test/CodeGen/X86/combine-testps.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
 
 ;
 ; testz(~X,Y) -> testc(X,Y)
@@ -225,6 +225,35 @@
   ret i1 %not
 }
 
+define void @combine_testp_v8f32(<8 x i32> %x){
+; AVX-LABEL: combine_testp_v8f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
+; AVX-NEXT:    vtestps %ymm1, %ymm0
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+;
+; AVX2-LABEL: combine_testp_v8f32:
+; AVX2:       # %bb.0: # %entry
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vtestps %ymm1, %ymm0
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+entry:
+  %xor.i.i.i.i.i.i.i.i.i = xor <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %.cast.i.i.i.i.i.i = bitcast <8 x i32> %xor.i.i.i.i.i.i.i.i.i to <8 x float>
+  %0 = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %.cast.i.i.i.i.i.i, <8 x float> %.cast.i.i.i.i.i.i)
+  %cmp.i.not.i.i.i.i.i.i = icmp eq i32 %0, 0
+  br i1 %cmp.i.not.i.i.i.i.i.i, label %if.end3.i.i.i.i.i.i, label %end
+
+if.end3.i.i.i.i.i.i: ; preds = %entry
+  ret void
+
+end: ; preds = %entry
+  ret void
+}
+
 declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
 declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone