diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1260,6 +1260,11 @@
 /// be used with SelectionDAG::getMemIntrinsicNode.
 static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END + 500;
 
+/// Whether this is a bitwise logic opcode.
+inline bool isBitwiseLogicOp(unsigned Opcode) {
+  return Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR;
+}
+
 /// Get underlying scalar opcode for VECREDUCE opcode.
 /// For example ISD::AND for ISD::VECREDUCE_AND.
 NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5293,6 +5293,19 @@
   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
     return FoldSymbolOffset(Opcode, VT, GA, N1);
 
+  // If this is a bitwise logic opcode, see if we can fold bitcasted ops.
+  // TODO: Can we generalize this and fold any bitcasted constant data?
+  if (ISD::isBitwiseLogicOp(Opcode) && N1->getOpcode() == ISD::BITCAST &&
+      N2->getOpcode() == ISD::BITCAST) {
+    SDValue InnerN1 = peekThroughBitcasts(N1->getOperand(0));
+    SDValue InnerN2 = peekThroughBitcasts(N2->getOperand(0));
+    EVT InnerVT = InnerN1.getValueType();
+    if (InnerVT == InnerN2.getValueType() && InnerVT.isInteger())
+      if (SDValue C =
+              FoldConstantArithmetic(Opcode, DL, InnerVT, {InnerN1, InnerN2}))
+        return getBitcast(VT, C);
+  }
+
   // For fixed width vectors, extract each constant element and fold them
   // individually. Either input may be an undef value.
   bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR ||
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll b/llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll
--- a/llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll
@@ -158,13 +158,20 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    adr r0, .LCPI14_0
 ; CHECK-NEXT:    vldrw.u32 q2, [r0]
-; CHECK-NEXT:    vbic q1, q1, q2
+; CHECK-NEXT:    adr r0, .LCPI14_1
+; CHECK-NEXT:    vand q1, q1, q2
+; CHECK-NEXT:    vldrw.u32 q2, [r0]
 ; CHECK-NEXT:    vand q0, q0, q2
 ; CHECK-NEXT:    vorr q0, q0, q1
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI14_0:
+; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
+; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:  .LCPI14_1:
 ; CHECK-NEXT:    .long 0 @ 0x0
 ; CHECK-NEXT:    .long 0 @ 0x0
 ; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
@@ -179,13 +186,20 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    adr r0, .LCPI15_0
 ; CHECK-NEXT:    vldrw.u32 q2, [r0]
-; CHECK-NEXT:    vbic q1, q1, q2
+; CHECK-NEXT:    adr r0, .LCPI15_1
+; CHECK-NEXT:    vand q1, q1, q2
+; CHECK-NEXT:    vldrw.u32 q2, [r0]
 ; CHECK-NEXT:    vand q0, q0, q2
 ; CHECK-NEXT:    vorr q0, q0, q1
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI15_0:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
+; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
+; CHECK-NEXT:  .LCPI15_1:
 ; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
 ; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
 ; CHECK-NEXT:    .long 0 @ 0x0
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -2161,21 +2161,13 @@
 ;
 ; X86-SSE2-LABEL: constant_funnnel_v2i64:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [63,0,63,0]
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = <4,u,14,u>
-; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
-; X86-SSE2-NEXT:    pandn %xmm3, %xmm5
-; X86-SSE2-NEXT:    psrlq $1, %xmm1
 ; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
-; X86-SSE2-NEXT:    psrlq %xmm5, %xmm2
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
-; X86-SSE2-NEXT:    psrlq %xmm5, %xmm1
+; X86-SSE2-NEXT:    psrlq $60, %xmm2
+; X86-SSE2-NEXT:    psrlq $50, %xmm1
 ; X86-SSE2-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
-; X86-SSE2-NEXT:    pand %xmm3, %xmm4
 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psllq %xmm4, %xmm1
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[2,3,2,3]
-; X86-SSE2-NEXT:    psllq %xmm3, %xmm0
+; X86-SSE2-NEXT:    psllq $4, %xmm1
+; X86-SSE2-NEXT:    psllq $14, %xmm0
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
 ; X86-SSE2-NEXT:    orpd %xmm2, %xmm0
 ; X86-SSE2-NEXT:    retl
@@ -2695,10 +2687,8 @@
 ; X86-SSE2-LABEL: splatconstant_funnnel_v2i64:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    psrlq $50, %xmm1
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm1[0,1]
 ; X86-SSE2-NEXT:    psllq $14, %xmm0
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm0[0,1]
-; X86-SSE2-NEXT:    orpd %xmm1, %xmm0
+; X86-SSE2-NEXT:    por %xmm1, %xmm0
 ; X86-SSE2-NEXT:    retl
   %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> <i64 14, i64 14>)
   ret <2 x i64> %res
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll
@@ -1460,24 +1460,20 @@
 ;
 ; X86-SSE2-LABEL: constant_funnnel_v2i64:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [63,0,63,0]
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = <4,u,14,u>
-; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
-; X86-SSE2-NEXT:    psubq %xmm2, %xmm3
-; X86-SSE2-NEXT:    pand %xmm1, %xmm2
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm4
-; X86-SSE2-NEXT:    psllq %xmm2, %xmm4
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm5
-; X86-SSE2-NEXT:    psllq %xmm2, %xmm5
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
-; X86-SSE2-NEXT:    pand %xmm1, %xmm3
+; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE2-NEXT:    psubq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT:    psrlq %xmm1, %xmm2
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
+; X86-SSE2-NEXT:    psrlq %xmm1, %xmm3
+; X86-SSE2-NEXT:    movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlq %xmm3, %xmm1
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
-; X86-SSE2-NEXT:    psrlq %xmm2, %xmm0
+; X86-SSE2-NEXT:    psllq $4, %xmm1
+; X86-SSE2-NEXT:    psllq $14, %xmm0
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X86-SSE2-NEXT:    orpd %xmm5, %xmm0
+; X86-SSE2-NEXT:    orpd %xmm3, %xmm0
 ; X86-SSE2-NEXT:    retl
   %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 4, i64 14>)
   ret <2 x i64> %res
@@ -1932,9 +1928,8 @@
 ; X86-SSE2-LABEL: splatconstant_funnnel_v2i64:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlq $50, %xmm1
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm1[0,1]
-; X86-SSE2-NEXT:    psllq $14, %xmm0
+; X86-SSE2-NEXT:    psllq $14, %xmm1
+; X86-SSE2-NEXT:    psrlq $50, %xmm0
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm0[0,1]
 ; X86-SSE2-NEXT:    orpd %xmm1, %xmm0
 ; X86-SSE2-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -1867,21 +1867,13 @@
 ;
 ; X86-SSE2-LABEL: constant_funnnel_v2i64:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [63,0,63,0]
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = <4,u,14,u>
-; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
-; X86-SSE2-NEXT:    pand %xmm3, %xmm5
 ; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
-; X86-SSE2-NEXT:    psrlq %xmm5, %xmm2
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
-; X86-SSE2-NEXT:    psrlq %xmm5, %xmm1
+; X86-SSE2-NEXT:    psrlq $4, %xmm2
+; X86-SSE2-NEXT:    psrlq $14, %xmm1
 ; X86-SSE2-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
-; X86-SSE2-NEXT:    pandn %xmm3, %xmm4
-; X86-SSE2-NEXT:    psllq $1, %xmm0
 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psllq %xmm4, %xmm1
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[2,3,2,3]
-; X86-SSE2-NEXT:    psllq %xmm3, %xmm0
+; X86-SSE2-NEXT:    psllq $60, %xmm1
+; X86-SSE2-NEXT:    psllq $50, %xmm0
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
 ; X86-SSE2-NEXT:    orpd %xmm2, %xmm0
 ; X86-SSE2-NEXT:    retl
@@ -2414,10 +2406,8 @@
 ; X86-SSE2-LABEL: splatconstant_funnnel_v2i64:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    psrlq $14, %xmm1
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm1[0,1]
 ; X86-SSE2-NEXT:    psllq $50, %xmm0
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm0[0,1]
-; X86-SSE2-NEXT:    orpd %xmm1, %xmm0
+; X86-SSE2-NEXT:    por %xmm1, %xmm0
 ; X86-SSE2-NEXT:    retl
   %res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> <i64 14, i64 14>)
   ret <2 x i64> %res
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll
@@ -1548,24 +1548,20 @@
 ;
 ; X86-SSE2-LABEL: constant_funnnel_v2i64:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [63,0,63,0]
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = <4,u,14,u>
-; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
-; X86-SSE2-NEXT:    psubq %xmm2, %xmm3
-; X86-SSE2-NEXT:    pand %xmm1, %xmm2
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm4
-; X86-SSE2-NEXT:    psrlq %xmm2, %xmm4
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm5
-; X86-SSE2-NEXT:    psrlq %xmm2, %xmm5
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
-; X86-SSE2-NEXT:    pand %xmm1, %xmm3
+; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE2-NEXT:    psubq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT:    psllq %xmm1, %xmm2
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
+; X86-SSE2-NEXT:    psllq %xmm1, %xmm3
+; X86-SSE2-NEXT:    movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psllq %xmm3, %xmm1
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
-; X86-SSE2-NEXT:    psllq %xmm2, %xmm0
+; X86-SSE2-NEXT:    psrlq $4, %xmm1
+; X86-SSE2-NEXT:    psrlq $14, %xmm0
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X86-SSE2-NEXT:    orpd %xmm5, %xmm0
+; X86-SSE2-NEXT:    orpd %xmm3, %xmm0
 ; X86-SSE2-NEXT:    retl
   %res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 4, i64 14>)
   ret <2 x i64> %res
@@ -2020,9 +2016,8 @@
 ; X86-SSE2-LABEL: splatconstant_funnnel_v2i64:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psllq $50, %xmm1
-; X86-SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm1[0,1]
-; X86-SSE2-NEXT:    psrlq $14, %xmm0
+; X86-SSE2-NEXT:    psrlq $14, %xmm1
+; X86-SSE2-NEXT:    psllq $50, %xmm0
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm0[0,1]
 ; X86-SSE2-NEXT:    orpd %xmm1, %xmm0
 ; X86-SSE2-NEXT:    retl
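
For reference, the structural condition that gates the new FoldConstantArithmetic path can be written as a small predicate. This is a sketch only: the standalone helper name isFoldableBitcastedLogic is illustrative and not part of the patch, and it checks a single bitcast level for brevity, whereas the patch also peeks through nested bitcasts via peekThroughBitcasts.

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Sketch of the guard applied before attempting to fold
// bitlogic(bitcast(x), bitcast(y)) -> bitcast(bitlogic(x, y)).
static bool isFoldableBitcastedLogic(const SDNode *N) {
  // ISD::isBitwiseLogicOp collapses the usual AND/OR/XOR opcode checks.
  if (!ISD::isBitwiseLogicOp(N->getOpcode()))
    return false;
  // Both operands must be bitcasts whose sources share an integer type.
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  if (Op0.getOpcode() != ISD::BITCAST || Op1.getOpcode() != ISD::BITCAST)
    return false;
  EVT InnerVT = Op0.getOperand(0).getValueType();
  return InnerVT == Op1.getOperand(0).getValueType() && InnerVT.isInteger();
}

Whether the inner operands actually fold is still decided by the recursive FoldConstantArithmetic call; the predicate above only mirrors the structural checks. Because ISD::isBitwiseLogicOp is defined inline in ISDOpcodes.h, the same check is also available to target-specific DAG combines without any additional linkage.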