Skip to content

Commit 309c4f9

Browse files
committed Oct 27, 2015
[x86] replace integer logic ops with packed SSE FP logic ops
If we have an operand to a bitwise logic op that's already in an XMM register and the result is going to be sent to an XMM register, then use an SSE logic op to avoid moves between the integer and vector register files.

Related commits:
http://reviews.llvm.org/rL248395
http://reviews.llvm.org/rL248399
http://reviews.llvm.org/rL248404
http://reviews.llvm.org/rL248409
http://reviews.llvm.org/rL248415

This should solve PR22428:
https://llvm.org/bugs/show_bug.cgi?id=22428

llvm-svn: 251378
1 parent e7cbaf3 commit 309c4f9

File tree

2 files changed

+38
-20
lines changed

2 files changed

+38
-20
lines changed
 

‎llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 26 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23123,7 +23123,8 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
2312323123
EltNo);
2312423124
}
2312523125

23126-
static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
23126+
static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
23127+
const X86Subtarget *Subtarget) {
2312723128
SDValue N0 = N->getOperand(0);
2312823129
EVT VT = N->getValueType(0);
2312923130

@@ -23139,6 +23140,29 @@ static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
2313923140
return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
2314023141
}
2314123142

23143+
// Convert a bitcasted integer logic operation that has one bitcasted
23144+
// floating-point operand and one constant operand into a floating-point
23145+
// logic operation. This may create a load of the constant, but that is
23146+
// cheaper than materializing the constant in an integer register and
23147+
// transferring it to an SSE register or transferring the SSE operand to
23148+
// an integer register and back.
23149+
unsigned FPOpcode;
23150+
switch (N0.getOpcode()) {
23151+
case ISD::AND: FPOpcode = X86ISD::FAND; break;
23152+
case ISD::OR: FPOpcode = X86ISD::FOR; break;
23153+
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
23154+
default: return SDValue();
23155+
}
23156+
if (((Subtarget->hasSSE1() && VT == MVT::f32) ||
23157+
(Subtarget->hasSSE2() && VT == MVT::f64)) &&
23158+
isa<ConstantSDNode>(N0.getOperand(1)) &&
23159+
N0.getOperand(0).getOpcode() == ISD::BITCAST &&
23160+
N0.getOperand(0).getOperand(0).getValueType() == VT) {
23161+
SDValue N000 = N0.getOperand(0).getOperand(0);
23162+
SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1));
23163+
return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst);
23164+
}
23165+
2314223166
return SDValue();
2314323167
}
2314423168

@@ -26635,7 +26659,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
2663526659
case ISD::SELECT:
2663626660
case X86ISD::SHRUNKBLEND:
2663726661
return PerformSELECTCombine(N, DAG, DCI, Subtarget);
26638-
case ISD::BITCAST: return PerformBITCASTCombine(N, DAG);
26662+
case ISD::BITCAST: return PerformBITCASTCombine(N, DAG, Subtarget);
2663926663
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
2664026664
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
2664126665
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);

‎llvm/test/CodeGen/X86/fp-logic.ll

Lines changed: 12 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -110,9 +110,8 @@ define float @f6(float %x, i32 %y) {
110110
define float @f7(float %x) {
111111
; CHECK-LABEL: f7:
112112
; CHECK: # BB#0:
113-
; CHECK-NEXT: movd %xmm0, %eax
114-
; CHECK-NEXT: andl $3, %eax
115-
; CHECK-NEXT: movd %eax, %xmm0
113+
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
114+
; CHECK-NEXT: andps %xmm1, %xmm0
116115
; CHECK-NEXT: retq
117116

118117
%bc1 = bitcast float %x to i32
@@ -126,9 +125,8 @@ define float @f7(float %x) {
126125
define float @f8(float %x) {
127126
; CHECK-LABEL: f8:
128127
; CHECK: # BB#0:
129-
; CHECK-NEXT: movd %xmm0, %eax
130-
; CHECK-NEXT: andl $4, %eax
131-
; CHECK-NEXT: movd %eax, %xmm0
128+
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
129+
; CHECK-NEXT: andps %xmm1, %xmm0
132130
; CHECK-NEXT: retq
133131

134132
%bc1 = bitcast float %x to i32
@@ -196,9 +194,8 @@ define float @xor(float %x, float %y) {
196194
define float @f7_or(float %x) {
197195
; CHECK-LABEL: f7_or:
198196
; CHECK: # BB#0:
199-
; CHECK-NEXT: movd %xmm0, %eax
200-
; CHECK-NEXT: orl $3, %eax
201-
; CHECK-NEXT: movd %eax, %xmm0
197+
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
198+
; CHECK-NEXT: orps %xmm1, %xmm0
202199
; CHECK-NEXT: retq
203200

204201
%bc1 = bitcast float %x to i32
@@ -210,9 +207,8 @@ define float @f7_or(float %x) {
210207
define float @f7_xor(float %x) {
211208
; CHECK-LABEL: f7_xor:
212209
; CHECK: # BB#0:
213-
; CHECK-NEXT: movd %xmm0, %eax
214-
; CHECK-NEXT: xorl $3, %eax
215-
; CHECK-NEXT: movd %eax, %xmm0
210+
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
211+
; CHECK-NEXT: xorps %xmm1, %xmm0
216212
; CHECK-NEXT: retq
217213

218214
%bc1 = bitcast float %x to i32
@@ -239,9 +235,8 @@ define double @doubles(double %x, double %y) {
239235
define double @f7_double(double %x) {
240236
; CHECK-LABEL: f7_double:
241237
; CHECK: # BB#0:
242-
; CHECK-NEXT: movd %xmm0, %rax
243-
; CHECK-NEXT: andl $3, %eax
244-
; CHECK-NEXT: movd %rax, %xmm0
238+
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
239+
; CHECK-NEXT: andpd %xmm1, %xmm0
245240
; CHECK-NEXT: retq
246241

247242
%bc1 = bitcast double %x to i64
@@ -257,9 +252,8 @@ define double @f7_double(double %x) {
257252
define float @movmsk(float %x) {
258253
; CHECK-LABEL: movmsk:
259254
; CHECK: # BB#0:
260-
; CHECK-NEXT: movmskps %xmm0, %eax
261-
; CHECK-NEXT: shll $31, %eax
262-
; CHECK-NEXT: movd %eax, %xmm0
255+
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
256+
; CHECK-NEXT: andps %xmm1, %xmm0
263257
; CHECK-NEXT: retq
264258

265259
%bc1 = bitcast float %x to i32

0 commit comments

Comments
 (0)
Please sign in to comment.