Skip to content

Commit 7bb433c

Browse files
committedSep 6, 2019
[X86] Use MOVSX by default instead of CBW to extend i8 to AX for i8 sdivrem.
We can use a MOVSX16 here and then rely on FixupBWInst to change it to MOVSX32 if the upper bits are dead, with a special case to not promote if it could be turned into CBW. Then we can rely on X86MCInstLower to turn the MOVSX into CBW very late if register allocation worked out. Using MOVSX gives an opportunity to use the MOVSX as both a copy and a sign extend, since the input and output registers aren't tied together. Differential Revision: https://reviews.llvm.org/D67192 llvm-svn: 371243
1 parent 22b35c4 commit 7bb433c

7 files changed

+222
-328
lines changed
 

‎llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

+8-5
Original file line numberDiff line numberDiff line change
@@ -4703,7 +4703,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
47034703
default: llvm_unreachable("Unsupported VT!");
47044704
case MVT::i8:
47054705
LoReg = X86::AL; ClrReg = HiReg = X86::AH;
4706-
SExtOpcode = X86::CBW;
4706+
SExtOpcode = 0; // Not used.
47074707
break;
47084708
case MVT::i16:
47094709
LoReg = X86::AX; HiReg = X86::DX;
@@ -4725,21 +4725,24 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
47254725
bool signBitIsZero = CurDAG->SignBitIsZero(N0);
47264726

47274727
SDValue InFlag;
4728-
if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
4728+
if (NVT == MVT::i8) {
47294729
// Special case for div8, just use a move with zero extension to AX to
47304730
// clear the upper 8 bits (AH).
47314731
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
47324732
MachineSDNode *Move;
47334733
if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
47344734
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
4735-
Move = CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i32,
4736-
MVT::Other, Ops);
4735+
unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8
4736+
: X86::MOVZX16rm8;
4737+
Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops);
47374738
Chain = SDValue(Move, 1);
47384739
ReplaceUses(N0.getValue(1), Chain);
47394740
// Record the mem-refs
47404741
CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
47414742
} else {
4742-
Move = CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i32, N0);
4743+
unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8
4744+
: X86::MOVZX16rr8;
4745+
Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0);
47434746
Chain = CurDAG->getEntryNode();
47444747
}
47454748
Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0),

‎llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll

+5-7
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,15 @@ define i32 @main() nounwind uwtable {
1818
; CHECK-LABEL: main:
1919
; CHECK: # %bb.0: # %entry
2020
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
21-
; CHECK-NEXT: pextrb $1, %xmm0, %eax
21+
; CHECK-NEXT: pextrb $1, %xmm0, %ecx
2222
; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
23-
; CHECK-NEXT: pextrb $1, %xmm1, %ecx
24-
; CHECK-NEXT: # kill: def $al killed $al killed $eax
23+
; CHECK-NEXT: pextrb $1, %xmm1, %eax
2524
; CHECK-NEXT: cbtw
25+
; CHECK-NEXT: pextrb $0, %xmm0, %edx
26+
; CHECK-NEXT: pextrb $0, %xmm1, %esi
2627
; CHECK-NEXT: idivb %cl
2728
; CHECK-NEXT: movl %eax, %ecx
28-
; CHECK-NEXT: pextrb $0, %xmm0, %eax
29-
; CHECK-NEXT: # kill: def $al killed $al killed $eax
30-
; CHECK-NEXT: cbtw
31-
; CHECK-NEXT: pextrb $0, %xmm1, %edx
29+
; CHECK-NEXT: movsbl %sil, %eax
3230
; CHECK-NEXT: idivb %dl
3331
; CHECK-NEXT: movzbl %cl, %ecx
3432
; CHECK-NEXT: movzbl %al, %eax

‎llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll

+65-100
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,9 @@ define i8 @scalar_i8(i8 %x, i8 %y, i8* %divdst) nounwind {
1212
; X86-LABEL: scalar_i8:
1313
; X86: # %bb.0:
1414
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
15-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
16-
; X86-NEXT: movl %ecx, %eax
17-
; X86-NEXT: cbtw
1815
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
16+
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
17+
; X86-NEXT: movsbl %cl, %eax
1918
; X86-NEXT: idivb %ch
2019
; X86-NEXT: movb %al, (%edx)
2120
; X86-NEXT: mulb %ch
@@ -25,13 +24,13 @@ define i8 @scalar_i8(i8 %x, i8 %y, i8* %divdst) nounwind {
2524
;
2625
; X64-LABEL: scalar_i8:
2726
; X64: # %bb.0:
28-
; X64-NEXT: movl %edi, %eax
29-
; X64-NEXT: cbtw
27+
; X64-NEXT: movsbl %dil, %ecx
28+
; X64-NEXT: movl %ecx, %eax
3029
; X64-NEXT: idivb %sil
3130
; X64-NEXT: movb %al, (%rdx)
3231
; X64-NEXT: mulb %sil
33-
; X64-NEXT: subb %al, %dil
34-
; X64-NEXT: movl %edi, %eax
32+
; X64-NEXT: subb %al, %cl
33+
; X64-NEXT: movl %ecx, %eax
3534
; X64-NEXT: retq
3635
%div = sdiv i8 %x, %y
3736
store i8 %div, i8* %divdst, align 4
@@ -182,104 +181,87 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y, <16 x i8>* %divdst)
182181
; X86-NEXT: subl $48, %esp
183182
; X86-NEXT: movdqa %xmm0, (%esp)
184183
; X86-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp)
185-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
186-
; X86-NEXT: cbtw
184+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
187185
; X86-NEXT: idivb {{[0-9]+}}(%esp)
188186
; X86-NEXT: movzbl %al, %eax
189187
; X86-NEXT: movd %eax, %xmm2
190-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
191-
; X86-NEXT: cbtw
188+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
192189
; X86-NEXT: idivb {{[0-9]+}}(%esp)
193190
; X86-NEXT: movzbl %al, %eax
194191
; X86-NEXT: movd %eax, %xmm3
195192
; X86-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
196-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
197-
; X86-NEXT: cbtw
193+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
198194
; X86-NEXT: idivb {{[0-9]+}}(%esp)
199195
; X86-NEXT: movzbl %al, %eax
200196
; X86-NEXT: movd %eax, %xmm4
201-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
202-
; X86-NEXT: cbtw
197+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
203198
; X86-NEXT: idivb {{[0-9]+}}(%esp)
204199
; X86-NEXT: movzbl %al, %eax
205200
; X86-NEXT: movd %eax, %xmm2
206201
; X86-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
207202
; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
208-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
209-
; X86-NEXT: cbtw
203+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
210204
; X86-NEXT: idivb {{[0-9]+}}(%esp)
211205
; X86-NEXT: movzbl %al, %eax
212206
; X86-NEXT: movd %eax, %xmm3
213-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
214-
; X86-NEXT: cbtw
207+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
215208
; X86-NEXT: idivb {{[0-9]+}}(%esp)
216209
; X86-NEXT: movzbl %al, %eax
217210
; X86-NEXT: movd %eax, %xmm4
218211
; X86-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
219-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
220-
; X86-NEXT: cbtw
212+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
221213
; X86-NEXT: idivb {{[0-9]+}}(%esp)
222214
; X86-NEXT: movzbl %al, %eax
223215
; X86-NEXT: movd %eax, %xmm5
224-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
225-
; X86-NEXT: cbtw
216+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
226217
; X86-NEXT: idivb {{[0-9]+}}(%esp)
227218
; X86-NEXT: movzbl %al, %eax
228219
; X86-NEXT: movd %eax, %xmm3
229220
; X86-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
230-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
231-
; X86-NEXT: cbtw
221+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
232222
; X86-NEXT: idivb {{[0-9]+}}(%esp)
233223
; X86-NEXT: movzbl %al, %eax
234224
; X86-NEXT: movd %eax, %xmm5
235-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
236-
; X86-NEXT: cbtw
225+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
237226
; X86-NEXT: idivb {{[0-9]+}}(%esp)
238227
; X86-NEXT: movzbl %al, %eax
239228
; X86-NEXT: movd %eax, %xmm6
240-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
241-
; X86-NEXT: cbtw
229+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
230+
; X86-NEXT: idivb {{[0-9]+}}(%esp)
231+
; X86-NEXT: movzbl %al, %edx
232+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
242233
; X86-NEXT: idivb {{[0-9]+}}(%esp)
243234
; X86-NEXT: movzbl %al, %esi
244-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
245-
; X86-NEXT: cbtw
235+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
246236
; X86-NEXT: idivb {{[0-9]+}}(%esp)
247237
; X86-NEXT: movzbl %al, %edi
248-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
249-
; X86-NEXT: cbtw
238+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
250239
; X86-NEXT: idivb {{[0-9]+}}(%esp)
251240
; X86-NEXT: movzbl %al, %ebx
252-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
253-
; X86-NEXT: cbtw
241+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
254242
; X86-NEXT: idivb {{[0-9]+}}(%esp)
255243
; X86-NEXT: movl %eax, %ecx
256-
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
257-
; X86-NEXT: cbtw
258-
; X86-NEXT: movb (%esp), %dl
244+
; X86-NEXT: movsbl (%esp), %eax
259245
; X86-NEXT: idivb {{[0-9]+}}(%esp)
260246
; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
261-
; X86-NEXT: movd %esi, %xmm4
247+
; X86-NEXT: movd %edx, %xmm4
262248
; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
263-
; X86-NEXT: movd %edi, %xmm2
249+
; X86-NEXT: movd %esi, %xmm2
264250
; X86-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
251+
; X86-NEXT: movd %edi, %xmm5
265252
; X86-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
253+
; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
266254
; X86-NEXT: movd %ebx, %xmm4
267255
; X86-NEXT: movzbl %cl, %ecx
268-
; X86-NEXT: movd %ecx, %xmm5
256+
; X86-NEXT: movd %ecx, %xmm6
269257
; X86-NEXT: movl 8(%ebp), %ecx
270-
; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
271-
; X86-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
272-
; X86-NEXT: movzbl %al, %eax
273-
; X86-NEXT: movd %eax, %xmm4
274-
; X86-NEXT: movl %edx, %eax
275-
; X86-NEXT: cbtw
276-
; X86-NEXT: idivb {{[0-9]+}}(%esp)
258+
; X86-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
277259
; X86-NEXT: movzbl %al, %eax
278-
; X86-NEXT: movd %eax, %xmm6
279-
; X86-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
280-
; X86-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
281-
; X86-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1]
282-
; X86-NEXT: movdqa %xmm6, %xmm2
260+
; X86-NEXT: movd %eax, %xmm5
261+
; X86-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3],xmm5[4],xmm6[4],xmm5[5],xmm6[5],xmm5[6],xmm6[6],xmm5[7],xmm6[7]
262+
; X86-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
263+
; X86-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1]
264+
; X86-NEXT: movdqa %xmm5, %xmm2
283265
; X86-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
284266
; X86-NEXT: movdqa %xmm2, (%ecx)
285267
; X86-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
@@ -288,9 +270,9 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y, <16 x i8>* %divdst)
288270
; X86-NEXT: pmullw %xmm3, %xmm2
289271
; X86-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
290272
; X86-NEXT: pand %xmm3, %xmm2
291-
; X86-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3],xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7]
273+
; X86-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3],xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
292274
; X86-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
293-
; X86-NEXT: pmullw %xmm6, %xmm1
275+
; X86-NEXT: pmullw %xmm5, %xmm1
294276
; X86-NEXT: pand %xmm3, %xmm1
295277
; X86-NEXT: packuswb %xmm2, %xmm1
296278
; X86-NEXT: psubb %xmm1, %xmm0
@@ -312,66 +294,53 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y, <16 x i8>* %divdst)
312294
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
313295
; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
314296
; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
315-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
316-
; X64-NEXT: cbtw
297+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
317298
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
318299
; X64-NEXT: movzbl %al, %eax
319300
; X64-NEXT: movd %eax, %xmm2
320-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
321-
; X64-NEXT: cbtw
301+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
322302
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
323303
; X64-NEXT: movzbl %al, %r8d
324-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
325-
; X64-NEXT: cbtw
304+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
326305
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
327306
; X64-NEXT: movzbl %al, %r9d
328-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
329-
; X64-NEXT: cbtw
307+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
330308
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
331309
; X64-NEXT: movzbl %al, %r10d
332-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
333-
; X64-NEXT: cbtw
310+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
334311
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
335312
; X64-NEXT: movzbl %al, %r11d
336-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
337-
; X64-NEXT: cbtw
313+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
338314
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
339315
; X64-NEXT: movzbl %al, %r14d
340-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
341-
; X64-NEXT: cbtw
316+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
342317
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
343318
; X64-NEXT: movzbl %al, %r15d
344-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
345-
; X64-NEXT: cbtw
319+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
346320
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
347321
; X64-NEXT: movzbl %al, %r12d
348-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
349-
; X64-NEXT: cbtw
322+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
350323
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
351324
; X64-NEXT: movzbl %al, %r13d
352-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
353-
; X64-NEXT: cbtw
325+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
326+
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
327+
; X64-NEXT: movzbl %al, %edi
328+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
329+
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
330+
; X64-NEXT: movzbl %al, %esi
331+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
354332
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
355333
; X64-NEXT: movzbl %al, %ebx
356-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
357-
; X64-NEXT: cbtw
334+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
358335
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
359336
; X64-NEXT: movzbl %al, %ebp
360-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
361-
; X64-NEXT: cbtw
362-
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
363-
; X64-NEXT: movzbl %al, %edi
364-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
365-
; X64-NEXT: cbtw
337+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
366338
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
367-
; X64-NEXT: movzbl %al, %esi
368-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
369-
; X64-NEXT: cbtw
339+
; X64-NEXT: movzbl %al, %edx
340+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
370341
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
371342
; X64-NEXT: movl %eax, %ecx
372-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al
373-
; X64-NEXT: cbtw
374-
; X64-NEXT: movb -{{[0-9]+}}(%rsp), %dl
343+
; X64-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
375344
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
376345
; X64-NEXT: movd %r8d, %xmm3
377346
; X64-NEXT: movd %r9d, %xmm4
@@ -386,24 +355,20 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y, <16 x i8>* %divdst)
386355
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3],xmm2[4],xmm6[4],xmm2[5],xmm6[5],xmm2[6],xmm6[6],xmm2[7],xmm6[7]
387356
; X64-NEXT: movd %r13d, %xmm6
388357
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
389-
; X64-NEXT: movd %ebx, %xmm4
358+
; X64-NEXT: movd %edi, %xmm4
390359
; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
391-
; X64-NEXT: movd %ebp, %xmm2
360+
; X64-NEXT: movd %esi, %xmm2
392361
; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
393-
; X64-NEXT: movd %edi, %xmm5
362+
; X64-NEXT: movd %ebx, %xmm5
394363
; X64-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1],xmm4[2],xmm6[2],xmm4[3],xmm6[3],xmm4[4],xmm6[4],xmm4[5],xmm6[5],xmm4[6],xmm6[6],xmm4[7],xmm6[7]
395-
; X64-NEXT: movd %esi, %xmm6
364+
; X64-NEXT: movd %ebp, %xmm6
396365
; X64-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
397-
; X64-NEXT: movzbl %cl, %ecx
398-
; X64-NEXT: movd %ecx, %xmm2
366+
; X64-NEXT: movd %edx, %xmm2
399367
; X64-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
368+
; X64-NEXT: movzbl %cl, %ecx
369+
; X64-NEXT: movd %ecx, %xmm4
400370
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3],xmm2[4],xmm6[4],xmm2[5],xmm6[5],xmm2[6],xmm6[6],xmm2[7],xmm6[7]
401371
; X64-NEXT: movzbl %al, %eax
402-
; X64-NEXT: movd %eax, %xmm4
403-
; X64-NEXT: movl %edx, %eax
404-
; X64-NEXT: cbtw
405-
; X64-NEXT: idivb -{{[0-9]+}}(%rsp)
406-
; X64-NEXT: movzbl %al, %eax
407372
; X64-NEXT: movd %eax, %xmm6
408373
; X64-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
409374
; X64-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3]

‎llvm/test/CodeGen/X86/divrem.ll

+2-5
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,7 @@ define void @si8(i8 %x, i8 %y, i8* %p, i8* %q) nounwind {
118118
; X32-LABEL: si8:
119119
; X32: # %bb.0:
120120
; X32-NEXT: pushl %ebx
121-
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
122-
; X32-NEXT: cbtw
121+
; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax
123122
; X32-NEXT: idivb {{[0-9]+}}(%esp)
124123
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
125124
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -131,9 +130,7 @@ define void @si8(i8 %x, i8 %y, i8* %p, i8* %q) nounwind {
131130
;
132131
; X64-LABEL: si8:
133132
; X64: # %bb.0:
134-
; X64-NEXT: movl %edi, %eax
135-
; X64-NEXT: # kill: def $al killed $al killed $eax
136-
; X64-NEXT: cbtw
133+
; X64-NEXT: movsbl %dil, %eax
137134
; X64-NEXT: idivb %sil
138135
; X64-NEXT: movsbl %ah, %esi
139136
; X64-NEXT: movb %al, (%rdx)

‎llvm/test/CodeGen/X86/divrem8_ext.ll

+8-20
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,7 @@ define i64 @test_urem_zext64_ah(i8 %x, i8 %y) {
9393
define signext i8 @test_sdivrem_sext_ah(i8 %x, i8 %y) {
9494
; X32-LABEL: test_sdivrem_sext_ah:
9595
; X32: # %bb.0:
96-
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
97-
; X32-NEXT: cbtw
96+
; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax
9897
; X32-NEXT: idivb {{[0-9]+}}(%esp)
9998
; X32-NEXT: movsbl %ah, %ecx
10099
; X32-NEXT: movb %al, z
@@ -103,9 +102,7 @@ define signext i8 @test_sdivrem_sext_ah(i8 %x, i8 %y) {
103102
;
104103
; X64-LABEL: test_sdivrem_sext_ah:
105104
; X64: # %bb.0:
106-
; X64-NEXT: movl %edi, %eax
107-
; X64-NEXT: # kill: def $al killed $al killed $eax
108-
; X64-NEXT: cbtw
105+
; X64-NEXT: movsbl %dil, %eax
109106
; X64-NEXT: idivb %sil
110107
; X64-NEXT: movsbl %ah, %ecx
111108
; X64-NEXT: movb %al, {{.*}}(%rip)
@@ -120,18 +117,15 @@ define signext i8 @test_sdivrem_sext_ah(i8 %x, i8 %y) {
120117
define signext i8 @test_srem_sext_ah(i8 %x, i8 %y) {
121118
; X32-LABEL: test_srem_sext_ah:
122119
; X32: # %bb.0:
123-
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
124-
; X32-NEXT: cbtw
120+
; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax
125121
; X32-NEXT: idivb {{[0-9]+}}(%esp)
126122
; X32-NEXT: movsbl %ah, %eax
127123
; X32-NEXT: # kill: def $al killed $al killed $eax
128124
; X32-NEXT: retl
129125
;
130126
; X64-LABEL: test_srem_sext_ah:
131127
; X64: # %bb.0:
132-
; X64-NEXT: movl %edi, %eax
133-
; X64-NEXT: # kill: def $al killed $al killed $eax
134-
; X64-NEXT: cbtw
128+
; X64-NEXT: movsbl %dil, %eax
135129
; X64-NEXT: idivb %sil
136130
; X64-NEXT: movsbl %ah, %eax
137131
; X64-NEXT: # kill: def $al killed $al killed $eax
@@ -143,9 +137,8 @@ define signext i8 @test_srem_sext_ah(i8 %x, i8 %y) {
143137
define i8 @test_srem_noext_ah(i8 %x, i8 %y) {
144138
; X32-LABEL: test_srem_noext_ah:
145139
; X32: # %bb.0:
146-
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
147-
; X32-NEXT: cbtw
148140
; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
141+
; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax
149142
; X32-NEXT: idivb %cl
150143
; X32-NEXT: movsbl %ah, %eax
151144
; X32-NEXT: addb %cl, %al
@@ -154,9 +147,7 @@ define i8 @test_srem_noext_ah(i8 %x, i8 %y) {
154147
;
155148
; X64-LABEL: test_srem_noext_ah:
156149
; X64: # %bb.0:
157-
; X64-NEXT: movl %edi, %eax
158-
; X64-NEXT: # kill: def $al killed $al killed $eax
159-
; X64-NEXT: cbtw
150+
; X64-NEXT: movsbl %dil, %eax
160151
; X64-NEXT: idivb %sil
161152
; X64-NEXT: movsbl %ah, %eax
162153
; X64-NEXT: addb %sil, %al
@@ -170,8 +161,7 @@ define i8 @test_srem_noext_ah(i8 %x, i8 %y) {
170161
define i64 @test_srem_sext64_ah(i8 %x, i8 %y) {
171162
; X32-LABEL: test_srem_sext64_ah:
172163
; X32: # %bb.0:
173-
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
174-
; X32-NEXT: cbtw
164+
; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax
175165
; X32-NEXT: idivb {{[0-9]+}}(%esp)
176166
; X32-NEXT: movsbl %ah, %eax
177167
; X32-NEXT: movl %eax, %edx
@@ -180,9 +170,7 @@ define i64 @test_srem_sext64_ah(i8 %x, i8 %y) {
180170
;
181171
; X64-LABEL: test_srem_sext64_ah:
182172
; X64: # %bb.0:
183-
; X64-NEXT: movl %edi, %eax
184-
; X64-NEXT: # kill: def $al killed $al killed $eax
185-
; X64-NEXT: cbtw
173+
; X64-NEXT: movsbl %dil, %eax
186174
; X64-NEXT: idivb %sil
187175
; X64-NEXT: movsbl %ah, %eax
188176
; X64-NEXT: cltq

‎llvm/test/CodeGen/X86/scalar_widen_div.ll

+12-20
Original file line numberDiff line numberDiff line change
@@ -56,21 +56,17 @@ entry:
5656
define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
5757
; CHECK-LABEL: test_char_div:
5858
; CHECK: # %bb.0:
59-
; CHECK-NEXT: movl %edx, %r10d
60-
; CHECK-NEXT: movl %edi, %eax
61-
; CHECK-NEXT: # kill: def $al killed $al killed $eax
62-
; CHECK-NEXT: cbtw
59+
; CHECK-NEXT: movsbl %dil, %eax
6360
; CHECK-NEXT: idivb %cl
6461
; CHECK-NEXT: movl %eax, %edi
65-
; CHECK-NEXT: movl %esi, %eax
66-
; CHECK-NEXT: cbtw
62+
; CHECK-NEXT: movsbl %sil, %eax
6763
; CHECK-NEXT: idivb %r8b
68-
; CHECK-NEXT: movl %eax, %edx
69-
; CHECK-NEXT: movl %r10d, %eax
70-
; CHECK-NEXT: cbtw
64+
; CHECK-NEXT: movl %eax, %esi
65+
; CHECK-NEXT: movsbl %dl, %eax
7166
; CHECK-NEXT: idivb %r9b
7267
; CHECK-NEXT: movl %eax, %ecx
7368
; CHECK-NEXT: movl %edi, %eax
69+
; CHECK-NEXT: movl %esi, %edx
7470
; CHECK-NEXT: retq
7571
%div.r = sdiv <3 x i8> %num, %div
7672
ret <3 x i8> %div.r
@@ -258,31 +254,27 @@ define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
258254
define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
259255
; CHECK-LABEL: test_char_rem:
260256
; CHECK: # %bb.0:
257+
; CHECK-NEXT: pextrb $1, %xmm1, %ecx
261258
; CHECK-NEXT: pextrb $1, %xmm0, %eax
262-
; CHECK-NEXT: # kill: def $al killed $al killed $eax
263259
; CHECK-NEXT: cbtw
264-
; CHECK-NEXT: pextrb $1, %xmm1, %ecx
265260
; CHECK-NEXT: idivb %cl
266261
; CHECK-NEXT: movsbl %ah, %ecx
262+
; CHECK-NEXT: pextrb $0, %xmm1, %edx
267263
; CHECK-NEXT: pextrb $0, %xmm0, %eax
268-
; CHECK-NEXT: # kill: def $al killed $al killed $eax
269264
; CHECK-NEXT: cbtw
270-
; CHECK-NEXT: pextrb $0, %xmm1, %edx
271265
; CHECK-NEXT: idivb %dl
272266
; CHECK-NEXT: movsbl %ah, %eax
273267
; CHECK-NEXT: movd %eax, %xmm2
274-
; CHECK-NEXT: pextrb $2, %xmm0, %eax
275-
; CHECK-NEXT: # kill: def $al killed $al killed $eax
276-
; CHECK-NEXT: cbtw
277268
; CHECK-NEXT: pinsrb $1, %ecx, %xmm2
278269
; CHECK-NEXT: pextrb $2, %xmm1, %ecx
270+
; CHECK-NEXT: pextrb $2, %xmm0, %eax
271+
; CHECK-NEXT: cbtw
279272
; CHECK-NEXT: idivb %cl
280-
; CHECK-NEXT: movsbl %ah, %ecx
273+
; CHECK-NEXT: movsbl %ah, %eax
274+
; CHECK-NEXT: pinsrb $2, %eax, %xmm2
275+
; CHECK-NEXT: pextrb $3, %xmm1, %ecx
281276
; CHECK-NEXT: pextrb $3, %xmm0, %eax
282-
; CHECK-NEXT: # kill: def $al killed $al killed $eax
283277
; CHECK-NEXT: cbtw
284-
; CHECK-NEXT: pinsrb $2, %ecx, %xmm2
285-
; CHECK-NEXT: pextrb $3, %xmm1, %ecx
286278
; CHECK-NEXT: idivb %cl
287279
; CHECK-NEXT: movsbl %ah, %eax
288280
; CHECK-NEXT: pinsrb $3, %eax, %xmm2

‎llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll

+122-171
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)
Please sign in to comment.