Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -8980,28 +8980,25 @@ break; } - // Check if chain operands and base addresses match. - if (Load1->getOperand(0) != Load2->getOperand(0) || - Load1->getOperand(5) != Load2->getOperand(5)) + // Lambda to check if both the loads have the same value for an operand index. + auto HasSameOp = [&](int I) { + return Load1->getOperand(I) == Load2->getOperand(I); + }; + + // All operands except the displacement should match. + if (!HasSameOp(X86::AddrBaseReg) || !HasSameOp(X86::AddrScaleAmt) || + !HasSameOp(X86::AddrIndexReg) || !HasSameOp(X86::AddrSegmentReg)) return false; - // Segment operands should match as well. - if (Load1->getOperand(4) != Load2->getOperand(4)) + + // Now let's examine if the displacements are constants. + auto Disp1 = dyn_cast(Load1->getOperand(X86::AddrDisp)); + auto Disp2 = dyn_cast(Load2->getOperand(X86::AddrDisp)); + if (!Disp1 || !Disp2) return false; - // Scale should be 1, Index should be Reg0. - if (Load1->getOperand(1) == Load2->getOperand(1) && - Load1->getOperand(2) == Load2->getOperand(2)) { - if (cast(Load1->getOperand(1))->getZExtValue() != 1) - return false; - // Now let's examine the displacements. - if (isa(Load1->getOperand(3)) && - isa(Load2->getOperand(3))) { - Offset1 = cast(Load1->getOperand(3))->getSExtValue(); - Offset2 = cast(Load2->getOperand(3))->getSExtValue(); - return true; - } - } - return false; + Offset1 = Disp1->getSExtValue(); + Offset2 = Disp2->getSExtValue(); + return true; } bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, Index: test/CodeGen/X86/load-slice.ll =================================================================== --- test/CodeGen/X86/load-slice.ll +++ test/CodeGen/X86/load-slice.ll @@ -19,10 +19,10 @@ ; STRESS-LABEL: t1: ; Load out[out_start + 8].real, this is base + 8 * 8 + 0. ; STRESS: vmovss 64([[BASE:[^(]+]]), [[OUT_Real:%xmm[0-9]+]] -; Add low slice: out[out_start].real, this is base + 0. -; STRESS-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]] ; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. ; STRESS-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]] +; Add low slice: out[out_start].real, this is base + 0. +; STRESS-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]] ; Add high slice: out[out_start].imm, this is base + 4. ; STRESS-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] ; Swap Imm and Real. @@ -34,10 +34,10 @@ ; REGULAR-LABEL: t1: ; Load out[out_start + 8].real, this is base + 8 * 8 + 0. ; REGULAR: vmovss 64([[BASE:[^)]+]]), [[OUT_Real:%xmm[0-9]+]] -; Add low slice: out[out_start].real, this is base + 0. -; REGULAR-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]] ; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. ; REGULAR-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]] +; Add low slice: out[out_start].real, this is base + 0. +; REGULAR-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]] ; Add high slice: out[out_start].imm, this is base + 4. ; REGULAR-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] ; Swap Imm and Real.