Index: lib/Target/AArch64/AArch64FrameLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64FrameLowering.cpp
+++ lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -423,37 +423,37 @@
   }
 
   unsigned NewOpc;
-  bool NewIsUnscaled = false;
+  int Scale = 1;
   switch (MBBI->getOpcode()) {
   default:
     llvm_unreachable("Unexpected callee-save save/restore opcode!");
   case AArch64::STPXi:
     NewOpc = AArch64::STPXpre;
+    Scale = 8;
     break;
   case AArch64::STPDi:
     NewOpc = AArch64::STPDpre;
+    Scale = 8;
     break;
   case AArch64::STRXui:
     NewOpc = AArch64::STRXpre;
-    NewIsUnscaled = true;
     break;
   case AArch64::STRDui:
     NewOpc = AArch64::STRDpre;
-    NewIsUnscaled = true;
     break;
   case AArch64::LDPXi:
     NewOpc = AArch64::LDPXpost;
+    Scale = 8;
     break;
   case AArch64::LDPDi:
     NewOpc = AArch64::LDPDpost;
+    Scale = 8;
     break;
   case AArch64::LDRXui:
     NewOpc = AArch64::LDRXpost;
-    NewIsUnscaled = true;
     break;
   case AArch64::LDRDui:
     NewOpc = AArch64::LDRDpost;
-    NewIsUnscaled = true;
     break;
   }
 
@@ -471,12 +471,8 @@
          "instruction!");
   assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
          "Unexpected base register in callee-save save/restore instruction!");
-  // Last operand is immediate offset that needs fixing.
-  assert(CSStackSizeInc % 8 == 0);
-  int64_t CSStackSizeIncImm = CSStackSizeInc;
-  if (!NewIsUnscaled)
-    CSStackSizeIncImm /= 8;
-  MIB.addImm(CSStackSizeIncImm);
+  assert(CSStackSizeInc % Scale == 0);
+  MIB.addImm(CSStackSizeInc / Scale);
 
   MIB.setMIFlags(MBBI->getFlags());
   MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());
@@ -498,11 +494,21 @@
   }
 
   (void)Opc;
-  assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
-          Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
-          Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
-          Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
-         "Unexpected callee-save save/restore opcode!");
+  unsigned Scale;
+  switch (Opc) {
+  case AArch64::STPXi:
+  case AArch64::STRXui:
+  case AArch64::STPDi:
+  case AArch64::STRDui:
+  case AArch64::LDPXi:
+  case AArch64::LDRXui:
+  case AArch64::LDPDi:
+  case AArch64::LDRDui:
+    Scale = 8;
+    break;
+  default:
+    llvm_unreachable("Unexpected callee-save save/restore opcode!");
+  }
 
   unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
   assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
@@ -511,7 +517,7 @@
   MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
   // All generated opcodes have scaled offsets.
   assert(LocalStackSize % 8 == 0);
-  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
+  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
 }
 
 static void adaptForLdStOpt(MachineBasicBlock &MBB,
@@ -1143,7 +1149,7 @@
   unsigned Reg2 = AArch64::NoRegister;
   int FrameIdx;
   int Offset;
-  bool IsGPR;
+  enum RegType { GPR, FPR64 } Type;
 
   RegPairInfo() = default;
 
@@ -1177,16 +1183,26 @@
     RegPairInfo RPI;
     RPI.Reg1 = CSI[i].getReg();
 
-    assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
-           AArch64::FPR64RegClass.contains(RPI.Reg1));
-    RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
+    if (AArch64::GPR64RegClass.contains(RPI.Reg1))
+      RPI.Type = RegPairInfo::GPR;
+    else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
+      RPI.Type = RegPairInfo::FPR64;
+    else
+      llvm_unreachable("Unsupported register class.");
 
     // Add the next reg to the pair if it is in the same register class.
     if (i + 1 < Count) {
       unsigned NextReg = CSI[i + 1].getReg();
-      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
-          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
-        RPI.Reg2 = NextReg;
+      switch (RPI.Type) {
+      case RegPairInfo::GPR:
+        if (AArch64::GPR64RegClass.contains(NextReg))
+          RPI.Reg2 = NextReg;
+        break;
+      case RegPairInfo::FPR64:
+        if (AArch64::FPR64RegClass.contains(NextReg))
+          RPI.Reg2 = NextReg;
+        break;
+      }
     }
 
     // If either of the registers to be saved is the lr register, it means that
@@ -1283,10 +1299,19 @@
     // Rationale: This sequence saves uop updates compared to a sequence of
     // pre-increment spills like stp xi,xj,[sp,#-16]!
     // Note: Similar rationale and sequence for restores in epilog.
-    if (RPI.IsGPR)
-      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
-    else
-      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
+    unsigned Size, Align;
+    switch (RPI.Type) {
+    case RegPairInfo::GPR:
+      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
+      Size = RPI.isPaired() ? 16 : 8;
+      Align = 8;
+      break;
+    case RegPairInfo::FPR64:
+      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
+      Size = RPI.isPaired() ? 16 : 8;
+      Align = 8;
+      break;
+    }
     LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
                if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
                dbgs() << ") -> fi#(" << RPI.FrameIdx;
@@ -1302,15 +1327,16 @@
       MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
       MIB.addMemOperand(MF.getMachineMemOperand(
           MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
-          MachineMemOperand::MOStore, 8, 8));
+          MachineMemOperand::MOStore, Size, Align));
     }
     MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
         .addReg(AArch64::SP)
-        .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
+        .addImm(RPI.Offset) // [sp, #offset*scale],
+                            // where factor*scale is implicit
         .setMIFlag(MachineInstr::FrameSetup);
     MIB.addMemOperand(MF.getMachineMemOperand(
         MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
-        MachineMemOperand::MOStore, 8, 8));
+        MachineMemOperand::MOStore, Size, Align));
   }
   return true;
 }
@@ -1344,10 +1370,19 @@
     //    ldp x22, x21, [sp, #0]      // addImm(+0)
     // Note: see comment in spillCalleeSavedRegisters()
     unsigned LdrOpc;
-    if (RPI.IsGPR)
-      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
-    else
-      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
+    unsigned Size, Align;
+    switch (RPI.Type) {
+    case RegPairInfo::GPR:
+      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
+      Size = RPI.isPaired() ? 16 : 8;
+      Align = 8;
+      break;
+    case RegPairInfo::FPR64:
+      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
+      Size = RPI.isPaired() ? 16 : 8;
+      Align = 8;
+      break;
+    }
     LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
                if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
                dbgs() << ") -> fi#(" << RPI.FrameIdx;
@@ -1359,15 +1394,16 @@
       MIB.addReg(Reg2, getDefRegState(true));
       MIB.addMemOperand(MF.getMachineMemOperand(
           MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
-          MachineMemOperand::MOLoad, 8, 8));
+          MachineMemOperand::MOLoad, Size, Align));
     }
     MIB.addReg(Reg1, getDefRegState(true))
        .addReg(AArch64::SP)
-        .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
+        .addImm(RPI.Offset) // [sp, #offset*scale]
+                            // where factor*scale is implicit
        .setMIFlag(MachineInstr::FrameDestroy);
     MIB.addMemOperand(MF.getMachineMemOperand(
         MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
-        MachineMemOperand::MOLoad, 8, 8));
+        MachineMemOperand::MOLoad, Size, Align));
   };
 
   if (ReverseCSRRestoreSeq)
Index: test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
===================================================================
--- test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
+++ test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll
@@ -206,11 +206,11 @@
 ; CHECK-NEXT:    eor w8, w0, w1
 ; CHECK-NEXT:    and w20, w8, #0xffff00
 ; CHECK-NEXT:    mov w0, w20
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
@@ -225,12 +225,12 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    eor w0, w0, w1
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    and w20, w0, #0xffff00
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
Index: test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll
===================================================================
--- test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll
+++ test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll
@@ -212,11 +212,11 @@
 ; CHECK-NEXT:    eor w8, w0, w1
 ; CHECK-NEXT:    and w20, w8, #0x55555555
 ; CHECK-NEXT:    mov w0, w20
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
@@ -231,12 +231,12 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    eor w0, w0, w1
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    and w20, w0, #0x55555555
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
Index: test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
===================================================================
--- test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
+++ test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll
@@ -208,11 +208,11 @@
 ; CHECK-NEXT:    eor w8, w0, w1
 ; CHECK-NEXT:    and w20, w8, #0xf0f0f0f
 ; CHECK-NEXT:    mov w0, w20
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
@@ -227,12 +227,12 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    eor w0, w0, w1
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    and w20, w0, #0xf0f0f0f
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
Index: test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll
===================================================================
--- test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll
+++ test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll
@@ -201,11 +201,11 @@
 ; CHECK-NEXT:    eor w8, w0, w1
 ; CHECK-NEXT:    and w20, w8, #0xffff
 ; CHECK-NEXT:    mov w0, w20
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
@@ -220,12 +220,12 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    eor w0, w0, w1
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    and w20, w0, #0xffff
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
Index: test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll
===================================================================
--- test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll
+++ test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll
@@ -558,11 +558,11 @@
 ; CHECK-NEXT:    eor w8, w0, w1
 ; CHECK-NEXT:    and w20, w8, w3
 ; CHECK-NEXT:    mov w0, w20
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
@@ -576,12 +576,12 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x20, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    eor w0, w0, w1
-; CHECK-NEXT:    stp x19, x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x19, x30, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov w19, w1
 ; CHECK-NEXT:    and w20, w0, w3
 ; CHECK-NEXT:    bl use32
 ; CHECK-NEXT:    eor w0, w20, w19
-; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x19, x30, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x20, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
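
A note on the Scale logic the patch introduces: AArch64 STP/LDP encode their immediate in units of the access size, while pre/post-indexed STR/LDR of a single register take an unscaled byte offset, and a paired 64-bit spill covers 16 bytes (hence the "16-byte Folded Spill" memory-operand fix in the tests above). The sketch below is an illustration only, not LLVM API; the helper name is hypothetical.

// Illustrative sketch only -- hypothetical helper, not part of the patch.
// Scale = 8 models STP/LDP of X/D registers (immediate counts 8-byte slots);
// Scale = 1 models pre/post-indexed STR/LDR (immediate is a byte offset).
#include <cassert>
#include <cstdint>
#include <iostream>

int64_t encodeImm(int64_t ByteOffset, int Scale) {
  assert(ByteOffset % Scale == 0 && "offset must be a multiple of the scale");
  return ByteOffset / Scale;
}

int main() {
  std::cout << encodeImm(16, 8) << '\n';  // stp x19, x30, [sp, #16] -> imm 2
  std::cout << encodeImm(-32, 1) << '\n'; // str x20, [sp, #-32]!    -> imm -32
  // A paired 64-bit spill touches 2 * 8 = 16 bytes, matching the MMO size fix.
  std::cout << 2 * 8 << " bytes per paired spill\n";
  return 0;
}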