diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -114,6 +114,8 @@ ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8; ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX; ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; + ImmToIdxMap[PPC::LQ] = PPC::LQX_PSEUDO; + ImmToIdxMap[PPC::STQ] = PPC::STQX_PSEUDO; // VSX ImmToIdxMap[PPC::DFLOADf32] = PPC::LXSSPX; @@ -489,6 +491,14 @@ LLVM_DEBUG(dbgs() << "TRUE - Memory operand is X-Form.\n"); return true; } + + // This is a spill/restore of a quadword. + if ((Opcode == PPC::RESTORE_QUADWORD) || (Opcode == PPC::SPILL_QUADWORD)) { + LLVM_DEBUG(dbgs() << "Memory Operand: " << InstrInfo->getName(Opcode) + << " for register " << printReg(Reg, this) << ".\n"); + LLVM_DEBUG(dbgs() << "TRUE - Memory operand is a quadword.\n"); + return true; + } } LLVM_DEBUG(dbgs() << "FALSE - Scavenging is not required.\n"); return false; @@ -1533,6 +1543,7 @@ const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC; Register SRegHi = MF.getRegInfo().createVirtualRegister(RC), SReg = MF.getRegInfo().createVirtualRegister(RC); + unsigned NewOpcode = 0u; // Insert a set of rA with the full offset value before the ld, st, or add if (isInt<16>(Offset)) @@ -1561,7 +1572,7 @@ OpC != TargetOpcode::INLINEASM_BR) { assert(ImmToIdxMap.count(OpC) && "No indexed form of load or store available!"); - unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; + NewOpcode = ImmToIdxMap.find(OpC)->second; MI.setDesc(TII.get(NewOpcode)); OperandBase = 1; } else { @@ -1571,6 +1582,20 @@ Register StackReg = MI.getOperand(FIOperandNum).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true); + + // Since these are not real X-Form instructions, we must + // add the registers and access 0(NewReg) rather than + // emitting the X-Form pseudo. + if (NewOpcode == PPC::LQX_PSEUDO || NewOpcode == PPC::STQX_PSEUDO) { + assert(is64Bit && "Quadword loads/stores only supported in 64-bit mode"); + Register NewReg = MF.getRegInfo().createVirtualRegister(&PPC::G8RCRegClass); + BuildMI(MBB, II, dl, TII.get(PPC::ADD8), NewReg) + .addReg(SReg, RegState::Kill) + .addReg(StackReg); + MI.setDesc(TII.get(NewOpcode == PPC::LQX_PSEUDO ? PPC::LQ : PPC::STQ)); + MI.getOperand(OperandBase + 1).ChangeToRegister(NewReg, false); + MI.getOperand(OperandBase).ChangeToImmediate(0); + } } Register PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/llvm/test/CodeGen/PowerPC/LQ-STQ-32bit-offset.ll b/llvm/test/CodeGen/PowerPC/LQ-STQ-32bit-offset.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/LQ-STQ-32bit-offset.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr10 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mattr=+quadword-atomics -ppc-asm-full-reg-names -o - %s | FileCheck %s + +%struct.StructA = type { [16 x i8] } + +@s1 = dso_local global i128 324929342, align 16 + +; Function Attrs: mustprogress noinline nounwind optnone uwtable +define dso_local void @STQ() #0 { +; CHECK-LABEL: STQ: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lis r0, -16 +; CHECK-NEXT: ori r0, r0, 51488 +; CHECK-NEXT: stdux r1, r1, r0 +; CHECK-NEXT: .cfi_def_cfa_offset 997088 +; CHECK-NEXT: pld r3, s1@PCREL+8(0), 1 +; CHECK-NEXT: std r3, 40(r1) +; CHECK-NEXT: pld r3, s1@PCREL(0), 1 +; CHECK-NEXT: std r3, 32(r1) +; CHECK-NEXT: ld r3, 40(r1) +; CHECK-NEXT: ld r4, 32(r1) +; CHECK-NEXT: sync +; CHECK-NEXT: mr r5, r4 +; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: lis r3, 15 +; CHECK-NEXT: ori r3, r3, 14032 +; CHECK-NEXT: add r3, r3, r1 +; CHECK-NEXT: stq r4, 0(r3) +; CHECK-NEXT: ld r1, 0(r1) +; CHECK-NEXT: blr +entry: + %s2 = alloca %struct.StructA, align 16 + %s3 = alloca %struct.StructA, align 16 + %arr = alloca [997003 x i8], align 1 + %tmp = alloca %struct.StructA, align 16 + call void @llvm.memcpy.p0.p0.i64(ptr align 16 %tmp, ptr align 16 @s1, i64 16, i1 false) + %0 = load i128, ptr %tmp, align 16 + store atomic i128 %0, ptr %s2 seq_cst, align 16 + ret void +} + +define dso_local void @LQ() #0 { +; CHECK-LABEL: LQ: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lis r0, -16 +; CHECK-NEXT: ori r0, r0, 51488 +; CHECK-NEXT: stdux r1, r1, r0 +; CHECK-NEXT: .cfi_def_cfa_offset 997088 +; CHECK-NEXT: pld r3, s1@PCREL+8(0), 1 +; CHECK-NEXT: std r3, 40(r1) +; CHECK-NEXT: pld r3, s1@PCREL(0), 1 +; CHECK-NEXT: std r3, 32(r1) +; CHECK-NEXT: sync +; CHECK-NEXT: lis r3, 15 +; CHECK-NEXT: ori r3, r3, 14016 +; CHECK-NEXT: add r3, r3, r1 +; CHECK-NEXT: lq r4, 0(r3) +; CHECK-NEXT: cmpd cr7, r5, r5 +; CHECK-NEXT: bne- cr7, .+4 +; CHECK-NEXT: isync +; CHECK-NEXT: ld r1, 0(r1) +; CHECK-NEXT: blr +entry: + %s2 = alloca %struct.StructA, align 16 + %s3 = alloca %struct.StructA, align 16 + %arr = alloca [997003 x i8], align 1 + %tmp = alloca %struct.StructA, align 16 + call void @llvm.memcpy.p0.p0.i64(ptr align 16 %tmp, ptr align 16 @s1, i64 16, i1 false) + %0 = load i128, ptr %tmp, align 16 + %1 = load atomic i128, ptr %s3 seq_cst, align 16 + ret void +} + +; Function Attrs: argmemonly nofree nounwind willreturn +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1 + +attributes #0 = { noinline optnone } diff --git a/llvm/test/CodeGen/PowerPC/LQ-STQ.ll b/llvm/test/CodeGen/PowerPC/LQ-STQ.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/LQ-STQ.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr10 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mattr=+quadword-atomics -ppc-asm-full-reg-names -o - %s | FileCheck %s + +%struct.StructA = type { [16 x i8] } + +@s1 = dso_local global %struct.StructA { [16 x i8] c"\0B\0C\0D\0E\0F\10\11\12\13\14\15\16\17\18\19\1A" }, align 16 + +define dso_local void @test() { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv vs0, s1@PCREL(0), 1 +; CHECK-NEXT: stxv vs0, -48(r1) +; CHECK-NEXT: ld r3, -40(r1) +; CHECK-NEXT: ld r4, -48(r1) +; CHECK-NEXT: sync +; CHECK-NEXT: mr r5, r4 +; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: addi r3, r1, -16 +; CHECK-NEXT: stq r4, 0(r3) +; CHECK-NEXT: sync +; CHECK-NEXT: addi r3, r1, -16 +; CHECK-NEXT: lq r4, 0(r3) +; CHECK-NEXT: cmpd cr7, r5, r5 +; CHECK-NEXT: bne- cr7, .+4 +; CHECK-NEXT: isync +; CHECK-NEXT: std r4, -24(r1) +; CHECK-NEXT: std r5, -32(r1) +; CHECK-NEXT: blr +entry: + %s2 = alloca %struct.StructA, align 16 + %s3 = alloca %struct.StructA, align 16 + %agg.tmp.ensured = alloca %struct.StructA, align 16 + call void @llvm.memcpy.p0.p0.i64(ptr align 16 %agg.tmp.ensured, ptr align 16 @s1, i64 16, i1 false) + %0 = load i128, ptr %agg.tmp.ensured, align 16 + store atomic i128 %0, ptr %s2 seq_cst, align 16 + %atomic-load = load atomic i128, ptr %s2 seq_cst, align 16 + store i128 %atomic-load, ptr %s3, align 16 + ret void +} + +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)