diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -489,6 +489,14 @@
       LLVM_DEBUG(dbgs() << "TRUE - Memory operand is X-Form.\n");
       return true;
     }
+
+    // This is a spill/restore of a quadword.
+    if ((Opcode == PPC::RESTORE_QUADWORD) || (Opcode == PPC::SPILL_QUADWORD)) {
+      LLVM_DEBUG(dbgs() << "Memory Operand: " << InstrInfo->getName(Opcode)
+                        << " for register " << printReg(Reg, this) << ".\n");
+      LLVM_DEBUG(dbgs() << "TRUE - Memory operand is a quadword.\n");
+      return true;
+    }
   }
   LLVM_DEBUG(dbgs() << "FALSE - Scavenging is not required.\n");
   return false;
@@ -1491,6 +1499,45 @@
     Offset += MFI.getStackSize();
   }
 
+  // Handle LQ/STQ instructions.
+  if (OpC == PPC::LQ || OpC == PPC::STQ) {
+    assert(isInt<32>(Offset) && "Offset doesn't fit in 32 bits.");
+    Register StackReg = MI.getOperand(FIOperandNum).getReg();
+    // The offset is a multiple of 16/4 for LQ/STQ respectively, and it
+    // fits in the instruction.
+    if (((Offset % offsetMinAlign(MI)) == 0) && isInt<16>(Offset)) {
+      MI.getOperand(1).ChangeToImmediate(Offset);
+      MI.getOperand(2).ChangeToRegister(StackReg, false);
+    }
+    // The offset is not a multiple of 16/4 for LQ/STQ respectively, and/or
+    // it does not fit in the instruction.
+    else {
+      bool is64Bit = TM.isPPC64();
+      const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+      const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+      const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
+      Register SReg = MF.getRegInfo().createVirtualRegister(RC);
+      // The offset is not a multiple of 16/4 for LQ/STQ respectively, but
+      // it fits in the instruction.
+      if (isInt<16>(Offset))
+        BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ADDI8 : PPC::ADDI), SReg)
+            .addReg(StackReg)
+            .addImm(Offset);
+      // The offset exceeds 16 bits, and may also not be a multiple of 16/4.
+      else {
+        BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ADDIS8 : PPC::ADDIS), SReg)
+            .addReg(StackReg)
+            .addImm(Offset >> 16);
+        BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
+            .addReg(SReg)
+            .addImm(Offset);
+      }
+      MI.getOperand(1).ChangeToImmediate(0);
+      MI.getOperand(2).ChangeToRegister(SReg, false, false, true);
+    }
+    return;
+  }
+
   // If we encounter an LXVP/STXVP with an offset that doesn't fit, we can
   // transform it to the prefixed version so we don't have to use the XForm.
   if ((OpC == PPC::LXVP || OpC == PPC::STXVP) &&
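
A standalone sketch, not part of the patch, of the three-way decision the hunk above makes for an LQ/STQ frame-index offset: fold the displacement into the instruction when it is aligned and fits in 16 bits, materialize the address with a single addi when it fits but is unaligned, and fall back to addis+ori when it exceeds 16 bits. The names pickStrategy, fitsInt16, and MinAlign are hypothetical; MinAlign stands in for offsetMinAlign(MI), which is 16 for the DQ-form LQ and 4 for the DS-form STQ.

  #include <cstdint>
  #include <cstdio>

  // Hypothetical model of the patch's decision tree, not LLVM API.
  enum class Strategy { DFormImmediate, AddiScratch, AddisOriScratch };

  static bool fitsInt16(int64_t V) { return V >= -32768 && V <= 32767; }

  static Strategy pickStrategy(int64_t Offset, int64_t MinAlign) {
    if (Offset % MinAlign == 0 && fitsInt16(Offset))
      return Strategy::DFormImmediate; // fold Offset into the instruction
    if (fitsInt16(Offset))
      return Strategy::AddiScratch;    // addi SReg, StackReg, Offset
    return Strategy::AddisOriScratch;  // addis SReg, StackReg, hi; ori SReg, SReg, lo
  }

  int main() {
    printf("%d\n", static_cast<int>(pickStrategy(32, 16)));     // 0: aligned, fits
    printf("%d\n", static_cast<int>(pickStrategy(-15, 16)));    // 1: unaligned, fits
    printf("%d\n", static_cast<int>(pickStrategy(360400, 16))); // 2: too large
    return 0;
  }
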
diff --git a/llvm/test/CodeGen/PowerPC/LQ-STQ-32bit-offset.ll b/llvm/test/CodeGen/PowerPC/LQ-STQ-32bit-offset.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/LQ-STQ-32bit-offset.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -ppc-asm-full-reg-names -o - %s | FileCheck %s
+
+%struct.StructA = type { [16 x i8] }
+
+@s1 = dso_local global %struct.StructA { [16 x i8] c"\0B\0C\0D\0E\0F\10\11\12\13\14\15\16\17\18\19\1A" }, align 16
+
+define dso_local void @test() #0 {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK:         sync
+; CHECK-NEXT:    addis [[REG1:r[0-9]+]], r31, [[OFF1:[0-9]+]]
+; CHECK-NEXT:    ori [[REG1]], [[REG1]], [[OFF2:[0-9]+]]
+; CHECK-NEXT:    stq r{{[0-9]+}}, 0([[REG1]])
+; CHECK-NEXT:    sync
+; CHECK-NEXT:    addis [[REG2:r[0-9]+]], r31, [[OFF1]]
+; CHECK-NEXT:    ori [[REG2]], [[REG2]], [[OFF2]]
+; CHECK-NEXT:    lq r{{[0-9]+}}, 0([[REG2]])
+entry:
+  %s2 = alloca %struct.StructA, align 16
+  %s3 = alloca %struct.StructA, align 16
+  %arr = alloca [90100 x i32], align 4
+  %agg.tmp.ensured = alloca %struct.StructA, align 16
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %agg.tmp.ensured, ptr align 16 @s1, i64 16, i1 false)
+  %0 = load i128, ptr %agg.tmp.ensured, align 16
+  store atomic i128 %0, ptr %s2 seq_cst, align 16
+  %atomic-load = load atomic i128, ptr %s2 seq_cst, align 16
+  store i128 %atomic-load, ptr %s3, align 16
+  ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr10" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+isa-v31-instructions,+mma,+paired-vector-memops,+pcrelative-memops,+power10-vector,+power8-vector,+power9-vector,+prefix-instrs,+quadword-atomics,+vsx,-htm,-privileged,-rop-protect,-spe" }
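
The [90100 x i32] array in the test above occupies 90100 * 4 = 360400 bytes, pushing the quadword slots past the signed 16-bit displacement range, which is what forces the addis/ori pair in the CHECK lines. A standalone sketch, with an illustrative offset rather than the real frame-layout value, of how the two 16-bit immediates recombine into the 32-bit offset:

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int main() {
    // 90100 i32 elements -> 360400 bytes, well past the 16-bit limit.
    int64_t Offset = 90100LL * 4;        // illustrative frame offset
    assert(Offset > 32767);              // cannot be a D-form displacement
    int64_t Hi = Offset >> 16;           // addis immediate (Offset >> 16)
    int64_t Lo = Offset & 0xFFFF;        // ori immediate (zero-extended)
    assert(((Hi << 16) | Lo) == Offset); // the pair reconstructs the offset
    printf("addis imm = %lld, ori imm = %lld\n", (long long)Hi, (long long)Lo);
    return 0;
  }
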
diff --git a/llvm/test/CodeGen/PowerPC/LQ-STQ-unaligned-offset.mir b/llvm/test/CodeGen/PowerPC/LQ-STQ-unaligned-offset.mir
new file
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/LQ-STQ-unaligned-offset.mir
@@ -0,0 +1,78 @@
+# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+# RUN: llc -mcpu=pwr8 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+# RUN:   -ppc-asm-full-reg-names -start-before=prologepilog -o - %s | FileCheck %s
+
+--- |
+  target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+  target triple = "powerpc64le-unknown-linux-gnu"
+
+  %struct.StructA = type { [16 x i8] }
+
+  @s1 = dso_local global %struct.StructA { [16 x i8] c"\0B\0C\0D\0E\0F\10\11\12\13\14\15\16\17\18\19\1A" }, align 16
+
+  define dso_local void @test() #0 {
+  ; CHECK-LABEL: test:
+  ; CHECK:       # %bb.0: # %entry
+  ; CHECK:         addi [[REG1:r[0-9]+]], r1, -15
+  ; CHECK-NEXT:    stq r{{[0-9]+}}, 0([[REG1]])
+  ; CHECK-NEXT:    addi [[REG2:r[0-9]+]], r1, -15
+  ; CHECK-NEXT:    sync
+  ; CHECK-NEXT:    lq r{{[0-9]+}}, 0([[REG2]])
+  entry:
+    %s2 = alloca %struct.StructA, align 16
+    %s3 = alloca %struct.StructA, align 16
+    %agg.tmp.ensured = alloca %struct.StructA, align 16
+    call void @llvm.memcpy.p0.p0.i64(ptr align 16 %agg.tmp.ensured, ptr align 16 @s1, i64 16, i1 false)
+    %0 = load i128, ptr %agg.tmp.ensured, align 16
+    call void @llvm.ppc.sync()
+    store atomic i128 %0, ptr %s2 monotonic, align 16
+    call void @llvm.ppc.sync()
+    %atomic-load = load atomic i128, ptr %s2 monotonic, align 16
+    call void @llvm.ppc.cfence.i128(i128 %atomic-load)
+    store i128 %atomic-load, ptr %s3, align 16
+    ret void
+  }
+
+  declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+
+  declare void @llvm.ppc.sync()
+
+  declare void @llvm.ppc.cfence.i128(i128)
+
+...
+---
+name: test
+alignment: 16
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 16
+  localFrameSize: 48
+stack:
+  - { id: 0, name: s2, size: 16, alignment: 16, local-offset: -16 }
+  - { id: 1, name: s3, size: 16, alignment: 16, local-offset: -32 }
+  - { id: 2, name: agg.tmp.ensured, size: 16, alignment: 16, local-offset: -48 }
+machineFunctionInfo: {}
+body: |
+  bb.0.entry:
+    liveins: $x2
+    renamable $x3 = ADDIStocHA8 $x2, @s1
+    renamable $x3 = ADDItocL killed renamable $x3, @s1, implicit $x2
+    renamable $x4 = LD 8, renamable $x3 :: (dereferenceable load (s64) from @s1 + 8)
+    STD killed renamable $x4, 8, %stack.2.agg.tmp.ensured :: (store (s64) into %ir.agg.tmp.ensured + 8, basealign 16)
+    renamable $x3 = LD 0, killed renamable $x3 :: (dereferenceable load (s64) from @s1, align 16)
+    STD killed renamable $x3, 0, %stack.2.agg.tmp.ensured :: (store (s64) into %ir.agg.tmp.ensured, align 16)
+    renamable $x4 = LD 8, %stack.2.agg.tmp.ensured :: (dereferenceable load (s64) from %ir.agg.tmp.ensured + 8, basealign 16)
+    renamable $x3 = LD 0, %stack.2.agg.tmp.ensured :: (dereferenceable load (s64) from %ir.agg.tmp.ensured, align 16)
+    SYNC 0
+    renamable $g8p2 = BUILD_QUADWORD killed renamable $x3, killed renamable $x4
+    STQ killed renamable $g8p2, 1, %stack.0.s2
+    SYNC 0
+    early-clobber renamable $g8p2 = LQ 1, %stack.0.s2
+    renamable $x3 = COPY renamable $x5
+    renamable $x4 = COPY renamable $x4, implicit killed $g8p2
+    CFENCE8 renamable $x3, implicit-def dead $cr7
+    STD killed renamable $x4, 8, %stack.1.s3 :: (store (s64) into %ir.s3 + 8, basealign 16)
+    STD killed renamable $x3, 0, %stack.1.s3 :: (store (s64) into %ir.s3, align 16)
+    BLR8 implicit $lr8, implicit $rm
+
+...
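
In the MIR test above, the STQ/LQ carry an immediate of 1 against 16-byte-aligned stack slots, so the resolved byte offset (here -15 after frame-index elimination) is neither a multiple of 16 (lq, DQ-form) nor of 4 (stq, DS-form) and cannot be encoded directly; the patch folds it into a scratch register instead (the addi ..., r1, -15 in the CHECK lines) and addresses the quadword as 0(reg). A standalone sketch of the lq-side encodability check (encodableInDQForm is a hypothetical helper, not LLVM API):

  #include <cstdint>
  #include <cstdio>

  // DQ-form displacements are 16-bit signed values whose low 4 bits must
  // be zero; anything else has to live in a register.
  static bool encodableInDQForm(int64_t Off) {
    return Off >= -32768 && Off <= 32767 && (Off & 0xF) == 0;
  }

  int main() {
    const int64_t Offsets[] = {-32, 0, 1, -15, 48};
    for (int64_t Off : Offsets)
      printf("%6lld -> %s\n", (long long)Off,
             encodableInDQForm(Off) ? "lq/stq displacement"
                                    : "addi into scratch, then 0(SReg)");
    return 0;
  }
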
diff --git a/llvm/test/CodeGen/PowerPC/LQ-STQ.ll b/llvm/test/CodeGen/PowerPC/LQ-STQ.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/LQ-STQ.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -ppc-asm-full-reg-names -o - %s | FileCheck %s
+
+%struct.StructA = type { [16 x i8] }
+
+@s1 = dso_local global %struct.StructA { [16 x i8] c"\0B\0C\0D\0E\0F\10\11\12\13\14\15\16\17\18\19\1A" }, align 16
+
+define dso_local void @test() {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK:         stq r{{[0-9]+}}, -[[OFF:[0-9]+]](r1)
+; CHECK-NEXT:    sync
+; CHECK-NEXT:    lq r{{[0-9]+}}, -[[OFF]](r1)
+entry:
+  %s2 = alloca %struct.StructA, align 16
+  %s3 = alloca %struct.StructA, align 16
+  %agg.tmp.ensured = alloca %struct.StructA, align 16
+  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %agg.tmp.ensured, ptr align 16 @s1, i64 16, i1 false)
+  %0 = load i128, ptr %agg.tmp.ensured, align 16
+  store atomic i128 %0, ptr %s2 seq_cst, align 16
+  %atomic-load = load atomic i128, ptr %s2 seq_cst, align 16
+  store i128 %atomic-load, ptr %s3, align 16
+  ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)