diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td --- a/llvm/lib/Target/PowerPC/P10InstrResources.td +++ b/llvm/lib/Target/PowerPC/P10InstrResources.td @@ -1370,7 +1370,7 @@ LDCIX, LHZCIX, LWZCIX, - MTSPR, MTSPR8, MTSR, MTVRSAVE, MTVRSAVEv + MTSPR, MTSPR8, MTSR, MTUDSCR, MTVRSAVE, MTVRSAVEv )>; // Expand instructions @@ -1467,7 +1467,7 @@ // 13 Cycles Unknown operations, 1 input operands def : InstRW<[P10W_MFL_13C, P10W_DISP_EVEN, P10W_DISP_ANY], (instrs - MFSPR, MFSPR8, MFSR, MFTB8, MFVRSAVE, MFVRSAVEv + MFSPR, MFSPR8, MFSR, MFTB8, MFUDSCR, MFVRSAVE, MFVRSAVEv )>; // 10 Cycles SIMD Matrix Multiply Engine operations, 0 input operands diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -940,6 +940,7 @@ (instregex "M(T|F)TB(8)?$"), (instregex "MF(SPR|CTR|LR)(8)?$"), (instregex "M(T|F)MSR(D)?$"), + (instregex "M(T|F)(U)?DSCR$"), (instregex "MTSPR(8)?$") )>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3069,6 +3069,22 @@ } } +let hasSideEffects = 1 in { + def MTUDSCR : XFXForm_7_ext<31, 467, 3, (outs), (ins gprc:$rX), + "mtspr 3, $rX", IIC_SprMTSPR>, + PPC970_DGroup_Single, PPC970_Unit_FXU; + def MFUDSCR : XFXForm_1_ext<31, 339, 3, (outs gprc:$rX), (ins), + "mfspr $rX, 3", IIC_SprMFSPR>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +// Disable these aliases on AIX for now because the assembler (as) there does not support them. 
+let Predicates = [ModernAs] in { +// Aliases mapping mtudscr/mfudscr onto the MTUDSCR/MFUDSCR (mtspr/mfspr) instructions +def : InstAlias<"mtudscr $Rx", (MTUDSCR gprc:$Rx)>; +def : InstAlias<"mfudscr $Rx", (MFUDSCR gprc:$Rx)>; +} + let isCodeGenOnly = 1 in { // Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed // like a GPR on the PPC970. As such, copies in and out have the same diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -46,6 +47,10 @@ RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true), cl::desc("Run pre-emit peephole optimizations.")); +static cl::opt<uint64_t> +DSCRValue("ppc-set-dscr", cl::Hidden, + cl::desc("Set the Data Stream Control Register.")); + namespace { static bool hasPCRelativeForm(MachineInstr &Use) { @@ -407,6 +412,35 @@ } bool runOnMachineFunction(MachineFunction &MF) override { + // If the user wants to set the DSCR using command-line options, + // load in the specified value at the start of main. 
+ if (DSCRValue.getNumOccurrences() > 0 && MF.getName().equals("main") + && MF.getFunction().hasExternalLinkage()) { + const uint32_t DSCRBits = static_cast<uint32_t>(DSCRValue & 0x01FFFFFF); // 25-bit DSCR mask; the option itself is left unmodified + RegScavenger RS; + MachineBasicBlock &MBB = MF.front(); + // Find a GPR that is unused at the entry of the first block + RS.enterBasicBlock(MBB); + unsigned InDSCR = RS.FindUnusedReg(&PPC::GPRCRegClass); + if (InDSCR) { + const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo(); + DebugLoc dl; + MachineBasicBlock::iterator IP = MBB.begin(); // Insert Point + // Copy the masked 32-bit DSCR value into the GPR InDSCR using LIS and + // ORI, then move to DSCR. If the requested DSCR value is contained + // in a 16-bit signed number, we can emit a single `LI`, but the impact + // of saving one instruction in one function does not warrant any + // additional complexity in the logic here. + BuildMI(MBB, IP, dl, TII->get(PPC::LIS), InDSCR) + .addImm(DSCRBits >> 16); + BuildMI(MBB, IP, dl, TII->get(PPC::ORI), InDSCR) + .addReg(InDSCR, RegState::Kill) + .addImm(DSCRBits & 0xFFFF); + BuildMI(MBB, IP, dl, TII->get(PPC::MTUDSCR)).addReg(InDSCR, RegState::Kill); + } else + errs() << "Warning: Ran out of registers - Unable to set DSCR as requested\n"; + } + if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) { // Remove UNENCODED_NOP even when this pass is disabled. 
// This needs to be done unconditionally so we don't emit zeros diff --git a/llvm/test/CodeGen/PowerPC/dscr-set.ll b/llvm/test/CodeGen/PowerPC/dscr-set.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/dscr-set.ll @@ -0,0 +1,23 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=pwr9 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,DEFAULT +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=pwr9 \ +; RUN: -ppc-set-dscr=0xFFFFFFFFFFFFFFFF | FileCheck %s --check-prefixes=CHECK,UPPER +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=pwr9 \ +; RUN: -ppc-set-dscr=0x01C4 | FileCheck %s --check-prefixes=CHECK,LOWER + +define i32 @main() { + +; CHECK: # %bb.0: + +; DEFAULT-NOT: mtudscr + +; UPPER: lis [[inReg:[0-9]+]], 511 +; UPPER-NEXT: ori [[inReg]], [[inReg]], 65535 +; UPPER-NEXT: mtudscr [[inReg]] + +; LOWER: lis [[inReg:[0-9]+]], 0 +; LOWER-NEXT: ori [[inReg]], [[inReg]], 452 +; LOWER-NEXT: mtudscr [[inReg]] + + ret i32 1 +} diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding.txt --- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding.txt @@ -850,6 +850,12 @@ # CHECK: mfspr 2, 600 0x7c 0x58 0x92 0xa6 +# CHECK: mtudscr 2 +0x7c 0x43 0x03 0xa6 + +# CHECK: mfudscr 2 +0x7c 0x43 0x02 0xa6 + # CHECK: mtcrf 123, 2 0x7c 0x47 0xb1 0x20 diff --git a/llvm/test/MC/PowerPC/ppc64-encoding-ext.s b/llvm/test/MC/PowerPC/ppc64-encoding-ext.s --- a/llvm/test/MC/PowerPC/ppc64-encoding-ext.s +++ b/llvm/test/MC/PowerPC/ppc64-encoding-ext.s @@ -3431,6 +3431,12 @@ # CHECK-BE: mfrtcl 2 # encoding: [0x7c,0x45,0x02,0xa6] # CHECK-LE: mfrtcl 2 # encoding: [0xa6,0x02,0x45,0x7c] mfrtcl 2 +# CHECK-BE: mtudscr 2 # encoding: [0x7c,0x43,0x03,0xa6] +# CHECK-LE: mtudscr 2 # encoding: [0xa6,0x03,0x43,0x7c] + mtudscr 2 +# CHECK-BE: mfudscr 2 # encoding: [0x7c,0x43,0x02,0xa6] +# CHECK-LE: mfudscr 2 # 
encoding: [0xa6,0x02,0x43,0x7c] + mfudscr 2 # CHECK-BE: mtdscr 2 # encoding: [0x7c,0x51,0x03,0xa6] # CHECK-LE: mtdscr 2 # encoding: [0xa6,0x03,0x51,0x7c] mtdscr 2