diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -337,17 +337,6 @@
        IE = PPC::VRRCRegClass.end(); I != IE; ++I)
     markSuperRegs(Reserved, *I);
 
-  if (Subtarget.isAIXABI() && Subtarget.hasAltivec() &&
-      !TM.getAIXExtendedAltivecABI()) {
-    // In the AIX default Altivec ABI, vector registers VR20-VR31 are reserved
-    // and cannot be used.
-    for (auto Reg : CSR_Altivec_SaveList) {
-      if (Reg == 0)
-        break;
-      markSuperRegs(Reserved, Reg);
-    }
-  }
-
   assert(checkAllSuperRegsMarked(Reserved));
   return Reserved;
 }
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -340,12 +340,24 @@
                                                 (sequence "F%u", 31, 14))>;
 def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
 
+def VRRC_NO_CSR :
+  RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32,v2f64, f128], 128,
+                (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
+                     V12, V13, V14, V15, V16, V17, V18, V19)>;
 def VRRC : RegisterClass<"PPC",
                          [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32,v2f64, f128],
                          128,
                          (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
                               V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
-                              V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
+                              V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)> {
+  let AltOrders = [(add VRRC_NO_CSR)];
+  let AltOrderSelect = [{
+    return !MF.getSubtarget<PPCSubtarget>()
+                .getTargetMachine()
+                .getAIXExtendedAltivecABI() &&
+           MF.getSubtarget<PPCSubtarget>().isAIXABI();
+  }];
+}
 
 // VSX register classes (the allocation order mirrors that of the corresponding
 // subregister classes).
@@ -353,16 +365,44 @@
                           (add (sequence "VSL%u", 0, 13),
                                (sequence "VSL%u", 31, 14))>;
 def VSRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
-                         (add VSLRC, VRRC)>;
+                         (add VSLRC, VRRC)> {
+  let AltOrders = [(add VSLRC, VRRC_NO_CSR)];
+  let AltOrderSelect = [{
+    return !MF.getSubtarget<PPCSubtarget>()
+                .getTargetMachine()
+                .getAIXExtendedAltivecABI() &&
+           MF.getSubtarget<PPCSubtarget>().isAIXABI();
+  }];
+}
 
+def VFRC_NO_CSR : RegisterClass<"PPC", [f64], 64,
+                                (add VF2, VF3, VF4, VF5, VF0, VF1, VF6, VF7,
+                                     VF8, VF9, VF10, VF11, VF12, VF13, VF14,
+                                     VF15, VF16, VF17, VF18, VF19)>;
 // Register classes for the 64-bit "scalar" VSX subregisters.
 def VFRC : RegisterClass<"PPC", [f64], 64,
                          (add VF2, VF3, VF4, VF5, VF0, VF1, VF6, VF7,
                               VF8, VF9, VF10, VF11, VF12, VF13, VF14,
                               VF15, VF16, VF17, VF18, VF19, VF31, VF30,
                               VF29, VF28, VF27, VF26, VF25, VF24, VF23,
-                              VF22, VF21, VF20)>;
-def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
+                              VF22, VF21, VF20)> {
+  let AltOrders = [(add VFRC_NO_CSR)];
+  let AltOrderSelect = [{
+    return !MF.getSubtarget<PPCSubtarget>()
+                .getTargetMachine()
+                .getAIXExtendedAltivecABI() &&
+           MF.getSubtarget<PPCSubtarget>().isAIXABI();
+  }];
+}
+def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)> {
+  let AltOrders = [(add F8RC, VFRC_NO_CSR)];
+  let AltOrderSelect = [{
+    return !MF.getSubtarget<PPCSubtarget>()
+                .getTargetMachine()
+                .getAIXExtendedAltivecABI() &&
+           MF.getSubtarget<PPCSubtarget>().isAIXABI();
+  }];
+}
 
 // Allow spilling GPR's into caller-saved VSR's.
 def SPILLTOVSRRC : RegisterClass<"PPC", [i64, f64], 64, (add G8RC, (sub VSFRC,
@@ -370,7 +410,15 @@
                                   (sequence "F%u", 31, 14)))>;
 
 // Register class for single precision scalars in VSX registers
-def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>;
+def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)> {
+  let AltOrders = [(add F8RC, VFRC_NO_CSR)];
+  let AltOrderSelect = [{
+    return !MF.getSubtarget<PPCSubtarget>()
+                .getTargetMachine()
+                .getAIXExtendedAltivecABI() &&
+           MF.getSubtarget<PPCSubtarget>().isAIXABI();
+  }];
+}
 
 def CRBITRC : RegisterClass<"PPC", [i1], 32,
   (add CR2LT, CR2GT, CR2EQ, CR2UN,
diff --git a/llvm/test/CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll b/llvm/test/CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll
@@ -0,0 +1,155 @@
+;; Test to ensure that we are not using any of the aliased reserved registers
+;; under the default Altivec ABI on AIX.
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+altivec \
+; RUN:     -stop-after=machine-cp -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck %s --check-prefix=DFLABI
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+altivec -vec-extabi \
+; RUN:     -stop-after=machine-cp -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck %s --check-prefix=EXTABI
+
+define double @dbl_test(double %a, double* %b) local_unnamed_addr {
+entry:
+  %0 = load volatile double, double* %b, align 4
+  %add = fadd double %0, %a
+  store volatile double %add, double* %b, align 4
+  ;; Clobber all vector and floating-point registers. In the default Altivec
+  ;; ABI this forces a register spill since no registers are free to use.
+  tail call void asm sideeffect "nop", "~{v19},~{v18},~{v17},~{v16},~{v15},~{v14},~{v13},~{v12},~{v11},~{v10},~{v9},~{v8},~{v7},~{v6},~{v5},~{v4},~{v3},~{v2},~{v1},~{v0},~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
+  %mul = fmul double %a, %a
+  %1 = load volatile double, double* %b, align 4
+  %add1 = fadd double %mul, %1
+  store volatile double %add1, double* %b, align 4
+  %2 = load volatile double, double* %b, align 4
+  ret double %2
+}
+
+define <4 x i32> @vec_test(<4 x i32> %a, <4 x i32>* %b) local_unnamed_addr {
+entry:
+  %0 = load volatile <4 x i32>, <4 x i32>* %b, align 4
+  %add = add <4 x i32> %0, %a
+  store volatile <4 x i32> %add, <4 x i32>* %b, align 4
+  tail call void asm sideeffect "nop", "~{v19},~{v18},~{v17},~{v16},~{v15},~{v14},~{v13},~{v12},~{v11},~{v10},~{v9},~{v8},~{v7},~{v6},~{v5},~{v4},~{v3},~{v2},~{v1},~{v0},~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
+  %mul = mul <4 x i32> %a, %a
+  %1 = load volatile <4 x i32>, <4 x i32>* %b, align 4
+  %add1 = add <4 x i32> %mul, %1
+  store volatile <4 x i32> %add1, <4 x i32>* %b, align 4
+  %2 = load volatile <4 x i32>, <4 x i32>* %b, align 4
+  ret <4 x i32> %2
+}
+
+; DFLABI-LABEL: dbl_test
+
+; DFLABI-NOT: $v20
+; DFLABI-NOT: $v21
+; DFLABI-NOT: $v22
+; DFLABI-NOT: $v23
+; DFLABI-NOT: $v24
+; DFLABI-NOT: $v25
+; DFLABI-NOT: $v26
+; DFLABI-NOT: $v27
+; DFLABI-NOT: $v28
+; DFLABI-NOT: $v29
+; DFLABI-NOT: $v30
+; DFLABI-NOT: $v31
+
+; DFLABI-NOT: $vf20
+; DFLABI-NOT: $vf21
+; DFLABI-NOT: $vf22
+; DFLABI-NOT: $vf23
+; DFLABI-NOT: $vf24
+; DFLABI-NOT: $vf25
+; DFLABI-NOT: $vf26
+; DFLABI-NOT: $vf27
+; DFLABI-NOT: $vf28
+; DFLABI-NOT: $vf29
+; DFLABI-NOT: $vf30
+; DFLABI-NOT: $vf31
+
+; DFLABI-NOT: $vs20
+; DFLABI-NOT: $vs21
+; DFLABI-NOT: $vs22
+; DFLABI-NOT: $vs23
+; DFLABI-NOT: $vs24
+; DFLABI-NOT: $vs25
+; DFLABI-NOT: $vs26
+; DFLABI-NOT: $vs27
+; DFLABI-NOT: $vs28
+; DFLABI-NOT: $vs29
+; DFLABI-NOT: $vs30
+; DFLABI-NOT: $vs31
+
+; EXTABI-LABEL: dbl_test
+; EXTABI:       liveins:
+; EXTABI-NEXT:    - { reg: '$f1', virtual-reg: '' }
+; EXTABI-NEXT:    - { reg: '$x4', virtual-reg: '' }
+; EXTABI:       body: |
+; EXTABI:         bb.0.entry:
+; EXTABI:         liveins: $f1, $x4
+; EXTABI-DAG:     renamable $f0 = XFLOADf64 $zero8, renamable $x4 :: (volatile load 8 from %ir.b, align 4)
+; EXTABI-DAG:     renamable $f0 = nofpexcept XSADDDP killed renamable $f0, renamable $f1, implicit $rm
+; EXTABI-DAG:     renamable $vf31 = nofpexcept XSMULDP killed renamable $f1, renamable $f1, implicit $rm
+; EXTABI:         XFSTOREf64 killed renamable $f0, $zero8, renamable $x4 :: (volatile store 8 into %ir.b, align 4)
+; EXTABI-LABEL:   INLINEASM
+; EXTABI-DAG:     renamable $f0 = XFLOADf64 $zero8, renamable $x4 :: (volatile load 8 from %ir.b, align 4)
+; EXTABI-DAG:     renamable $f0 = nofpexcept XSADDDP killed renamable $vf31, killed renamable $f0, implicit $rm
+; EXTABI-DAG:     XFSTOREf64 killed renamable $f0, $zero8, renamable $x4 :: (volatile store 8 into %ir.b, align 4)
+; EXTABI:         renamable $f1 = XFLOADf64 $zero8, killed renamable $x4 :: (volatile load 8 from %ir.b, align 4)
+
+; DFLABI-LABEL: vec_test
+
+; DFLABI-NOT: $v20
+; DFLABI-NOT: $v21
+; DFLABI-NOT: $v22
+; DFLABI-NOT: $v23
+; DFLABI-NOT: $v24
+; DFLABI-NOT: $v25
+; DFLABI-NOT: $v26
+; DFLABI-NOT: $v27
+; DFLABI-NOT: $v28
+; DFLABI-NOT: $v29
+; DFLABI-NOT: $v30
+; DFLABI-NOT: $v31
+
+; DFLABI-NOT: $vf20
+; DFLABI-NOT: $vf21
+; DFLABI-NOT: $vf22
+; DFLABI-NOT: $vf23
+; DFLABI-NOT: $vf24
+; DFLABI-NOT: $vf25
+; DFLABI-NOT: $vf26
+; DFLABI-NOT: $vf27
+; DFLABI-NOT: $vf28
+; DFLABI-NOT: $vf29
+; DFLABI-NOT: $vf30
+; DFLABI-NOT: $vf31
+
+; DFLABI-NOT: $vs20
+; DFLABI-NOT: $vs21
+; DFLABI-NOT: $vs22
+; DFLABI-NOT: $vs23
+; DFLABI-NOT: $vs24
+; DFLABI-NOT: $vs25
+; DFLABI-NOT: $vs26
+; DFLABI-NOT: $vs27
+; DFLABI-NOT: $vs28
+; DFLABI-NOT: $vs29
+; DFLABI-NOT: $vs30
+; DFLABI-NOT: $vs31
+
+; EXTABI-LABEL: vec_test
+
+; EXTABI:       liveins:
+; EXTABI-NEXT:    - { reg: '$v2', virtual-reg: '' }
+; EXTABI-NEXT:    - { reg: '$x3', virtual-reg: '' }
+; EXTABI:       body: |
+; EXTABI-DAG:     bb.0.entry:
+; EXTABI-DAG:     liveins: $v2, $x3
+; EXTABI-DAG:     renamable $v3 = LXVW4X $zero8, renamable $x3 :: (volatile load 16 from %ir.b, align 4)
+; EXTABI-DAG:     renamable $v31 = COPY $v2
+; EXTABI-DAG:     renamable $v2 = VADDUWM killed renamable $v3, $v2
+; EXTABI-LABEL:   INLINEASM
+; EXTABI-DAG:     renamable $v2 = LXVW4X $zero8, renamable $x3 :: (volatile load 16 from %ir.b, align 4)
+; EXTABI-DAG:     renamable $v3 = VMULUWM killed renamable $v31, renamable $v31
+; EXTABI-DAG:     renamable $v2 = VADDUWM killed renamable $v3, killed renamable $v2
+; EXTABI-DAG:     STXVW4X killed renamable $v2, $zero8, renamable $x3 :: (volatile store 16 into %ir.b, align 4)
+; EXTABI:         renamable $v2 = LXVW4X $zero8, killed renamable $x3 :: (volatile load 16 from %ir.b, align 4)