Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -388,6 +388,8 @@ return AMDGPU::SGPR_96RegClassID; case 4: return AMDGPU::SReg_128RegClassID; + case 5: + return AMDGPU::SGPR_160RegClassID; case 8: return AMDGPU::SReg_256RegClassID; case 16: Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -158,6 +158,9 @@ setOperationAction(ISD::LOAD, MVT::v4f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); + setOperationAction(ISD::LOAD, MVT::v5f32, Promote); + AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32); + setOperationAction(ISD::LOAD, MVT::v8f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32); @@ -246,6 +249,9 @@ setOperationAction(ISD::STORE, MVT::v4f32, Promote); AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); + setOperationAction(ISD::STORE, MVT::v5f32, Promote); + AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32); + setOperationAction(ISD::STORE, MVT::v8f32, Promote); AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32); @@ -337,6 +343,8 @@ setOperationAction(ISD::CONCAT_VECTORS, MVT::v3f32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v5i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v5f32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom); @@ -345,6 +353,8 @@ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom); 
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5f32, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom); Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -339,6 +339,9 @@ } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo)) { O << 'v'; NumRegs = 3; + } else if (MRI.getRegClass(AMDGPU::VReg_160RegClassID).contains(RegNo)) { + O << 'v'; + NumRegs = 5; } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo)) { O << 'v'; NumRegs = 8; Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -134,6 +134,9 @@ addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); + addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass); + addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass); + addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass); addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass); @@ -157,6 +160,7 @@ setOperationAction(ISD::LOAD, MVT::v2i32, Custom); setOperationAction(ISD::LOAD, MVT::v3i32, Custom); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + setOperationAction(ISD::LOAD, MVT::v5i32, Custom); setOperationAction(ISD::LOAD, MVT::v8i32, Custom); setOperationAction(ISD::LOAD, MVT::v16i32, Custom); setOperationAction(ISD::LOAD, MVT::i1, Custom); @@ -165,6 +169,7 @@ setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v3i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); + 
setOperationAction(ISD::STORE, MVT::v5i32, Custom); setOperationAction(ISD::STORE, MVT::v8i32, Custom); setOperationAction(ISD::STORE, MVT::v16i32, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); @@ -334,6 +339,12 @@ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i32, Expand); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4f32, Expand); + // Deal with vec5 vector operations when widened to vec8. + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5i32, Expand); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5f32, Expand); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i32, Expand); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8f32, Expand); + // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, // and output demarshalling setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -842,6 +842,8 @@ return AMDGPU::SI_SPILL_S96_SAVE; case 16: return AMDGPU::SI_SPILL_S128_SAVE; + case 20: + return AMDGPU::SI_SPILL_S160_SAVE; case 32: return AMDGPU::SI_SPILL_S256_SAVE; case 64: @@ -861,6 +863,8 @@ return AMDGPU::SI_SPILL_V96_SAVE; case 16: return AMDGPU::SI_SPILL_V128_SAVE; + case 20: + return AMDGPU::SI_SPILL_V160_SAVE; case 32: return AMDGPU::SI_SPILL_V256_SAVE; case 64: Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -493,6 +493,7 @@ defm SI_SPILL_S64 : SI_SPILL_SGPR ; defm SI_SPILL_S96 : SI_SPILL_SGPR ; defm SI_SPILL_S128 : SI_SPILL_SGPR ; +defm SI_SPILL_S160 : SI_SPILL_SGPR ; defm SI_SPILL_S256 : SI_SPILL_SGPR ; defm SI_SPILL_S512 : SI_SPILL_SGPR ; @@ -526,6 +527,7 @@ defm SI_SPILL_V64 : SI_SPILL_VGPR ; defm SI_SPILL_V96 : SI_SPILL_VGPR ; defm SI_SPILL_V128 : SI_SPILL_VGPR ; +defm SI_SPILL_V160 : 
SI_SPILL_VGPR ; defm SI_SPILL_V256 : SI_SPILL_VGPR ; defm SI_SPILL_V512 : SI_SPILL_VGPR ; @@ -826,6 +828,22 @@ >; } +foreach Index = 0-4 in { + def Extract_Element_v5i32_#Index : Extract_Element < + i32, v5i32, Index, !cast(sub#Index) + >; + def Insert_Element_v5i32_#Index : Insert_Element < + i32, v5i32, Index, !cast(sub#Index) + >; + + def Extract_Element_v5f32_#Index : Extract_Element < + f32, v5f32, Index, !cast(sub#Index) + >; + def Insert_Element_v5f32_#Index : Insert_Element < + f32, v5f32, Index, !cast(sub#Index) + >; +} + foreach Index = 0-7 in { def Extract_Element_v8i32_#Index : Extract_Element < i32, v8i32, Index, !cast(sub#Index) @@ -957,6 +975,10 @@ def : BitConvert ; def : BitConvert ; +// 160-bit bitcast +def : BitConvert ; +def : BitConvert ; + // 256-bit bitcast def : BitConvert ; def : BitConvert ; Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -405,6 +405,11 @@ case AMDGPU::SI_SPILL_V256_SAVE: case AMDGPU::SI_SPILL_V256_RESTORE: return 8; + case AMDGPU::SI_SPILL_S160_SAVE: + case AMDGPU::SI_SPILL_S160_RESTORE: + case AMDGPU::SI_SPILL_V160_SAVE: + case AMDGPU::SI_SPILL_V160_RESTORE: + return 5; case AMDGPU::SI_SPILL_S128_SAVE: case AMDGPU::SI_SPILL_S128_RESTORE: case AMDGPU::SI_SPILL_V128_SAVE: @@ -974,6 +979,7 @@ switch (MI->getOpcode()) { case AMDGPU::SI_SPILL_S512_SAVE: case AMDGPU::SI_SPILL_S256_SAVE: + case AMDGPU::SI_SPILL_S160_SAVE: case AMDGPU::SI_SPILL_S128_SAVE: case AMDGPU::SI_SPILL_S96_SAVE: case AMDGPU::SI_SPILL_S64_SAVE: @@ -981,6 +987,7 @@ return spillSGPR(MI, FI, RS, true); case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_S256_RESTORE: + case AMDGPU::SI_SPILL_S160_RESTORE: case AMDGPU::SI_SPILL_S128_RESTORE: case AMDGPU::SI_SPILL_S96_RESTORE: case AMDGPU::SI_SPILL_S64_RESTORE: @@ -1010,6 +1017,7 @@ // SGPR register spill case AMDGPU::SI_SPILL_S512_SAVE: case 
AMDGPU::SI_SPILL_S256_SAVE: + case AMDGPU::SI_SPILL_S160_SAVE: case AMDGPU::SI_SPILL_S128_SAVE: case AMDGPU::SI_SPILL_S96_SAVE: case AMDGPU::SI_SPILL_S64_SAVE: @@ -1021,6 +1029,7 @@ // SGPR register restore case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_S256_RESTORE: + case AMDGPU::SI_SPILL_S160_RESTORE: case AMDGPU::SI_SPILL_S128_RESTORE: case AMDGPU::SI_SPILL_S96_RESTORE: case AMDGPU::SI_SPILL_S64_RESTORE: @@ -1032,6 +1041,7 @@ // VGPR register spill case AMDGPU::SI_SPILL_V512_SAVE: case AMDGPU::SI_SPILL_V256_SAVE: + case AMDGPU::SI_SPILL_V160_SAVE: case AMDGPU::SI_SPILL_V128_SAVE: case AMDGPU::SI_SPILL_V96_SAVE: case AMDGPU::SI_SPILL_V64_SAVE: @@ -1054,6 +1064,7 @@ case AMDGPU::SI_SPILL_V64_RESTORE: case AMDGPU::SI_SPILL_V96_RESTORE: case AMDGPU::SI_SPILL_V128_RESTORE: + case AMDGPU::SI_SPILL_V160_RESTORE: case AMDGPU::SI_SPILL_V256_RESTORE: case AMDGPU::SI_SPILL_V512_RESTORE: { const MachineOperand *VData = TII->getNamedOperand(*MI, @@ -1244,6 +1255,8 @@ &AMDGPU::SReg_96RegClass, &AMDGPU::VReg_128RegClass, &AMDGPU::SReg_128RegClass, + &AMDGPU::VReg_160RegClass, + &AMDGPU::SReg_160RegClass, &AMDGPU::VReg_256RegClass, &AMDGPU::SReg_256RegClass, &AMDGPU::VReg_512RegClass, @@ -1276,6 +1289,8 @@ return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr; case 128: return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr; + case 160: + return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr; case 256: return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr; case 512: @@ -1296,6 +1311,8 @@ return &AMDGPU::VReg_96RegClass; case 128: return &AMDGPU::VReg_128RegClass; + case 160: + return &AMDGPU::VReg_160RegClass; case 256: return &AMDGPU::VReg_256RegClass; case 512: @@ -1316,6 +1333,8 @@ return &AMDGPU::SReg_96RegClass; case 128: return &AMDGPU::SReg_128RegClass; + case 160: + return &AMDGPU::SReg_160RegClass; case 256: return &AMDGPU::SReg_256RegClass; case 512: @@ -1342,6 +1361,8 @@ return &AMDGPU::SReg_96RegClass; 
case 4: return &AMDGPU::SReg_128RegClass; + case 5: + return &AMDGPU::SReg_160RegClass; case 8: return &AMDGPU::SReg_256RegClass; case 16: /* fall-through */ @@ -1358,6 +1379,8 @@ return &AMDGPU::VReg_96RegClass; case 4: return &AMDGPU::VReg_128RegClass; + case 5: + return &AMDGPU::VReg_160RegClass; case 8: return &AMDGPU::VReg_256RegClass; case 16: /* fall-through */ @@ -1610,6 +1633,9 @@ case 128: return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass : &AMDGPU::SReg_128RegClass; + case 160: + return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass : + &AMDGPU::SReg_160RegClass; default: llvm_unreachable("not implemented"); } Index: lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.td +++ lib/Target/AMDGPU/SIRegisterInfo.td @@ -14,6 +14,7 @@ list<SubRegIndex> ret2 = [sub0, sub1]; list<SubRegIndex> ret3 = [sub0, sub1, sub2]; list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3]; + list<SubRegIndex> ret5 = [sub0, sub1, sub2, sub3, sub4]; list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7]; list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, @@ -23,7 +24,8 @@ list<SubRegIndex> ret = !if(!eq(size, 2), ret2, !if(!eq(size, 3), ret3, !if(!eq(size, 4), ret4, - !if(!eq(size, 8), ret8, ret16)))); + !if(!eq(size, 5), ret5, + !if(!eq(size, 8), ret8, ret16))))); } //===----------------------------------------------------------------------===// @@ -187,6 +189,14 @@ (add (decimate (shl SGPR_32, 2), 4)), (add (decimate (shl SGPR_32, 3), 4))]>; +// SGPR 160-bit registers. No operations use these, but they are defined for symmetry with 160-bit VGPRs. 
+def SGPR_160Regs : RegisterTuples<getSubRegs<5>.ret, + [(add (decimate SGPR_32, 4)), + (add (decimate (shl SGPR_32, 1), 4)), + (add (decimate (shl SGPR_32, 2), 4)), + (add (decimate (shl SGPR_32, 3), 4)), + (add (decimate (shl SGPR_32, 4), 4))]>; + // SGPR 256-bit registers def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret, [(add (decimate SGPR_32, 4)), @@ -369,6 +379,14 @@ (add (shl VGPR_32, 2)), (add (shl VGPR_32, 3))]>; +// VGPR 160-bit registers +def VGPR_160 : RegisterTuples<getSubRegs<5>.ret, + [(add (trunc VGPR_32, 252)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2)), + (add (shl VGPR_32, 3)), + (add (shl VGPR_32, 4))]>; + // VGPR 256-bit registers def VGPR_256 : RegisterTuples<getSubRegs<8>.ret, [(add (trunc VGPR_32, 249)), @@ -491,6 +509,18 @@ } // End CopyCost = 2 +// There are no 5-component scalar instructions, but this is needed +// for symmetry with VGPRs. +def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, + (add SGPR_160Regs)> { + let AllocationPriority = 11; +} + +def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, + (add SGPR_160)> { + let AllocationPriority = 11; +} + def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> { let AllocationPriority = 11; } @@ -546,6 +576,14 @@ let AllocationPriority = 4; } +def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, (add VGPR_160)> { + let Size = 160; + + // Requires 5 v_mov_b32 to copy + let CopyCost = 5; + let AllocationPriority = 5; +} + def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> { let Size = 256; let CopyCost = 8; Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -821,6 +821,10 @@ case AMDGPU::SReg_128RegClassID: case AMDGPU::VReg_128RegClassID: return 128; + case AMDGPU::SGPR_160RegClassID: + case AMDGPU::SReg_160RegClassID: + case AMDGPU::VReg_160RegClassID: + return 160; case AMDGPU::SReg_256RegClassID: case 
AMDGPU::VReg_256RegClassID: return 256;