Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -110,11 +110,12 @@
     VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
     VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
     VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
-  CCIfType<[i64, f64, v2i32, v2f32, v3i32, v3f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
+  CCIfType<[i64, f64, v2i32, v2f32, v3i32, v3f32, v4i32, v4f32, v5i32, v5f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
   CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
   CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
   CCIfType<[v3i32, v3f32], CCAssignToStack<12, 4>>,
   CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,
+  CCIfType<[v5i32, v5f32], CCAssignToStack<20, 4>>,
   CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>,
   CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>>
 ]>;
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -544,6 +544,8 @@
     return AMDGPU::SGPR_96RegClassID;
   case 4:
     return AMDGPU::SReg_128RegClassID;
+  case 5:
+    return AMDGPU::SGPR_160RegClassID;
   case 8:
     return AMDGPU::SReg_256RegClassID;
   case 16:
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -156,6 +156,9 @@
   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);

+  setOperationAction(ISD::LOAD, MVT::v5f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32);
+
   setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);

@@ -244,6 +247,9 @@
   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);

+  setOperationAction(ISD::STORE, MVT::v5f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32);
+
   setOperationAction(ISD::STORE, MVT::v8f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);

@@ -335,6 +341,8 @@
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v3f32, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v5i32, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v5f32, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
@@ -343,6 +351,8 @@
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3i32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5f32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);

@@ -402,7 +412,7 @@
   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);

   static const MVT::SimpleValueType VectorIntTypes[] = {
-    MVT::v2i32, MVT::v3i32, MVT::v4i32
+    MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32
   };

   for (MVT VT : VectorIntTypes) {
@@ -444,7 +454,7 @@
   }

   static const MVT::SimpleValueType FloatVectorTypes[] = {
-    MVT::v2f32, MVT::v3f32, MVT::v4f32
+    MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32
   };

   for (MVT VT : FloatVectorTypes) {
@@ -492,6 +502,9 @@
   setOperationAction(ISD::SELECT, MVT::v4f32, Promote);
   AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32);

+  setOperationAction(ISD::SELECT, MVT::v5f32, Promote);
+  AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32);
+
   // There are no libcalls of any kind.
   for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
     setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
Index: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -357,6 +357,9 @@
   } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo)) {
     O << 'v';
     NumRegs = 3;
+  } else if (MRI.getRegClass(AMDGPU::VReg_160RegClassID).contains(RegNo)) {
+    O << 'v';
+    NumRegs = 5;
   } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo)) {
     O << 'v';
     NumRegs = 8;
Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -132,6 +132,9 @@
   addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
   addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);

+  addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
+  addRegisterClass(MVT::v5f32, &AMDGPU::VReg_160RegClass);
+
   addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
   addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);

@@ -155,6 +158,7 @@
   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v3i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v5i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
   setOperationAction(ISD::LOAD, MVT::i1, Custom);
@@ -163,6 +167,7 @@
   setOperationAction(ISD::STORE, MVT::v2i32, Custom);
   setOperationAction(ISD::STORE, MVT::v3i32, Custom);
   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+  setOperationAction(ISD::STORE, MVT::v5i32, Custom);
   setOperationAction(ISD::STORE, MVT::v8i32, Custom);
   setOperationAction(ISD::STORE, MVT::v16i32, Custom);
   setOperationAction(ISD::STORE, MVT::i1, Custom);
@@ -336,6 +341,12 @@
   setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i32, Expand);
   setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4f32, Expand);

+  // Deal with vec5 vector operations when widened to vec8.
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5i32, Expand);
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v5f32, Expand);
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i32, Expand);
+  setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8f32, Expand);
+
   // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,
   // and output demarshalling
   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
@@ -9688,6 +9699,9 @@
     case 128:
       RC = &AMDGPU::SReg_128RegClass;
       break;
+    case 160:
+      RC = &AMDGPU::SReg_160RegClass;
+      break;
     case 256:
      RC = &AMDGPU::SReg_256RegClass;
      break;
@@ -9713,6 +9727,9 @@
    case 128:
      RC = &AMDGPU::VReg_128RegClass;
      break;
+    case 160:
+      RC = &AMDGPU::VReg_160RegClass;
+      break;
    case 256:
      RC = &AMDGPU::VReg_256RegClass;
      break;
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -845,6 +845,8 @@
     return AMDGPU::SI_SPILL_S96_SAVE;
   case 16:
     return AMDGPU::SI_SPILL_S128_SAVE;
+  case 20:
+    return AMDGPU::SI_SPILL_S160_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_S256_SAVE;
   case 64:
@@ -864,6 +866,8 @@
     return AMDGPU::SI_SPILL_V96_SAVE;
   case 16:
     return AMDGPU::SI_SPILL_V128_SAVE;
+  case 20:
+    return AMDGPU::SI_SPILL_V160_SAVE;
   case 32:
     return AMDGPU::SI_SPILL_V256_SAVE;
   case 64:
@@ -949,6 +953,8 @@
     return AMDGPU::SI_SPILL_S96_RESTORE;
   case 16:
     return AMDGPU::SI_SPILL_S128_RESTORE;
+  case 20:
+    return AMDGPU::SI_SPILL_S160_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_S256_RESTORE;
   case 64:
@@ -968,6 +974,8 @@
     return AMDGPU::SI_SPILL_V96_RESTORE;
   case 16:
     return AMDGPU::SI_SPILL_V128_RESTORE;
+  case 20:
+    return AMDGPU::SI_SPILL_V160_RESTORE;
   case 32:
     return AMDGPU::SI_SPILL_V256_RESTORE;
   case 64:
Index: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
@@ -481,6 +481,7 @@
 defm SI_SPILL_S64  : SI_SPILL_SGPR <SReg_64>;
 defm SI_SPILL_S96  : SI_SPILL_SGPR <SReg_96>;
 defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
+defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;
 defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;

@@ -514,6 +515,7 @@
 defm SI_SPILL_V64  : SI_SPILL_VGPR <VReg_64>;
 defm SI_SPILL_V96  : SI_SPILL_VGPR <VReg_96>;
 defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
+defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;
 defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
 defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;

@@ -771,6 +773,22 @@
   >;
 }

+foreach Index = 0-4 in {
+  def Extract_Element_v5i32_#Index : Extract_Element <
+    i32, v5i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v5i32_#Index : Insert_Element <
+    i32, v5i32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+
+  def Extract_Element_v5f32_#Index : Extract_Element <
+    f32, v5f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+  def Insert_Element_v5f32_#Index : Insert_Element <
+    f32, v5f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+}
+
 foreach Index = 0-7 in {
   def Extract_Element_v8i32_#Index : Extract_Element <
     i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -900,6 +918,10 @@
 def : BitConvert ;
 def : BitConvert ;

+// 160-bit bitcast
+def : BitConvert <v5i32, v5f32, SReg_160>;
+def : BitConvert <v5f32, v5i32, SReg_160>;
+
 // 256-bit bitcast
 def : BitConvert <v8i32, v8f32, SReg_256>;
 def : BitConvert <v8f32, v8i32, SReg_256>;
Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -410,6 +410,11 @@
   case AMDGPU::SI_SPILL_V256_SAVE:
   case AMDGPU::SI_SPILL_V256_RESTORE:
     return 8;
+  case AMDGPU::SI_SPILL_S160_SAVE:
+  case AMDGPU::SI_SPILL_S160_RESTORE:
+  case AMDGPU::SI_SPILL_V160_SAVE:
+  case AMDGPU::SI_SPILL_V160_RESTORE:
+    return 5;
   case AMDGPU::SI_SPILL_S128_SAVE:
   case AMDGPU::SI_SPILL_S128_RESTORE:
   case AMDGPU::SI_SPILL_V128_SAVE:
@@ -979,6 +984,7 @@
   switch (MI->getOpcode()) {
   case AMDGPU::SI_SPILL_S512_SAVE:
   case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S160_SAVE:
   case AMDGPU::SI_SPILL_S128_SAVE:
   case AMDGPU::SI_SPILL_S96_SAVE:
   case AMDGPU::SI_SPILL_S64_SAVE:
@@ -986,6 +992,7 @@
     return spillSGPR(MI, FI, RS, true);
   case AMDGPU::SI_SPILL_S512_RESTORE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_S160_RESTORE:
   case AMDGPU::SI_SPILL_S128_RESTORE:
   case AMDGPU::SI_SPILL_S96_RESTORE:
   case AMDGPU::SI_SPILL_S64_RESTORE:
@@ -1015,6 +1022,7 @@
     // SGPR register spill
     case AMDGPU::SI_SPILL_S512_SAVE:
     case AMDGPU::SI_SPILL_S256_SAVE:
+    case AMDGPU::SI_SPILL_S160_SAVE:
     case AMDGPU::SI_SPILL_S128_SAVE:
     case AMDGPU::SI_SPILL_S96_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
@@ -1026,6 +1034,7 @@
     // SGPR register restore
     case AMDGPU::SI_SPILL_S512_RESTORE:
     case AMDGPU::SI_SPILL_S256_RESTORE:
+    case AMDGPU::SI_SPILL_S160_RESTORE:
     case AMDGPU::SI_SPILL_S128_RESTORE:
     case AMDGPU::SI_SPILL_S96_RESTORE:
     case AMDGPU::SI_SPILL_S64_RESTORE:
@@ -1037,6 +1046,7 @@
     // VGPR register spill
     case AMDGPU::SI_SPILL_V512_SAVE:
     case AMDGPU::SI_SPILL_V256_SAVE:
+    case AMDGPU::SI_SPILL_V160_SAVE:
     case AMDGPU::SI_SPILL_V128_SAVE:
     case AMDGPU::SI_SPILL_V96_SAVE:
     case AMDGPU::SI_SPILL_V64_SAVE:
@@ -1059,6 +1069,7 @@
     case AMDGPU::SI_SPILL_V64_RESTORE:
     case AMDGPU::SI_SPILL_V96_RESTORE:
     case AMDGPU::SI_SPILL_V128_RESTORE:
+    case AMDGPU::SI_SPILL_V160_RESTORE:
     case AMDGPU::SI_SPILL_V256_RESTORE:
     case AMDGPU::SI_SPILL_V512_RESTORE: {
       const MachineOperand *VData = TII->getNamedOperand(*MI,
@@ -1251,6 +1262,8 @@
     &AMDGPU::SReg_96RegClass,
     &AMDGPU::VReg_128RegClass,
     &AMDGPU::SReg_128RegClass,
+    &AMDGPU::VReg_160RegClass,
+    &AMDGPU::SReg_160RegClass,
     &AMDGPU::VReg_256RegClass,
     &AMDGPU::SReg_256RegClass,
     &AMDGPU::VReg_512RegClass,
@@ -1283,6 +1296,8 @@
     return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
   case 128:
     return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
+  case 160:
+    return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
   case 256:
     return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
   case 512:
@@ -1303,6 +1318,8 @@
     return &AMDGPU::VReg_96RegClass;
   case 128:
     return &AMDGPU::VReg_128RegClass;
+  case 160:
+    return &AMDGPU::VReg_160RegClass;
   case 256:
     return &AMDGPU::VReg_256RegClass;
   case 512:
@@ -1323,6 +1340,8 @@
     return &AMDGPU::SReg_96RegClass;
   case 128:
     return &AMDGPU::SReg_128RegClass;
+  case 160:
+    return &AMDGPU::SReg_160RegClass;
   case 256:
     return &AMDGPU::SReg_256RegClass;
   case 512:
@@ -1349,6 +1368,8 @@
     return &AMDGPU::SReg_96RegClass;
   case 4:
     return &AMDGPU::SReg_128RegClass;
+  case 5:
+    return &AMDGPU::SReg_160RegClass;
   case 8:
     return &AMDGPU::SReg_256RegClass;
   case 16: /* fall-through */
@@ -1365,6 +1386,8 @@
     return &AMDGPU::VReg_96RegClass;
   case 4:
     return &AMDGPU::VReg_128RegClass;
+  case 5:
+    return &AMDGPU::VReg_160RegClass;
   case 8:
     return &AMDGPU::VReg_256RegClass;
   case 16: /* fall-through */
@@ -1427,6 +1450,10 @@
     AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
   };

+  static const int16_t Sub0_4[] = {
+    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
+  };
+
   static const int16_t Sub0_3[] = {
     AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
   };

@@ -1448,6 +1475,8 @@
     return makeArrayRef(Sub0_2);
   case 128:
     return makeArrayRef(Sub0_3);
+  case 160:
+    return makeArrayRef(Sub0_4);
   case 256:
     return makeArrayRef(Sub0_7);
   case 512:
@@ -1618,6 +1647,9 @@
   case 128:
     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
                                                   &AMDGPU::SReg_128RegClass;
+  case 160:
+    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
+                                                  &AMDGPU::SReg_160RegClass;
   case 256:
     return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
                                                   &AMDGPU::SReg_256RegClass;
Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -14,6 +14,7 @@
   list<SubRegIndex> ret2 = [sub0, sub1];
   list<SubRegIndex> ret3 = [sub0, sub1, sub2];
   list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];
+  list<SubRegIndex> ret5 = [sub0, sub1, sub2, sub3, sub4];
   list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
   list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
                              sub4, sub5, sub6, sub7,
@@ -23,7 +24,8 @@
   list<SubRegIndex> ret = !if(!eq(size, 2), ret2,
                               !if(!eq(size, 3), ret3,
                                   !if(!eq(size, 4), ret4,
-                                      !if(!eq(size, 8), ret8, ret16))));
+                                      !if(!eq(size, 5), ret5,
+                                          !if(!eq(size, 8), ret8, ret16)))));
 }

 //===----------------------------------------------------------------------===//
@@ -190,6 +192,14 @@
                                   (add (decimate (shl SGPR_32, 2), 4)),
                                   (add (decimate (shl SGPR_32, 3), 4))]>;

+// SGPR 160-bit registers. No operations use these, but for symmetry with 160-bit VGPRs.
+def SGPR_160Regs : RegisterTuples<getSubRegs<5>.ret,
+                                  [(add (decimate SGPR_32, 4)),
+                                   (add (decimate (shl SGPR_32, 1), 4)),
+                                   (add (decimate (shl SGPR_32, 2), 4)),
+                                   (add (decimate (shl SGPR_32, 3), 4)),
+                                   (add (decimate (shl SGPR_32, 4), 4))]>;
+
 // SGPR 256-bit registers
 def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret,
                                   [(add (decimate SGPR_32, 4)),
@@ -372,6 +382,14 @@
                               (add (shl VGPR_32, 2)),
                               (add (shl VGPR_32, 3))]>;

+// VGPR 160-bit registers
+def VGPR_160 : RegisterTuples<getSubRegs<5>.ret,
+                              [(add (trunc VGPR_32, 252)),
+                               (add (shl VGPR_32, 1)),
+                               (add (shl VGPR_32, 2)),
+                               (add (shl VGPR_32, 3)),
+                               (add (shl VGPR_32, 4))]>;
+
 // VGPR 256-bit registers
 def VGPR_256 : RegisterTuples<getSubRegs<8>.ret,
                               [(add (trunc VGPR_32, 249)),
@@ -505,6 +523,18 @@
 } // End CopyCost = 2

+// There are no 5-component scalar instructions, but this is needed
+// for symmetry with VGPRs.
+def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
+                             (add SGPR_160Regs)> {
+  let AllocationPriority = 12;
+}
+
+def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
+                             (add SGPR_160)> {
+  let AllocationPriority = 12;
+}
+
 def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
   let AllocationPriority = 13;
 }
@@ -565,6 +595,14 @@
   let AllocationPriority = 4;
 }

+def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, (add VGPR_160)> {
+  let Size = 160;
+
+  // Requires 5 v_mov_b32 to copy
+  let CopyCost = 5;
+  let AllocationPriority = 5;
+}
+
 def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
   let Size = 256;
   let CopyCost = 8;
Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -822,6 +822,10 @@
   case AMDGPU::SReg_128RegClassID:
   case AMDGPU::VReg_128RegClassID:
     return 128;
+  case AMDGPU::SGPR_160RegClassID:
+  case AMDGPU::SReg_160RegClassID:
+  case AMDGPU::VReg_160RegClassID:
+    return 160;
   case AMDGPU::SReg_256RegClassID:
   case AMDGPU::VReg_256RegClassID:
     return 256;
Index: llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/select-vectors.ll
@@ -281,6 +281,23 @@
   ret void
 }

+; GCN-LABEL: {{^}}s_select_v5f32:
+; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
+
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+; GCN: v_cndmask_b32_e32
+
+; GCN: buffer_store_dwordx
+define amdgpu_kernel void @s_select_v5f32(<5 x float> addrspace(1)* %out, <5 x float> %a, <5 x float> %b, i32 %c) #0 {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, <5 x float> %a, <5 x float> %b
+  store <5 x float> %select, <5 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
 ; GCN-LABEL: {{^}}select_v8f32:
 ; GCN: v_cndmask_b32_e32
 ; GCN: v_cndmask_b32_e32
Index: llvm/trunk/test/CodeGen/AMDGPU/spill-wide-sgpr.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-wide-sgpr.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-wide-sgpr.ll
@@ -139,6 +139,66 @@
   ret void
 }

+; ALL-LABEL: {{^}}spill_sgpr_x5:
+; SMEM: s_add_u32 m0, s3, 0x100{{$}}
+; SMEM: s_buffer_store_dword s
+; SMEM: s_buffer_store_dword s
+; SMEM: s_buffer_store_dword s
+; SMEM: s_buffer_store_dword s
+; SMEM: s_buffer_store_dword s
+; SMEM: s_cbranch_scc1
+
+; SMEM: s_add_u32 m0, s3, 0x100{{$}}
+; SMEM: s_buffer_load_dword s
+; SMEM: s_buffer_load_dword s
+; SMEM: s_buffer_load_dword s
+; SMEM: s_buffer_load_dword s
+; SMEM: s_buffer_load_dword s
+; SMEM: s_dcache_wb
+; SMEM: s_endpgm
+
+; FIXME: Should only need 4 bytes
+; SMEM: ScratchSize: 24
+
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
+; VGPR: s_cbranch_scc1
+
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
+
+
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: s_cbranch_scc1
+
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+define amdgpu_kernel void @spill_sgpr_x5(i32 addrspace(1)* %out, i32 %in) #0 {
+  %wide.sgpr = call <5 x i32> asm sideeffect "; def $0", "=s" () #0
+  %cmp = icmp eq i32 %in, 0
+  br i1 %cmp, label %bb0, label %ret
+
+bb0:
+  call void asm sideeffect "; use $0", "s"(<5 x i32> %wide.sgpr) #0
+  br label %ret
+
+ret:
+  ret void
+}
+
 ; ALL-LABEL: {{^}}spill_sgpr_x8:
 ; SMEM: s_add_u32 m0, s3, 0x100{{$}}