Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -65,48 +65,93 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { - setOperationAction(ISD::Constant, MVT::i32, Legal); - setOperationAction(ISD::Constant, MVT::i64, Legal); - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + // Lower floating point store/load to integer store/load to reduce the number + // of patterns in tablegen. + setOperationAction(ISD::LOAD, MVT::f32, Promote); + AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction(ISD::LOAD, MVT::v2f32, Promote); + AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32); - // This is totally unsupported, just custom lower to produce an error. - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4f32, Promote); + AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); - // We need to custom lower some of the intrinsics - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::LOAD, MVT::v8f32, Promote); + AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32); - // Library functions. These default to Expand, but we have instructions - // for them. - setOperationAction(ISD::FCEIL, MVT::f32, Legal); - setOperationAction(ISD::FEXP2, MVT::f32, Legal); - setOperationAction(ISD::FPOW, MVT::f32, Legal); - setOperationAction(ISD::FLOG2, MVT::f32, Legal); - setOperationAction(ISD::FABS, MVT::f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::f32, Legal); - setOperationAction(ISD::FRINT, MVT::f32, Legal); - setOperationAction(ISD::FTRUNC, MVT::f32, Legal); - setOperationAction(ISD::FMINNUM, MVT::f32, Legal); - setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); + setOperationAction(ISD::LOAD, MVT::v16f32, Promote); + AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32); - setOperationAction(ISD::FROUND, MVT::f32, Custom); - setOperationAction(ISD::FROUND, MVT::f64, Custom); + setOperationAction(ISD::LOAD, MVT::i64, Promote); + AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32); - setOperationAction(ISD::FREM, MVT::f32, Custom); - setOperationAction(ISD::FREM, MVT::f64, Custom); + setOperationAction(ISD::LOAD, MVT::v2i64, Promote); + AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32); - // v_mad_f32 does not support denormals according to some sources. - if (!Subtarget->hasFP32Denormals()) - setOperationAction(ISD::FMAD, MVT::f32, Legal); + setOperationAction(ISD::LOAD, MVT::f64, Promote); + AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32); - // Expand to fneg + fadd. - setOperationAction(ISD::FSUB, MVT::f64, Expand); + setOperationAction(ISD::LOAD, MVT::v2f64, Promote); + AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v4i32); + + // There are no 64-bit extloads. These should be done as a 32-bit extload and + // an extension to 64-bit. 
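For readers following the legalizer changes: the loop below marks every 64-bit extending load Expand, which forces exactly the combination the comment describes. A minimal standalone C++ model of the semantics that must be preserved; the function names are illustrative, not LLVM API:

#include <cstdint>
#include <cstring>

// Model of a zero-extending i8 -> i64 load on GCN: a narrow load that
// zero-extends into a 32-bit register, then a separate 32 -> 64 extension.
uint64_t zextload_i8_to_i64(const void *p) {
  uint8_t byte;
  std::memcpy(&byte, p, 1);            // the 8-bit memory access
  uint32_t lo32 = byte;                // what the 32-bit zextload yields
  return static_cast<uint64_t>(lo32);  // the separate extension step
}

// Sign-extending variant: sext into 32 bits, then sext 32 -> 64.
int64_t sextload_i8_to_i64(const void *p) {
  int8_t byte;
  std::memcpy(&byte, p, 1);
  int32_t lo32 = byte;
  return static_cast<int64_t>(lo32);
}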
+ for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand); + } + + for (MVT VT : MVT::integer_valuetypes()) { + if (VT == MVT::i64) + continue; + + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand); + + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); + } + + for (MVT VT : MVT::integer_vector_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand); + } + + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); + + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f32, Expand); + + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand); - // Lower floating point store/load to integer store/load to reduce the number - // of patterns in tablegen. setOperationAction(ISD::STORE, MVT::f32, Promote); AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); @@ -122,51 +167,99 @@ setOperationAction(ISD::STORE, MVT::v16f32, Promote); AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32); + setOperationAction(ISD::STORE, MVT::i64, Promote); + AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32); + + setOperationAction(ISD::STORE, MVT::v2i64, Promote); + AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32); + setOperationAction(ISD::STORE, MVT::f64, Promote); - AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); + AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32); setOperationAction(ISD::STORE, MVT::v2f64, Promote); - AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v2i64); - - // Custom lowering of vector stores is required for local address space - // stores. 
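The f64 and v2f64 store promotions just above reinterpret the FP value as integer lanes rather than converting it, which is what lets tablegen carry only integer store patterns. A host-side sketch of that invariant, assuming a little-endian layout as on GCN (names illustrative):

#include <cstdint>
#include <cstring>

// What Promote-to-integer means for an f64 store: the value is bitcast
// (not converted) to two 32-bit words, and only those words are stored.
void store_f64_as_v2i32(double v, uint32_t *p) {
  uint32_t words[2];
  std::memcpy(words, &v, sizeof v);  // bitcast f64 -> v2i32
  p[0] = words[0];
  p[1] = words[1];
}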
- setOperationAction(ISD::STORE, MVT::v4i32, Custom); + AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32); - setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); - setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); + setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); - // XXX: This can be change to Custom, once ExpandVectorStores can - // handle 64-bit stores. + setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); - setTruncStoreAction(MVT::i64, MVT::i16, Expand); - setTruncStoreAction(MVT::i64, MVT::i8, Expand); + setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); + setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand); + setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); + setTruncStoreAction(MVT::i64, MVT::i1, Expand); + setTruncStoreAction(MVT::i64, MVT::i8, Expand); + setTruncStoreAction(MVT::i64, MVT::i16, Expand); + setTruncStoreAction(MVT::i64, MVT::i32, Expand); + setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand); - setTruncStoreAction(MVT::v4i64, MVT::v4i1, Expand); + setTruncStoreAction(MVT::v2i64, MVT::v2i8, Expand); + setTruncStoreAction(MVT::v2i64, MVT::v2i16, Expand); + setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand); + setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand); + setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand); - setOperationAction(ISD::LOAD, MVT::f32, Promote); - AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); - setOperationAction(ISD::LOAD, MVT::v2f32, Promote); - AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32); + setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); + setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand); - setOperationAction(ISD::LOAD, MVT::v4f32, Promote); - AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); + setTruncStoreAction(MVT::v4f64, MVT::v4f32, Expand); + setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand); - setOperationAction(ISD::LOAD, MVT::v8f32, Promote); - AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32); + setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand); + setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand); - setOperationAction(ISD::LOAD, MVT::v16f32, Promote); - AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32); - setOperationAction(ISD::LOAD, MVT::f64, Promote); - AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); + setOperationAction(ISD::Constant, MVT::i32, Legal); + setOperationAction(ISD::Constant, MVT::i64, Legal); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - setOperationAction(ISD::LOAD, MVT::v2f64, Promote); - AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v2i64); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); + + // This is totally unsupported, just custom lower to produce an error. + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + + // We need to custom lower some of the intrinsics + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + + // Library functions. These default to Expand, but we have instructions + // for them. 
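One line of context for the Legal/Expand split used throughout this hunk: Legal means ISel can match the node directly against an instruction, while Expand has the legalizer rewrite the node in terms of others. The clearest instance in this file is f64 FSUB ("Expand to fneg + fadd" below); a hedged sketch of that rewrite:

// With ISD::FSUB marked Expand for f64, a - b is rewritten as a + (-b).
// Negation and addition of the negated value are exact in IEEE-754,
// so the rewrite loses no precision.
double expanded_fsub_f64(double a, double b) {
  double neg_b = -b;    // ISD::FNEG
  return a + neg_b;     // ISD::FADD
}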
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FEXP2, MVT::f32, Legal); + setOperationAction(ISD::FPOW, MVT::f32, Legal); + setOperationAction(ISD::FLOG2, MVT::f32, Legal); + setOperationAction(ISD::FABS, MVT::f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); + + setOperationAction(ISD::FROUND, MVT::f32, Custom); + setOperationAction(ISD::FROUND, MVT::f64, Custom); + + setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); + + setOperationAction(ISD::FREM, MVT::f32, Custom); + setOperationAction(ISD::FREM, MVT::f64, Custom); + + // v_mad_f32 does not support denormals according to some sources. + if (!Subtarget->hasFP32Denormals()) + setOperationAction(ISD::FMAD, MVT::f32, Legal); + + // Expand to fneg + fadd. + setOperationAction(ISD::FSUB, MVT::f64, Expand); setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom); @@ -179,31 +272,6 @@ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom); - // There are no 64-bit extloads. These should be done as a 32-bit extload and - // an extension to 64-bit. - for (MVT VT : MVT::integer_valuetypes()) { - setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand); - setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand); - } - - for (MVT VT : MVT::integer_vector_valuetypes()) { - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand); - } - - setOperationAction(ISD::BR_CC, MVT::i1, Expand); - if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { setOperationAction(ISD::FCEIL, MVT::f64, Custom); setOperationAction(ISD::FTRUNC, MVT::f64, Custom); @@ -219,28 +287,13 @@ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); - - setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand); - - setTruncStoreAction(MVT::f32, MVT::f16, Expand); - setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand); - setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand); - setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand); - - setTruncStoreAction(MVT::f64, MVT::f16, Expand); - 
setTruncStoreAction(MVT::f64, MVT::f32, Expand); - const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; for (MVT VT : ScalarIntVTs) { - setOperationAction(ISD::SREM, VT, Expand); + // These should use [SU]DIVREM, so set them to expand setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); // GPU does not have divrem function for signed or unsigned. setOperationAction(ISD::SDIVREM, VT, Custom); @@ -378,23 +431,6 @@ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); } - setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); - - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::MUL); - setTargetDAGCombine(ISD::SELECT); - setTargetDAGCombine(ISD::SELECT_CC); - setTargetDAGCombine(ISD::STORE); - - setTargetDAGCombine(ISD::FADD); - setTargetDAGCombine(ISD::FSUB); - - setTargetDAGCombine(ISD::BITCAST); - setBooleanContents(ZeroOrNegativeOneBooleanContent); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); @@ -425,6 +461,18 @@ MaxStoresPerMemcpy = 4096; MaxStoresPerMemmove = 4096; MaxStoresPerMemset = 4096; + + setTargetDAGCombine(ISD::BITCAST); + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SRL); + setTargetDAGCombine(ISD::MUL); + setTargetDAGCombine(ISD::SELECT); + setTargetDAGCombine(ISD::SELECT_CC); + setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::FADD); + setTargetDAGCombine(ISD::FSUB); } //===----------------------------------------------------------------------===// Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -33,15 +33,44 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI) : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) { - addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); - addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass); addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass); + addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); computeRegisterProperties(STI.getRegisterInfo()); + // Legalize loads and stores to the private address space. + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); + setOperationAction(ISD::LOAD, MVT::v4i32, Custom); + + // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address + // spaces, so it is custom lowered to handle those where it isn't. 
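The comment leans on the node semantics: ZEXTLOAD guarantees zeroed high bits, while EXTLOAD (any-extend) leaves them unspecified, so a zero-extending hardware load is always a correct implementation of both. A standalone sketch of that contract (names illustrative):

#include <cstdint>
#include <cstring>

// ZEXTLOAD of an i8 into i32: the high 24 bits are guaranteed to be zero.
uint32_t zextload_i8_to_i32(const void *p) {
  uint8_t b;
  std::memcpy(&b, p, 1);
  return b;
}

// EXTLOAD (any-extend): only the low 8 bits are defined for the consumer,
// so reusing the zero-extending load is a valid lowering. That is why
// EXTLOAD "should be the same as ZEXTLOAD" here.
uint32_t extload_i8_to_i32(const void *p) {
  return zextload_i8_to_i32(p);
}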
+ for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom); + + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom); + + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom); + } + + setOperationAction(ISD::STORE, MVT::i8, Custom); + setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::v2i32, Custom); + setOperationAction(ISD::STORE, MVT::v4i32, Custom); + + setTruncStoreAction(MVT::i32, MVT::i8, Custom); + setTruncStoreAction(MVT::i32, MVT::i16, Custom); + // Set condition code actions setCondCodeAction(ISD::SETO, MVT::f32, Expand); setCondCodeAction(ISD::SETUO, MVT::f32, Expand); @@ -73,10 +102,6 @@ setOperationAction(ISD::FSUB, MVT::f32, Expand); - setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); @@ -122,37 +147,6 @@ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); - - // Legalize loads and stores to the private address space. - setOperationAction(ISD::LOAD, MVT::i32, Custom); - setOperationAction(ISD::LOAD, MVT::v2i32, Custom); - setOperationAction(ISD::LOAD, MVT::v4i32, Custom); - - // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address - // spaces, so it is custom lowered to handle those where it isn't. - for (MVT VT : MVT::integer_valuetypes()) { - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom); - - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom); - - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom); - } - - setOperationAction(ISD::STORE, MVT::i8, Custom); - setOperationAction(ISD::STORE, MVT::i32, Custom); - setOperationAction(ISD::STORE, MVT::v2i32, Custom); - setOperationAction(ISD::STORE, MVT::v4i32, Custom); - setTruncStoreAction(MVT::i32, MVT::i8, Custom); - setTruncStoreAction(MVT::i32, MVT::i16, Custom); - - setOperationAction(ISD::LOAD, MVT::i32, Custom); - setOperationAction(ISD::LOAD, MVT::v4i32, Custom); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom); @@ -165,12 +159,6 @@ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); - setTargetDAGCombine(ISD::FP_ROUND); - setTargetDAGCombine(ISD::FP_TO_SINT); - setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); - setTargetDAGCombine(ISD::SELECT_CC); - setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); - // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32 // to be Legal/Custom in order to avoid library calls. 
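The *_PARTS nodes set Custom just below express a 64-bit shift as 32-bit operations on the two halves. A runnable model of the SHL case, assuming the shift amount is in [0, 63]:

#include <cstdint>

// 64-bit shift-left decomposed into 32-bit halves, as ISD::SHL_PARTS does.
void shl_parts(uint32_t lo, uint32_t hi, unsigned amt,
               uint32_t &out_lo, uint32_t &out_hi) {
  if (amt == 0) {                 // avoid the undefined 32-bit shift by 32
    out_lo = lo;
    out_hi = hi;
  } else if (amt < 32) {
    out_lo = lo << amt;
    out_hi = (hi << amt) | (lo >> (32 - amt));
  } else {                        // 32 <= amt <= 63: low half shifts out
    out_lo = 0;
    out_hi = lo << (amt - 32);
  }
}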
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); @@ -188,6 +176,13 @@ } setSchedulingPreference(Sched::Source); + + + setTargetDAGCombine(ISD::FP_ROUND); + setTargetDAGCombine(ISD::FP_TO_SINT); + setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); + setTargetDAGCombine(ISD::SELECT_CC); + setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); } static inline bool isEOP(MachineBasicBlock::iterator I) { Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -73,50 +73,25 @@ computeRegisterProperties(STI.getRegisterInfo()); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand); - - setOperationAction(ISD::ADD, MVT::i32, Legal); - setOperationAction(ISD::ADDC, MVT::i32, Legal); - setOperationAction(ISD::ADDE, MVT::i32, Legal); - setOperationAction(ISD::SUBC, MVT::i32, Legal); - setOperationAction(ISD::SUBE, MVT::i32, Legal); - - setOperationAction(ISD::FSIN, MVT::f32, Custom); - setOperationAction(ISD::FCOS, MVT::f32, Custom); - - setOperationAction(ISD::FMINNUM, MVT::f64, Legal); - setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); - // We need to custom lower vector stores from local memory + setOperationAction(ISD::LOAD, MVT::v2i32, Custom); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); setOperationAction(ISD::LOAD, MVT::v8i32, Custom); setOperationAction(ISD::LOAD, MVT::v16i32, Custom); + setOperationAction(ISD::LOAD, MVT::i1, Custom); - setOperationAction(ISD::LOAD, MVT::f64, Promote); - AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32); - - setOperationAction(ISD::LOAD, MVT::i64, Promote); - AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32); - - setOperationAction(ISD::LOAD, MVT::v2i32, Custom); - + setOperationAction(ISD::STORE, MVT::v2i32, Custom); + setOperationAction(ISD::STORE, MVT::v4i32, Custom); setOperationAction(ISD::STORE, MVT::v8i32, Custom); setOperationAction(ISD::STORE, MVT::v16i32, Custom); - setOperationAction(ISD::STORE, MVT::i1, Custom); - setOperationAction(ISD::STORE, MVT::v4i32, Custom); - - setOperationAction(ISD::STORE, MVT::f64, Promote); - AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32); - setOperationAction(ISD::STORE, MVT::i64, Promote); - AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32); - - setOperationAction(ISD::STORE, MVT::v2i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand); + setOperationAction(ISD::SELECT, MVT::i1, Promote); setOperationAction(ISD::SELECT, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::f64, Promote); AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64); @@ -125,119 +100,38 @@ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); setOperationAction(ISD::SETCC, MVT::i1, Promote); setOperationAction(ISD::SETCC, MVT::v2i1, Expand); setOperationAction(ISD::SETCC, MVT::v4i1, Expand); - setOperationAction(ISD::BSWAP, MVT::i32, Legal); - setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + setOperationAction(ISD::TRUNCATE, MVT::v2i32, 
Expand); + setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::i64, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); - // On SI this is s_memtime and s_memrealtime on VI. - setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); - - for (MVT VT : MVT::integer_valuetypes()) { - if (VT == MVT::i64) - continue; - - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); - - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand); - - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal); - setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); - } - - for (MVT VT : MVT::integer_vector_valuetypes()) { - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i16, Expand); - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v16i16, Expand); - } - - for (MVT VT : MVT::fp_valuetypes()) - setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); - - setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); - - setTruncStoreAction(MVT::i64, MVT::i32, Expand); - setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); - setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand); - setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); - - - setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand); - - setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); - setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand); - - setOperationAction(ISD::LOAD, MVT::i1, Custom); - - setOperationAction(ISD::LOAD, MVT::v2i64, Promote); - AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32); - - setOperationAction(ISD::STORE, MVT::v2i64, Promote); - AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32); - - setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand); - - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - 
setOperationAction(ISD::FrameIndex, MVT::i32, Custom); - - // These should use UDIVREM, so set them to expand - setOperationAction(ISD::UDIV, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); - - setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); - setOperationAction(ISD::SELECT, MVT::i1, Promote); - - setOperationAction(ISD::TRUNCATE, MVT::v2i32, Expand); - - - setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); - // We only support LOAD/STORE and vector manipulation ops for vectors // with > 4 elements. for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64}) { for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { - switch(Op) { + switch (Op) { case ISD::LOAD: case ISD::STORE: case ISD::BUILD_VECTOR: @@ -274,15 +168,10 @@ AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v4i32); } - if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) { - setOperationAction(ISD::FTRUNC, MVT::f64, Legal); - setOperationAction(ISD::FCEIL, MVT::f64, Legal); - setOperationAction(ISD::FRINT, MVT::f64, Legal); - } - - setOperationAction(ISD::FFLOOR, MVT::f64, Legal); - setOperationAction(ISD::FDIV, MVT::f32, Custom); - setOperationAction(ISD::FDIV, MVT::f64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand); // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, // and output demarshalling @@ -299,6 +188,29 @@ setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); } + setOperationAction(ISD::BSWAP, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + + // On SI this is s_memtime and s_memrealtime on VI. 
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + + setOperationAction(ISD::FMINNUM, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); + + if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) { + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + } + + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + + setOperationAction(ISD::FSIN, MVT::f32, Custom); + setOperationAction(ISD::FCOS, MVT::f32, Custom); + setOperationAction(ISD::FDIV, MVT::f32, Custom); + setOperationAction(ISD::FDIV, MVT::f64, Custom); + + setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); setTargetDAGCombine(ISD::FMINNUM); Index: test/CodeGen/AMDGPU/global-extload-i8.ll =================================================================== --- test/CodeGen/AMDGPU/global-extload-i8.ll +++ test/CodeGen/AMDGPU/global-extload-i8.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s @@ -262,26 +262,26 @@ ret void } -; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64: -; XSI: s_endpgm -; define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { -; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in -; %ext = zext <32 x i8> %load to <32 x i64> -; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out -; ret void -; } +; FUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64: +; SI: s_endpgm +define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i8>, <32 x i8> addrspace(1)* %in + %ext = zext <32 x i8> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} -; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64: -; XSI: s_endpgm -; define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { -; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in -; %ext = sext <32 x i8> %load to <32 x i64> -; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out -; ret void -; } +; FUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64: +; SI: s_endpgm +define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind { + %load = load <32 x i8>, <32 x i8> addrspace(1)* %in + %ext = sext <32 x i8> %load to <32 x i64> + store <32 x i64> %ext, <32 x i64> addrspace(1)* %out + ret void +} -; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64: -; XSI: s_endpgm +; ; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64: +; ; XSI: s_endpgm ; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind { ; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in ; %ext = zext <64 x i8> %load to <64 x i64> @@ -289,8 +289,8 @@ ; ret void ; } -; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64: -; XSI: s_endpgm +; ; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64: +; ; XSI: s_endpgm ; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> 
addrspace(1)* nocapture %in) nounwind { ; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in ; %ext = sext <64 x i8> %load to <64 x i64> Index: test/CodeGen/AMDGPU/store-v3i64.ll =================================================================== --- test/CodeGen/AMDGPU/store-v3i64.ll +++ test/CodeGen/AMDGPU/store-v3i64.ll @@ -1,29 +1,128 @@ -; XFAIL: * -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; SI-LABEL: {{^}}global_store_v3i64: -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 +; GCN-LABEL: {{^}}global_store_v3i64: +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} define void @global_store_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %x) { store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 32 ret void } -; SI-LABEL: {{^}}global_store_v3i64_unaligned: +; GCN-LABEL: {{^}}global_store_v3i64_unaligned: +; GCN: buffer_store_byte +; GCN: buffer_store_byte +; GCN: buffer_store_byte +; GCN: buffer_store_byte + +; GCN: buffer_store_byte +; GCN: buffer_store_byte +; GCN: buffer_store_byte +; GCN: buffer_store_byte + +; GCN: buffer_store_byte +; GCN: buffer_store_byte +; GCN: buffer_store_byte +; GCN: buffer_store_byte + +; GCN: buffer_store_byte +; GCN: buffer_store_byte +; GCN: buffer_store_byte +; GCN: buffer_store_byte + +; GCN: buffer_store_byte +; GCN: buffer_store_byte +; GCN: buffer_store_byte +; GCN: buffer_store_byte + +; GCN: buffer_store_byte +; GCN: buffer_store_byte +; GCN: buffer_store_byte +; GCN: buffer_store_byte define void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64> %x) { store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 1 ret void } -; SI-LABEL: {{^}}local_store_v3i64: +; GCN-LABEL: {{^}}local_store_v3i64: +; GCN: ds_write_b64 +; GCN: ds_write_b64 +; GCN: ds_write_b64 define void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) { store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 32 ret void } -; SI-LABEL: {{^}}local_store_v3i64_unaligned: -define void @local_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64> %x) { - store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 1 +; GCN-LABEL: {{^}}local_store_v3i64_unaligned: +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 + +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 + +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 + +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 + +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 + +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +; GCN: ds_write_b8 +define void @local_store_v3i64_unaligned(<3 x i64> addrspace(3)* %out, <3 x i64> %x) { + store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 1 + ret void +} + +; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i32: +; GCN-DAG: buffer_store_dwordx2 +; GCN-DAG: buffer_store_dword v +define void 
@global_truncstore_v3i64_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i64> %x) { + %trunc = trunc <3 x i64> %x to <3 x i32> + store <3 x i32> %trunc, <3 x i32> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i16: +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_dword v +define void @global_truncstore_v3i64_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i64> %x) { + %trunc = trunc <3 x i64> %x to <3 x i16> + store <3 x i16> %trunc, <3 x i16> addrspace(1)* %out + ret void +} + + +; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i8: +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_byte v +define void @global_truncstore_v3i64_to_v3i8(<3 x i8> addrspace(1)* %out, <3 x i64> %x) { + %trunc = trunc <3 x i64> %x to <3 x i8> + store <3 x i8> %trunc, <3 x i8> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i1: +; GCN-DAG: buffer_store_byte v +; GCN-DAG: buffer_store_byte v +; GCN-DAG: buffer_store_byte v +define void @global_truncstore_v3i64_to_v3i1(<3 x i1> addrspace(1)* %out, <3 x i64> %x) { + %trunc = trunc <3 x i64> %x to <3 x i1> + store <3 x i1> %trunc, <3 x i1> addrspace(1)* %out ret void }
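As a cross-check of what the new truncstore tests expect, a host-side model of the v3i64 to v3i32 case, little-endian assumed and the name chosen to mirror the test: each lane is truncated, and the resulting 12 bytes go out as an 8-byte piece plus a 4-byte piece, the buffer_store_dwordx2 / buffer_store_dword pair the checks look for.

#include <cstdint>
#include <cstring>

// Host-side model of the v3i64 -> v3i32 truncating store tested above:
// truncate each lane, then store 12 bytes as one 8-byte piece plus one
// 4-byte piece.
void truncstore_v3i64_to_v3i32(const uint64_t x[3], uint32_t *out) {
  uint32_t trunc[3] = {static_cast<uint32_t>(x[0]),
                       static_cast<uint32_t>(x[1]),
                       static_cast<uint32_t>(x[2])};
  std::memcpy(out, trunc, 8);          // buffer_store_dwordx2: lanes 0-1
  std::memcpy(out + 2, trunc + 2, 4);  // buffer_store_dword: lane 2
}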