Index: lib/Target/AMDGPU/R600ISelLowering.h
===================================================================
--- lib/Target/AMDGPU/R600ISelLowering.h
+++ lib/Target/AMDGPU/R600ISelLowering.h
@@ -41,6 +41,10 @@
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
                          EVT VT) const override;
 
+  bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
+                                      unsigned Align,
+                                      bool *IsFast) const override;
+
 private:
   unsigned Gen;
   /// Each OpenCL kernel has nine implicit parameters that are stored in the
Index: lib/Target/AMDGPU/R600ISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/R600ISelLowering.cpp
+++ lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1784,6 +1784,36 @@
   return VT.changeVectorElementTypeToInteger();
 }
 
+/// Decide whether a misaligned memory access of type \p VT is allowed.
+///
+/// Returns false for non-simple types, MVT::Other, and types narrower than
+/// i32. Otherwise returns true only when \p VT is wider than i32 and
+/// \p Align is a multiple of 4. \p AddrSpace is not consulted.
+///
+/// If \p IsFast is non-null it is first cleared, then set to true once the
+/// early rejections have passed; note that this happens before the final
+/// width/alignment check, so *IsFast may be true even when the result is
+/// false.
+bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+                                                        unsigned AddrSpace,
+                                                        unsigned Align,
+                                                        bool *IsFast) const {
+  if (IsFast)
+    *IsFast = false;
+
+  if (!VT.isSimple() || VT == MVT::Other)
+    return false;
+
+  if (VT.bitsLT(MVT::i32))
+    return false;
+
+  // TODO: This is a rough estimate.
+  if (IsFast)
+    *IsFast = true;
+
+  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
+}
+
 static SDValue CompactSwizzlableVector(
   SelectionDAG &DAG, SDValue VectorEntry,
   DenseMap<unsigned, unsigned> &RemapSwizzle) {
Index: test/CodeGen/AMDGPU/store.ll
===================================================================
--- test/CodeGen/AMDGPU/store.ll
+++ test/CodeGen/AMDGPU/store.ll
@@ -358,20 +358,13 @@
   ret void
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
 ; When i128 was a legal type this program generated cannot select errors:
 
 ; FUNC-LABEL: {{^}}"i128-const-store":
-; FIXME: We should be able to to this with one store instruction
-; EG: STORE_RAW
-; EG: STORE_RAW
-; EG: STORE_RAW
-; EG: STORE_RAW
-; CM: STORE_DWORD
-; CM: STORE_DWORD
-; CM: STORE_DWORD
-; CM: STORE_DWORD
+; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 1
+
+; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}}, T{{[0-9]+}}.X
+
 ; SI: buffer_store_dwordx4
 define void @i128-const-store(i32 addrspace(1)* %out) {
 entry:
@@ -384,3 +377,5 @@
   store i32 2, i32 addrspace(1)* %arrayidx6, align 4
   ret void
 }
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }