diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1343,7 +1343,8 @@ def D16PreservesUnusedBits : Predicate<"Subtarget->d16PreservesUnusedBits()">, - AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureSRAMECC))>; + AssemblerPredicate<(all_of FeatureGFX9Insts, (not FeatureSRAMECC), + (not FeatureGFX90AInsts))>; def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">; def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -591,7 +591,9 @@ } bool d16PreservesUnusedBits() const { - return hasD16LoadStore() && !TargetID.isSramEccOnOrAny(); + // gfx90a's d16 loads don't preserve unused bits. + return hasD16LoadStore() && !TargetID.isSramEccOnOrAny() && + !hasGFX90AInsts(); } bool hasD16Images() const {