diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -113,30 +113,36 @@
 /// Applies Function(II.Args, II.ArgTys) and replaces the intrinsic call with
 /// the modified arguments.
 static Optional<Instruction *> modifyIntrinsicCall(
-    IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC,
+    IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
+    InstCombiner &IC,
     std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
         Func) {
   SmallVector<Type *, 4> ArgTys;
-  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
+  if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
     return None;
 
-  SmallVector<Value *, 8> Args(II.args());
+  SmallVector<Value *, 8> Args(OldIntr.args());
 
   // Modify arguments and types
   Func(Args, ArgTys);
 
-  Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys);
+  Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys);
 
   CallInst *NewCall = IC.Builder.CreateCall(I, Args);
-  NewCall->takeName(&II);
-  NewCall->copyMetadata(II);
+  NewCall->takeName(&OldIntr);
+  NewCall->copyMetadata(OldIntr);
   if (isa<FPMathOperator>(NewCall))
-    NewCall->copyFastMathFlags(&II);
+    NewCall->copyFastMathFlags(&OldIntr);
 
   // Erase and replace uses
-  if (!II.getType()->isVoidTy())
-    IC.replaceInstUsesWith(II, NewCall);
-  return IC.eraseInstFromFunction(II);
+  if (!InstToReplace.getType()->isVoidTy())
+    IC.replaceInstUsesWith(InstToReplace, NewCall);
+
+  auto RetValue = IC.eraseInstFromFunction(InstToReplace);
+  if (!OldIntr.isIdenticalTo(&InstToReplace))
+    IC.eraseInstFromFunction(OldIntr);
+
+  return RetValue;
 }
 
 static Optional<Instruction *>
@@ -153,7 +159,7 @@
             AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
                                                      ImageDimIntr->Dim);
         return modifyIntrinsicCall(
-            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
               Args.erase(Args.begin() + ImageDimIntr->LodIndex);
             });
       }
@@ -170,7 +176,7 @@
             AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
                                                      ImageDimIntr->Dim);
         return modifyIntrinsicCall(
-            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
               Args.erase(Args.begin() + ImageDimIntr->MipIndex);
             });
       }
@@ -187,7 +193,7 @@
             AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
                                                      ImageDimIntr->Dim);
         return modifyIntrinsicCall(
-            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
               Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
               ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
             });
@@ -205,13 +211,41 @@
             AMDGPU::getImageDimIntrinsicByBaseOpcode(
                 OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
         return modifyIntrinsicCall(
-            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
               Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
             });
       }
     }
   }
 
+  // Try to use D16
+  if (ST->hasD16LoadStore()) {
+
+    const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+        AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
+
+    if (BaseOpcode->HasD16) {
+
+      // If the only use of the image intrinsic is an fptrunc to half, replace
+      // both the fptrunc and the image intrinsic with one image intrinsic that
+      // returns the fptrunc's half type directly (the D16 form).
+      if (II.hasOneUse()) {
+        Instruction *User = II.user_back();
+
+        if (User->getOpcode() == Instruction::FPTrunc &&
+            User->getType()->getScalarType()->isHalfTy()) {
+
+          return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
+                                     [&](auto &Args, auto &ArgTys) {
+                                       // Change return type of image intrinsic.
+                                       // Set it to return type of fptrunc.
+                                       ArgTys[0] = User->getType();
+                                     });
+        }
+      }
+    }
+  }
+
   // Try to use A16 or G16
   if (!ST->hasA16() && !ST->hasG16())
     return None;
@@ -263,7 +297,7 @@
                                : Type::getInt16Ty(II.getContext());
 
   return modifyIntrinsicCall(
-      II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
+      II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
         ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
         if (!OnlyDerivatives) {
           ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/image-sample-half.ll b/llvm/test/Transforms/InstCombine/AMDGPU/image-sample-half.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/image-sample-half.ll
@@ -0,0 +1,129 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx810 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX81 %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx900 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX9PLUS %s
+; RUN: opt -mtriple=amdgcn--amdpal -mcpu=gfx1010 -S -passes=instcombine %s | FileCheck --check-prefixes=GFX9PLUS %s
+
+define amdgpu_ps half @image_sample_2d_fptrunc_to_d16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; GFX81-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX81-NEXT:  main_body:
+; GFX81-NEXT:    [[TEX:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX81-NEXT:    [[TEX_HALF:%.*]] = fptrunc float [[TEX]] to half
+; GFX81-NEXT:    ret half [[TEX_HALF]]
+;
+; GFX9PLUS-LABEL: @image_sample_2d_fptrunc_to_d16(
+; GFX9PLUS-NEXT:  main_body:
+; GFX9PLUS-NEXT:    [[TEX:%.*]] = call half @llvm.amdgcn.image.sample.lz.2d.f16.f32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9PLUS-NEXT:    ret half [[TEX]]
+;
+main_body:
+  %tex = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  %tex_half = fptrunc float %tex to half
+  ret half %tex_half
+}
+
+define amdgpu_ps half @image_sample_2d_v2f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; GFX81-LABEL: @image_sample_2d_v2f32(
+; GFX81-NEXT:  main_body:
+; GFX81-NEXT:    [[TEX:%.*]] = call reassoc nnan nsz arcp contract afn <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX81-NEXT:    [[TEX_2_HALF:%.*]] = fptrunc <2 x float> [[TEX]] to <2 x half>
+; GFX81-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX_2_HALF]], i64 0
+; GFX81-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX_2_HALF]], i64 1
+; GFX81-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX81-NEXT:    ret half [[ADDF_SUM_0]]
+;
+; GFX9PLUS-LABEL: @image_sample_2d_v2f32(
+; GFX9PLUS-NEXT:  main_body:
+; GFX9PLUS-NEXT:    [[TEX:%.*]] = call reassoc nnan nsz arcp contract afn <2 x half> @llvm.amdgcn.image.sample.lz.2d.v2f16.f32(i32 3, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9PLUS-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <2 x half> [[TEX]], i64 0
+; GFX9PLUS-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <2 x half> [[TEX]], i64 1
+; GFX9PLUS-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9PLUS-NEXT:    ret half [[ADDF_SUM_0]]
+;
+main_body:
+  %tex = call reassoc nnan nsz arcp contract afn <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 3, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  %tex_2_half = fptrunc <2 x float> %tex to <2 x half>
+  %tex_half_0 = extractelement <2 x half> %tex_2_half, i64 0
+  %tex_half_1 = extractelement <2 x half> %tex_2_half, i64 1
+  %addf_sum.0 = fadd half %tex_half_0, %tex_half_1
+  ret half %addf_sum.0
+}
+
+define amdgpu_ps half @image_sample_2d_v3f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; GFX81-LABEL: @image_sample_2d_v3f32(
+; GFX81-NEXT:  main_body:
+; GFX81-NEXT:    [[TEX:%.*]] = call reassoc nnan nsz arcp contract afn <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX81-NEXT:    [[TEX_3_HALF:%.*]] = fptrunc <3 x float> [[TEX]] to <3 x half>
+; GFX81-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX_3_HALF]], i64 0
+; GFX81-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX_3_HALF]], i64 1
+; GFX81-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX_3_HALF]], i64 2
+; GFX81-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX81-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX81-NEXT:    ret half [[ADDF_SUM_1]]
+;
+; GFX9PLUS-LABEL: @image_sample_2d_v3f32(
+; GFX9PLUS-NEXT:  main_body:
+; GFX9PLUS-NEXT:    [[TEX:%.*]] = call reassoc nnan nsz arcp contract afn <3 x half> @llvm.amdgcn.image.sample.lz.2d.v3f16.f32(i32 7, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9PLUS-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <3 x half> [[TEX]], i64 0
+; GFX9PLUS-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <3 x half> [[TEX]], i64 1
+; GFX9PLUS-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <3 x half> [[TEX]], i64 2
+; GFX9PLUS-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9PLUS-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[ADDF_SUM_0]], [[TEX_HALF_2]]
+; GFX9PLUS-NEXT:    ret half [[ADDF_SUM_1]]
+;
+main_body:
+  %tex = call reassoc nnan nsz arcp contract afn <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32(i32 7, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  %tex_3_half = fptrunc <3 x float> %tex to <3 x half>
+  %tex_half_0 = extractelement <3 x half> %tex_3_half, i64 0
+  %tex_half_1 = extractelement <3 x half> %tex_3_half, i64 1
+  %tex_half_2 = extractelement <3 x half> %tex_3_half, i64 2
+  %addf_sum.0 = fadd half %tex_half_0, %tex_half_1
+  %addf_sum.1 = fadd half %addf_sum.0, %tex_half_2
+  ret half %addf_sum.1
+}
+
+define amdgpu_ps half @image_sample_2d_v4f32(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; GFX81-LABEL: @image_sample_2d_v4f32(
+; GFX81-NEXT:  main_body:
+; GFX81-NEXT:    [[TEX:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX81-NEXT:    [[TEX_4_HALF:%.*]] = fptrunc <4 x float> [[TEX]] to <4 x half>
+; GFX81-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX_4_HALF]], i64 0
+; GFX81-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX_4_HALF]], i64 1
+; GFX81-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX_4_HALF]], i64 2
+; GFX81-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX_4_HALF]], i64 3
+; GFX81-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX81-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX81-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX81-NEXT:    ret half [[ADDF_SUM_2]]
+;
+; GFX9PLUS-LABEL: @image_sample_2d_v4f32(
+; GFX9PLUS-NEXT:  main_body:
+; GFX9PLUS-NEXT:    [[TEX:%.*]] = call reassoc nnan nsz arcp contract afn <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; GFX9PLUS-NEXT:    [[TEX_HALF_0:%.*]] = extractelement <4 x half> [[TEX]], i64 0
+; GFX9PLUS-NEXT:    [[TEX_HALF_1:%.*]] = extractelement <4 x half> [[TEX]], i64 1
+; GFX9PLUS-NEXT:    [[TEX_HALF_2:%.*]] = extractelement <4 x half> [[TEX]], i64 2
+; GFX9PLUS-NEXT:    [[TEX_HALF_3:%.*]] = extractelement <4 x half> [[TEX]], i64 3
+; GFX9PLUS-NEXT:    [[ADDF_SUM_0:%.*]] = fadd half [[TEX_HALF_0]], [[TEX_HALF_1]]
+; GFX9PLUS-NEXT:    [[ADDF_SUM_1:%.*]] = fadd half [[TEX_HALF_2]], [[TEX_HALF_3]]
+; GFX9PLUS-NEXT:    [[ADDF_SUM_2:%.*]] = fadd half [[ADDF_SUM_0]], [[ADDF_SUM_1]]
+; GFX9PLUS-NEXT:    ret half [[ADDF_SUM_2]]
+;
+main_body:
+  %tex = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
+  %tex_4_half = fptrunc <4 x float> %tex to <4 x half>
+  %tex_half_0 = extractelement <4 x half> %tex_4_half, i64 0
+  %tex_half_1 = extractelement <4 x half> %tex_4_half, i64 1
+  %tex_half_2 = extractelement <4 x half> %tex_4_half, i64 2
+  %tex_half_3 = extractelement <4 x half> %tex_4_half, i64 3
+  %addf_sum.0 = fadd half %tex_half_0, %tex_half_1
+  %addf_sum.1 = fadd half %tex_half_2, %tex_half_3
+  %addf_sum.2 = fadd half %addf_sum.0, %addf_sum.1
+  ret half %addf_sum.2
+}
+
+declare float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
+
+attributes #0 = { nounwind readonly willreturn}
+