Index: llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
@@ -66,6 +66,22 @@
   let PrimaryKeyName = "getMIMGDimInfo";
 }
 
+class MIMGLZMapping<MIMGBaseOpcode l, MIMGBaseOpcode lz> {
+  MIMGBaseOpcode L = l;
+  MIMGBaseOpcode LZ = lz;
+}
+
+def MIMGLZMappingTable : GenericTable {
+  let FilterClass = "MIMGLZMapping";
+  let CppTypeName = "MIMGLZMappingInfo";
+  let Fields = ["L", "LZ"];
+  GenericEnum TypeOf_L = MIMGBaseOpcode;
+  GenericEnum TypeOf_LZ = MIMGBaseOpcode;
+
+  let PrimaryKey = ["L"];
+  let PrimaryKeyName = "getMIMGLZMappingInfo";
+}
+
 class mimg <bits<7> si, bits<7> vi = si> {
   field bits<7> SI = si;
   field bits<7> VI = vi;
@@ -547,3 +563,13 @@
                            AMDGPUImageDimAtomicIntrinsics) in {
   def : ImageDimIntrinsicInfo<intr>;
 }
+
+// L to LZ Optimization Mapping
+def : MIMGLZMapping<IMAGE_SAMPLE_L, IMAGE_SAMPLE_LZ>;
+def : MIMGLZMapping<IMAGE_SAMPLE_C_L, IMAGE_SAMPLE_C_LZ>;
+def : MIMGLZMapping<IMAGE_SAMPLE_L_O, IMAGE_SAMPLE_LZ_O>;
+def : MIMGLZMapping<IMAGE_SAMPLE_C_L_O, IMAGE_SAMPLE_C_LZ_O>;
+def : MIMGLZMapping<IMAGE_GATHER4_L, IMAGE_GATHER4_LZ>;
+def : MIMGLZMapping<IMAGE_GATHER4_C_L, IMAGE_GATHER4_C_LZ>;
+def : MIMGLZMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_LZ_O>;
+def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4555,6 +4555,9 @@
   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
       AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
+  const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
+      AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
+  unsigned IntrOpcode = Intr->BaseOpcode;
 
   SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
   bool IsD16 = false;
@@ -4640,6 +4643,18 @@
   SmallVector<SDValue, 4> VAddrs;
   for (unsigned i = 0; i < NumVAddrs; ++i)
     VAddrs.push_back(Op.getOperand(AddrIdx + i));
+
+  // Optimize _L to _LZ when _L is zero
+  if (LZMappingInfo) {
+    if (auto ConstantLod =
+         dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) {
+      if (ConstantLod->isZero() || ConstantLod->isNegative()) {
+        IntrOpcode = LZMappingInfo->LZ;  // set new opcode to _lz variant of _l
+        VAddrs.pop_back();               // remove 'lod'
+      }
+    }
+  }
+
   SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
 
   SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
@@ -4699,10 +4714,10 @@
   int Opcode = -1;
 
   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
-    Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx8,
+    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
                                    NumVDataDwords, NumVAddrDwords);
   if (Opcode == -1)
-    Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx6,
+    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
                                    NumVDataDwords, NumVAddrDwords);
   assert(Opcode != -1);
 
Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,6 +42,7 @@
 #define GET_MIMGBaseOpcode_DECL
 #define GET_MIMGDim_DECL
 #define GET_MIMGEncoding_DECL
+#define GET_MIMGLZMapping_DECL
 #include "AMDGPUGenSearchableTables.inc"
 
 namespace IsaInfo {
@@ -211,6 +212,14 @@
 LLVM_READONLY
 const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
 
+struct MIMGLZMappingInfo {
+  MIMGBaseOpcode L;
+  MIMGBaseOpcode LZ;
+};
+
+LLVM_READONLY
+const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
+
 LLVM_READONLY
 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                   unsigned VDataDwords, unsigned VAddrDwords);
Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -110,6 +110,7 @@
 #define GET_MIMGBaseOpcodesTable_IMPL
 #define GET_MIMGDimInfoTable_IMPL
 #define GET_MIMGInfoTable_IMPL
+#define GET_MIMGLZMappingTable_IMPL
 #include "AMDGPUGenSearchableTables.inc"
 
 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll
@@ -0,0 +1,113 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+
+; GCN-LABEL: {{^}}sample_l_1d:
+; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_l_2d:
+; GCN: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_1d:
+; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_2d:
+; GCN: image_sample_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_l_o_1d:
+; GCN: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_l_o_2d:
+; GCN: image_sample_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_o_1d:
+; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_o_2d:
+; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_l_2d:
+; GCN: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_l_2d:
+; GCN: image_gather4_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_l_o_2d:
+; GCN: image_gather4_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_l_o_2d:
+; GCN: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1