Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -1,188 +1,121 @@ -//===-- AMDGPU.td - AMDGPU Tablegen files ------------------*- tablegen -*-===// +//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//===----------------------------------------------------------------------===// +//===------------------------------------------------------------===// include "llvm/Target/Target.td" -//===----------------------------------------------------------------------===// -// Subtarget Features -//===----------------------------------------------------------------------===// - -// Debugging Features - -def FeatureDumpCode : SubtargetFeature <"DumpCode", - "DumpCode", - "true", - "Dump MachineInstrs in the CodeEmitter">; - -def FeatureDumpCodeLower : SubtargetFeature <"dumpcode", - "DumpCode", - "true", - "Dump MachineInstrs in the CodeEmitter">; - -def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer", - "EnableIRStructurizer", - "false", - "Disable IR Structurizer">; - -def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca", - "EnablePromoteAlloca", - "true", - "Enable promote alloca pass">; - -// Target features - -def FeatureIfCvt : SubtargetFeature <"disable-ifcvt", - "EnableIfCvt", - "false", - "Disable the if conversion pass">; +//===------------------------------------------------------------===// +// Subtarget Features (device properties) +//===------------------------------------------------------------===// def FeatureFP64 : SubtargetFeature<"fp64", - "FP64", - "true", - "Enable double precision operations">; - -def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals", - "FP64Denormals", - "true", - "Enable double precision denormal handling", - [FeatureFP64]>; + "FP64", + "true", + "Enable double precision operations" +>; def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf", - "FastFMAF32", - "true", - "Assuming f32 fma is at least as fast as mul + add", - []>; + "FastFMAF32", + "true", + "Assuming f32 fma is at least as fast as mul + add" +>; def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops", - "HalfRate64Ops", - "true", - "Most fp64 instructions are half rate instead of quarter", - []>; - -// Some instructions do not support denormals despite this flag. Using -// fp32 denormals also causes instructions to run at the double -// precision rate for the device. 
-def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals", - "FP32Denormals", - "true", - "Enable single precision denormal handling">; + "HalfRate64Ops", + "true", + "Most fp64 instructions are half rate instead of quarter" +>; def Feature64BitPtr : SubtargetFeature<"64BitPtr", - "Is64bit", - "true", - "Specify if 64-bit addressing should be used">; + "Is64bit", + "true", + "Specify if 64-bit addressing should be used" +>; def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", - "R600ALUInst", - "false", - "Older version of ALU instructions encoding">; + "R600ALUInst", + "false", + "Older version of ALU instructions encoding" +>; def FeatureVertexCache : SubtargetFeature<"HasVertexCache", - "HasVertexCache", - "true", - "Specify use of dedicated vertex cache">; + "HasVertexCache", + "true", + "Specify use of dedicated vertex cache" +>; def FeatureCaymanISA : SubtargetFeature<"caymanISA", - "CaymanISA", - "true", - "Use Cayman ISA">; + "CaymanISA", + "true", + "Use Cayman ISA" +>; def FeatureCFALUBug : SubtargetFeature<"cfalubug", - "CFALUBug", - "true", - "GPU has CF_ALU bug">; - -// XXX - This should probably be removed once enabled by default -def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt", - "EnableLoadStoreOpt", - "true", - "Enable SI load/store optimizer pass">; - -// Performance debugging feature. Allow using DS instruction immediate -// offsets even if the base pointer can't be proven to be base. On SI, -// base pointer values that won't give the same result as a 16-bit add -// are not safe to fold, but this will override the conservative test -// for the base pointer. -def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-folding", - "EnableUnsafeDSOffsetFolding", - "true", - "Force using DS instruction immediate offsets on SI">; - -def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", - "FlatForGlobal", - "true", - "Force to generate flat instruction for global">; + "CFALUBug", + "true", + "GPU has CF_ALU bug" +>; def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", - "FlatAddressSpace", - "true", - "Support flat address space">; + "FlatAddressSpace", + "true", + "Support flat address space" +>; def FeatureXNACK : SubtargetFeature<"xnack", - "EnableXNACK", - "true", - "Enable XNACK support">; - -def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", - "EnableVGPRSpilling", - "true", - "Enable spilling of VGPRs to scratch memory">; + "EnableXNACK", + "true", + "Enable XNACK support" +>; def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug", - "SGPRInitBug", - "true", - "VI SGPR initilization bug requiring a fixed SGPR allocation size">; - -def FeatureEnableHugeScratchBuffer : SubtargetFeature<"huge-scratch-buffer", - "EnableHugeScratchBuffer", - "true", - "Enable scratch buffer sizes greater than 128 GB">; - -def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", - "EnableSIScheduler", - "true", - "Enable SI Machine Scheduler">; + "SGPRInitBug", + "true", + "VI SGPR initilization bug requiring a fixed SGPR allocation size" +>; class SubtargetFeatureFetchLimit : SubtargetFeature <"fetch"#Value, - "TexVTXClauseSize", - Value, - "Limit the maximum number of fetches in a clause to "#Value>; + "TexVTXClauseSize", + Value, + "Limit the maximum number of fetches in a clause to "#Value +>; def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">; def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">; class SubtargetFeatureWavefrontSize : SubtargetFeature< - "wavefrontsize"#Value, - 
"WavefrontSize", - !cast(Value), - "The number of threads per wavefront">; + "wavefrontsize"#Value, + "WavefrontSize", + !cast(Value), + "The number of threads per wavefront" +>; def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>; def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>; def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>; class SubtargetFeatureLDSBankCount : SubtargetFeature < - "ldsbankcount"#Value, - "LDSBankCount", - !cast(Value), - "The number of LDS banks per compute unit.">; + "ldsbankcount"#Value, + "LDSBankCount", + !cast(Value), + "The number of LDS banks per compute unit." +>; def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>; def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>; class SubtargetFeatureISAVersion : SubtargetFeature < - "isaver"#Major#"."#Minor#"."#Stepping, - "IsaVersion", - "ISAVersion"#Major#"_"#Minor#"_"#Stepping, - "Instruction set version number" + "isaver"#Major#"."#Minor#"."#Stepping, + "IsaVersion", + "ISAVersion"#Major#"_"#Minor#"_"#Stepping, + "Instruction set version number" >; def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0>; @@ -192,36 +125,135 @@ def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3>; class SubtargetFeatureLocalMemorySize : SubtargetFeature< - "localmemorysize"#Value, - "LocalMemorySize", - !cast(Value), - "The size of local memory in bytes">; + "localmemorysize"#Value, + "LocalMemorySize", + !cast(Value), + "The size of local memory in bytes" +>; def FeatureGCN : SubtargetFeature<"gcn", - "IsGCN", - "true", - "GCN or newer GPU">; + "IsGCN", + "true", + "GCN or newer GPU" +>; def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding", - "GCN1Encoding", - "true", - "Encoding format for SI and CI">; + "GCN1Encoding", + "true", + "Encoding format for SI and CI" +>; def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding", - "GCN3Encoding", - "true", - "Encoding format for VI">; + "GCN3Encoding", + "true", + "Encoding format for VI" +>; def FeatureCIInsts : SubtargetFeature<"ci-insts", - "CIInsts", - "true", - "Additional intstructions for CI+">; + "CIInsts", + "true", + "Additional intstructions for CI+" +>; + +//===------------------------------------------------------------===// +// Subtarget Features (options and debugging) +//===------------------------------------------------------------===// + +// Some instructions do not support denormals despite this flag. Using +// fp32 denormals also causes instructions to run at the double +// precision rate for the device. 
+def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals", + "FP32Denormals", + "true", + "Enable single precision denormal handling" +>; + +def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals", + "FP64Denormals", + "true", + "Enable double precision denormal handling", + [FeatureFP64] +>; + +def FeatureEnableHugeScratchBuffer : SubtargetFeature< + "huge-scratch-buffer", + "EnableHugeScratchBuffer", + "true", + "Enable scratch buffer sizes greater than 128 GB" +>; + +def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", + "EnableVGPRSpilling", + "true", + "Enable spilling of VGPRs to scratch memory" +>; + +def FeatureDumpCode : SubtargetFeature <"DumpCode", + "DumpCode", + "true", + "Dump MachineInstrs in the CodeEmitter" +>; + +def FeatureDumpCodeLower : SubtargetFeature <"dumpcode", + "DumpCode", + "true", + "Dump MachineInstrs in the CodeEmitter" +>; + +def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer", + "EnableIRStructurizer", + "false", + "Disable IR Structurizer" +>; + +def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca", + "EnablePromoteAlloca", + "true", + "Enable promote alloca pass" +>; + +// XXX - This should probably be removed once enabled by default +def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt", + "EnableLoadStoreOpt", + "true", + "Enable SI load/store optimizer pass" +>; + +// Performance debugging feature. Allow using DS instruction immediate +// offsets even if the base pointer can't be proven to be base. On SI, +// base pointer values that won't give the same result as a 16-bit add +// are not safe to fold, but this will override the conservative test +// for the base pointer. +def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature < + "unsafe-ds-offset-folding", + "EnableUnsafeDSOffsetFolding", + "true", + "Force using DS instruction immediate offsets on SI" +>; + +def FeatureIfCvt : SubtargetFeature <"disable-ifcvt", + "EnableIfCvt", + "false", + "Disable the if conversion pass" +>; + +def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", + "EnableSIScheduler", + "true", + "Enable SI Machine Scheduler" +>; + +def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", + "FlatForGlobal", + "true", + "Force to generate flat instruction for global" +>; // Dummy feature used to disable assembler instructions. 
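A note on how the option and debugging features above are consumed (an illustrative aside, not part of the patch): the four operands of every SubtargetFeature are, in order, the name accepted by -mattr and by the "target-features" function attribute, the AMDGPUSubtarget member the feature initializes, the value written into that member when the feature is requested, and the help text. A hypothetical feature with the same shape:

// Hypothetical example only: neither the feature name nor the subtarget
// member below exists in the backend; the operands are labeled to show
// their roles.
def FeatureExampleOption : SubtargetFeature<"example-option", // -mattr=+example-option
  "EnableExampleOption", // AMDGPUSubtarget member the feature initializes
  "true",                // value stored in that member when the feature is on
  "Hypothetical option-style feature illustrating the operand order"
>;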
def FeatureDisable : SubtargetFeature<"", - "FeatureDisable","true", - "Dummy feature to disable assembler" - " instructions">; + "FeatureDisable","true", + "Dummy feature to disable assembler instructions" +>; class SubtargetFeatureGeneration Implies> : @@ -233,33 +265,39 @@ def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>; def FeatureR600 : SubtargetFeatureGeneration<"R600", - [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]>; + [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0] +>; def FeatureR700 : SubtargetFeatureGeneration<"R700", - [FeatureFetchLimit16, FeatureLocalMemorySize0]>; + [FeatureFetchLimit16, FeatureLocalMemorySize0] +>; def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN", - [FeatureFetchLimit16, FeatureLocalMemorySize32768]>; + [FeatureFetchLimit16, FeatureLocalMemorySize32768] +>; def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS", - [FeatureFetchLimit16, FeatureWavefrontSize64, - FeatureLocalMemorySize32768] + [FeatureFetchLimit16, FeatureWavefrontSize64, + FeatureLocalMemorySize32768] >; def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", - [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768, - FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding, - FeatureLDSBankCount32]>; + [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768, + FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding, + FeatureLDSBankCount32] +>; def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", - [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, - FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace, - FeatureGCN1Encoding, FeatureCIInsts]>; + [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, + FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace, + FeatureGCN1Encoding, FeatureCIInsts] +>; def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", - [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, - FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, - FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>; + [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, + FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, + FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32] +>; //===----------------------------------------------------------------------===// @@ -289,6 +327,7 @@ //===----------------------------------------------------------------------===// def TruePredicate : Predicate<"true">; + def isSICI : Predicate< "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" @@ -298,6 +337,13 @@ "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, AssemblerPredicate<"FeatureGCN3Encoding">; +def isCIVI : Predicate < + "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || " + "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS" +>, AssemblerPredicate<"FeatureCIInsts">; + +def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">; + class PredicateControl { Predicate SubtargetPredicate; Predicate SIAssemblerPredicate = isSICI; Index: lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructions.td +++ lib/Target/AMDGPU/AMDGPUInstructions.td @@ -587,13 +587,6 @@ [{ (void)N; return TM.Options.NoNaNsFPMath; }] >; -/* -class UMUL24Pattern : Pat < - (mul U24:$x, 
U24:$y), - (UMUL24 $x, $y) ->; -*/ - class IMad24Pat : Pat < (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2), (Inst $src0, $src1, $src2) Index: lib/Target/AMDGPU/CIInstructions.td =================================================================== --- lib/Target/AMDGPU/CIInstructions.td +++ lib/Target/AMDGPU/CIInstructions.td @@ -25,14 +25,6 @@ // BUFFER_LOAD_DWORDX3 // BUFFER_STORE_DWORDX3 - -def isCIVI : Predicate < - "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || " - "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS" ->, AssemblerPredicate<"FeatureCIInsts">; - -def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">; - //===----------------------------------------------------------------------===// // VOP1 Instructions //===----------------------------------------------------------------------===// @@ -262,7 +254,7 @@ flat<0x60>, "flat_atomic_fmax_x2", VReg_64 >; -} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst +} // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst let Predicates = [isCI] in { @@ -289,7 +281,7 @@ let Predicates = [isCIVI] in { -// Patterns for global loads with no offset +// Patterns for global loads with no offset. class FlatLoadPat : Pat < (vt (node i64:$addr)), (inst $addr, 0, 0, 0) Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -101,7 +101,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in { defm S_MOV_B32 : SOP1_32 , "s_mov_b32", []>; defm S_MOV_B64 : SOP1_64 , "s_mov_b64", []>; - } // let isRematerializeable = 1 + } // End isRematerializeable = 1 let Uses = [SCC] in { defm S_CMOV_B32 : SOP1_32 , "s_cmov_b32", []>; @@ -1234,7 +1234,7 @@ VOP_F64_I32, uint_to_fp >; -} // let SchedRW = [WriteQuarterRate32] +} // End SchedRW = [WriteQuarterRate32] defm V_FRACT_F32 : VOP1Inst , "v_fract_f32", VOP_F32_F32, AMDGPUfract @@ -1270,7 +1270,7 @@ VOP_F32_F32, AMDGPUrsq >; -} //let SchedRW = [WriteQuarterRate32] +} // End SchedRW = [WriteQuarterRate32] let SchedRW = [WriteDouble] in { @@ -1281,7 +1281,7 @@ VOP_F64_F64, AMDGPUrsq >; -} // let SchedRW = [WriteDouble]; +} // End SchedRW = [WriteDouble]; defm V_SQRT_F32 : VOP1Inst , "v_sqrt_f32", VOP_F32_F32, fsqrt @@ -1710,7 +1710,7 @@ defm V_SAD_U32 : VOP3Inst , "v_sad_u32", VOP_I32_I32_I32_I32 >; -////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>; +//def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>; defm V_DIV_FIXUP_F32 : VOP3Inst < vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup >; @@ -1740,13 +1740,13 @@ VOP_F64_F64_F64, fmaxnum >; -} // isCommutable = 1 +} // End isCommutable = 1 defm V_LDEXP_F64 : VOP3Inst , "v_ldexp_f64", VOP_F64_F64_I32, AMDGPUldexp >; -} // let SchedRW = [WriteDoubleAdd] +} // End let SchedRW = [WriteDoubleAdd] let isCommutable = 1, SchedRW = [WriteQuarterRate32] in { @@ -1764,7 +1764,7 @@ VOP_I32_I32_I32, mulhs >; -} // isCommutable = 1, SchedRW = [WriteQuarterRate32] +} // End isCommutable = 1, SchedRW = [WriteQuarterRate32] let SchedRW = [WriteFloatFMA, WriteSALU] in { defm V_DIV_SCALE_F32 : VOP3bInst , "v_div_scale_f32", @@ -1777,7 +1777,7 @@ defm V_DIV_SCALE_F64 : VOP3bInst , "v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64 >; -} // let SchedRW = [WriteDouble] +} // End SchedRW = [WriteDouble] let isCommutable = 1, Uses = [VCC, EXEC] in { @@ -1814,7 +1814,7 @@ vop3<0x174, 0x292>, 
"v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop >; -} // let SchedRW = [WriteDouble] +} // End SchedRW = [WriteDouble] // These instructions only exist on SI and CI let SubtargetPredicate = isSICI in { @@ -1856,7 +1856,7 @@ // 64-bit vector move instruction. This is mainly used by the SIFoldOperands // pass to enable folding of inline immediates. def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>; -} // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0 +} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0 let hasSideEffects = 1, SALU = 1 in { def SGPR_USE : InstSI <(outs),(ins), "", []>; @@ -1893,7 +1893,7 @@ [(int_amdgcn_loop i64:$saved, bb:$target)] >; -} // end isBranch = 1, isTerminator = 1 +} // End isBranch = 1, isTerminator = 1 def SI_BREAK : InstSI < (outs SReg_64:$dst), @@ -1934,7 +1934,7 @@ >; } // End Uses = [EXEC], Defs = [EXEC,VCC] -} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 +} // End mayLoad = 1, mayStore = 1, hasSideEffects = 1 let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { @@ -1967,7 +1967,7 @@ def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST; -} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0] +} // End Uses = [EXEC], Defs = [EXEC,VCC,M0] multiclass SI_SPILL_SGPR { @@ -1975,8 +1975,7 @@ def _SAVE : InstSI < (outs), (ins sgpr_class:$src, i32imm:$frame_idx), - "", [] - > { + "", []> { let mayStore = 1; let mayLoad = 0; } @@ -1984,8 +1983,7 @@ def _RESTORE : InstSI < (outs sgpr_class:$dst), (ins i32imm:$frame_idx), - "", [] - > { + "", []> { let mayStore = 0; let mayLoad = 1; } @@ -2007,8 +2005,7 @@ (outs), (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), - "", [] - > { + "", []> { let mayStore = 1; let mayLoad = 0; } @@ -2016,8 +2013,7 @@ def _RESTORE : InstSI < (outs vgpr_class:$dst), (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), - "", [] - > { + "", []> { let mayStore = 0; let mayLoad = 1; } @@ -2043,9 +2039,9 @@ } // End Defs = [SCC] -} // end IsCodeGenOnly, isPseudo +} // End isCodeGenOnly, isPseudo -} // end SubtargetPredicate = isGCN +} // End SubtargetPredicate = isGCN let Predicates = [isGCN] in { @@ -2060,7 +2056,6 @@ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0) >; -/* int_SI_export */ def : Pat < (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, f32:$src0, f32:$src1, f32:$src2, f32:$src3), @@ -2445,11 +2440,6 @@ /********** Extraction, Insertion, Building and Casting **********/ /********** ============================================ **********/ -//def : Extract_Element; -//def : Extract_Element; -//def : Extract_Element; -//def : Extract_Element; - foreach Index = 0-2 in { def Extract_Element_v2i32_#Index : Extract_Element < i32, v2i32, Index, !cast(sub#Index) @@ -2514,46 +2504,45 @@ >; } -def : BitConvert ; +// FIXME: Why do only some of these type combinations for SReg and +// VReg? 
+// 32-bit bitcast def : BitConvert ; - -def : BitConvert ; def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +// 64-bit bitcast def : BitConvert ; - def : BitConvert ; - -def : BitConvert ; def : BitConvert ; -def : BitConvert ; +def : BitConvert ; def : BitConvert ; -def : BitConvert ; +def : BitConvert ; def : BitConvert ; -def : BitConvert ; -def : BitConvert ; +def : BitConvert ; def : BitConvert ; +def : BitConvert ; def : BitConvert ; -def : BitConvert ; +def : BitConvert ; def : BitConvert ; +def : BitConvert ; - +// 128-bit bitcast def : BitConvert ; def : BitConvert ; - def : BitConvert ; def : BitConvert ; def : BitConvert ; def : BitConvert ; - - - -def : BitConvert ; +// 256-bit bitcast def : BitConvert ; +def : BitConvert ; def : BitConvert ; def : BitConvert ; +// 512-bit bitcast def : BitConvert ; def : BitConvert ; @@ -2575,7 +2564,7 @@ def : Pat < (fneg (fabs f32:$src)), - (S_OR_B32 $src, 0x80000000) /* Set sign bit */ + (S_OR_B32 $src, 0x80000000) // Set sign bit >; // FIXME: Should use S_OR_B32 @@ -2665,7 +2654,6 @@ /********** Intrinsic Patterns **********/ /********** ================== **********/ -/* llvm.AMDGPU.pow */ def : POW_Common ; def : Pat < @@ -2702,7 +2690,7 @@ def : Ext32Pat ; def : Ext32Pat ; -// Offset in an 32Bit VGPR +// Offset in an 32-bit VGPR def : Pat < (SIload_constant v4i32:$sbase, i32:$voff), (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0) @@ -2934,22 +2922,6 @@ def : MUBUFScratchStorePat ; def : MUBUFScratchStorePat ; -/* -class MUBUFStore_Pattern : Pat < - (st vt:$value, (MUBUFScratch v4i32:$srsrc, i64:$vaddr, u16imm:$offset)), - (Instr $value, $srsrc, $vaddr, $offset) ->; - -let Predicates = [isSICI] in { -def : MUBUFStore_Pattern ; -def : MUBUFStore_Pattern ; -def : MUBUFStore_Pattern ; -def : MUBUFStore_Pattern ; -def : MUBUFStore_Pattern ; -} // End Predicates = [isSICI] - -*/ - //===----------------------------------------------------------------------===// // MTBUF Patterns //===----------------------------------------------------------------------===//
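Stepping back from the individual hunks (an illustrative sketch, not part of the patch): the buffer and flat selection patterns touched in this file and in CIInstructions.td sit under subtarget predicates such as isGCN, including the isCIVI and HasFlatAddressSpace predicates this patch moves into AMDGPU.td. The idiom looks roughly like the following; flat_load_frag is a placeholder for whatever load fragment the real pattern is instantiated with, and the trailing zero operands mirror the FlatLoadPat class shown above.

let Predicates = [isCIVI] in {

// Select a flat load for a 32-bit load from a 64-bit address.
// FLAT_LOAD_DWORD is the CI+ flat load from CIInstructions.td; the
// zeros fill its glc/slc/tfe operands, as in FlatLoadPat.
def : Pat <
  (i32 (flat_load_frag i64:$addr)),
  (FLAT_LOAD_DWORD $addr, 0, 0, 0)
>;

} // End Predicates = [isCIVI]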