Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -571,6 +571,7 @@ MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo *FrameInfo = MF->getFrameInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned Size = FrameInfo->getObjectSize(FrameIndex); @@ -587,8 +588,14 @@ // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for spilling // SGPRs. - unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize()); - BuildMI(MBB, MI, DL, get(Opcode)) + const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(RC->getSize())); + + // The SGPR spill/restore instructions only work on numbered SGPRs, so + // we need to make sure we are using the correct register class. + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + MRI.constrainRegClass(SrcReg, RI.getRegClass(OpDesc.OpInfo[0].RegClass)); + + BuildMI(MBB, MI, DL, OpDesc) .addReg(SrcReg) // src .addFrameIndex(FrameIndex) // frame_idx .addMemOperand(MMO); @@ -661,6 +668,7 @@ MachineFunction *MF = MBB.getParent(); const SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo *FrameInfo = MF->getFrameInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); unsigned Size = FrameInfo->getObjectSize(FrameIndex); @@ -672,10 +680,16 @@ PtrInfo, MachineMemOperand::MOLoad, Size, Align); if (RI.isSGPRClass(RC)) { + const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(RC->getSize())); + + // The SGPR spill/restore instructions only work on numbered SGPRs, so + // we need to make sure we are using the correct register class. 
+ if(TargetRegisterInfo::isVirtualRegister(DestReg)) + MRI.constrainRegClass(DestReg, RI.getRegClass(OpDesc.OpInfo[0].RegClass)); + // FIXME: Maybe this should not include a memoperand because it will be // lowered to non-memory instructions. - unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize()); - BuildMI(MBB, MI, DL, get(Opcode), DestReg) + BuildMI(MBB, MI, DL, OpDesc, DestReg) .addFrameIndex(FrameIndex) // frame_idx .addMemOperand(MMO); Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -2033,9 +2033,9 @@ } // End UseNamedOperandTable = 1 } -// It's unclear whether you can use M0 as the output of v_readlane_b32 -// instructions, so use SGPR_32 register class for spills to prevent -// this from happening. +// You cannot use M0 as the output of v_readlane_b32 instructions or use it +// in the sdata operand of SMEM instructions, so use SGPR_32 register class +// for SI_SPILL_S32_* instructions to prevent this from happening. 
defm SI_SPILL_S32 : SI_SPILL_SGPR ; defm SI_SPILL_S64 : SI_SPILL_SGPR ; defm SI_SPILL_S128 : SI_SPILL_SGPR ; Index: test/CodeGen/AMDGPU/sgpr-spill-regclass.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/sgpr-spill-regclass.ll @@ -0,0 +1,468 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s + +@global = external unnamed_addr addrspace(2) constant [64 x <2 x double>], align 16 + +; CHECK: {{^}}hoge: +; CHECK: s_endpgm +; Function Attrs: nounwind uwtable +define spir_kernel void @hoge(float addrspace(1)* nocapture readonly %arg, float addrspace(1)* %arg1, float %arg2) #0 align 2 { +bb: + %tmp = tail call i32 @llvm.r600.read.tidig.z() #2 + %tmp3 = zext i32 %tmp to i64 + %tmp4 = add nuw nsw i64 0, %tmp3 + %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp4 + %tmp6 = load float, float addrspace(1)* %tmp5, align 4 + %tmp7 = fmul float %tmp6, %arg2 + %tmp8 = fcmp olt float undef, 6.250000e-02 + %tmp9 = tail call float @llvm.fma.f32(float undef, float undef, float undef) #2 + %tmp10 = fadd float undef, %tmp9 + %tmp11 = fadd float undef, undef + %tmp12 = select i1 %tmp8, float %tmp10, float %tmp11 + %tmp13 = icmp ugt i32 undef, 2139095039 + %tmp14 = select i1 %tmp13, float %tmp7, float %tmp12 + %tmp15 = icmp eq i32 undef, 0 + %tmp16 = icmp sgt i32 undef, -2047 + %tmp17 = select i1 %tmp16, i32 undef, i32 -2047 + %tmp18 = icmp sgt i32 %tmp17, 0 + %tmp19 = select i1 %tmp18, double undef, double undef + %tmp20 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp4 + %tmp21 = fpext float %tmp14 to double + %tmp22 = select i1 undef, double 0x7FF8000000000000, double %tmp21 + %tmp23 = select i1 %tmp15, double 0xFFF0000000000000, double %tmp22 + %tmp24 = fptrunc double undef to float + %tmp25 = fptosi double undef to i32 + %tmp26 = tail call double @llvm.fma.f64(double undef, double undef, double 0x3FA5555555555555) #2 + %tmp27 = tail call 
double @llvm.fma.f64(double undef, double %tmp26, double 0x3FC5555555555555) #2 + %tmp28 = select i1 undef, float 0.000000e+00, float undef + %tmp29 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp28, float addrspace(1)* %tmp29, align 4 + %tmp30 = fmul double undef, 1.200000e+17 + %tmp31 = fptrunc double %tmp30 to float + store float %tmp31, float addrspace(1)* undef, align 4 + %tmp32 = fcmp olt double undef, 0xC0874910D52D3051 + %tmp33 = select i1 %tmp32, float 0.000000e+00, float undef + store float %tmp33, float addrspace(1)* undef, align 4 + %tmp34 = load <2 x double>, <2 x double> addrspace(2)* undef, align 16 + %tmp35 = extractelement <2 x double> %tmp34, i64 0 + %tmp36 = fadd double %tmp35, undef + %tmp37 = fmul double undef, %tmp36 + %tmp38 = fmul double undef, %tmp37 + %tmp39 = select i1 undef, double %tmp38, double undef + %tmp40 = tail call i1 @llvm.amdgcn.class.f64(double undef, i32 3) #2 + %tmp41 = select i1 %tmp40, double undef, double %tmp39 + %tmp42 = fcmp ogt double undef, 0x40862E42FEFA39EF + %tmp43 = fptrunc double %tmp41 to float + %tmp44 = select i1 %tmp42, float 0x7FF0000000000000, float %tmp43 + %tmp45 = select i1 undef, float 0.000000e+00, float %tmp44 + store float %tmp45, float addrspace(1)* undef, align 4 + %tmp46 = fptosi double undef to i32 + %tmp47 = ashr i32 %tmp46, 6 + %tmp48 = icmp eq i32 %tmp47, -1022 + %tmp49 = and i1 %tmp48, undef + %tmp50 = icmp slt i32 undef, -1022 + %tmp51 = or i1 %tmp50, undef + %tmp52 = fptosi double undef to i32 + %tmp53 = ashr i32 %tmp52, 6 + %tmp54 = fadd double undef, undef + %tmp55 = icmp slt i32 %tmp53, -1022 + %tmp56 = or i1 %tmp55, false + %tmp57 = fmul double undef, undef + %tmp58 = bitcast double %tmp54 to i64 + %tmp59 = and i64 %tmp58, -9223372036854775808 + %tmp60 = or i64 %tmp59, 9218868437227405312 + %tmp61 = bitcast i64 %tmp60 to double + %tmp62 = select i1 undef, double %tmp61, double undef + %tmp63 = icmp slt i32 undef, -53 + %tmp64 = select i1 %tmp63, 
double undef, double %tmp62 + %tmp65 = select i1 undef, double %tmp54, double %tmp64 + %tmp66 = select i1 %tmp56, double %tmp57, double %tmp65 + %tmp67 = select i1 undef, double undef, double %tmp66 + %tmp68 = fptrunc double %tmp67 to float + %tmp69 = select i1 undef, float 0x7FF0000000000000, float %tmp68 + %tmp70 = select i1 undef, float 0.000000e+00, float %tmp69 + store float %tmp70, float addrspace(1)* undef, align 4 + store float 0x42BE036940000000, float addrspace(1)* undef, align 4 + %tmp71 = fptosi double undef to i32 + %tmp72 = ashr i32 %tmp71, 8 + %tmp73 = add nsw i32 %tmp72, 1023 + %tmp74 = zext i32 %tmp73 to i64 + %tmp75 = shl i64 %tmp74, 52 + %tmp76 = bitcast i64 %tmp75 to double + %tmp77 = fmul double %tmp76, undef + store float 0x42D0B07140000000, float addrspace(1)* undef, align 4 + %tmp78 = fcmp oeq double undef, 0.000000e+00 + %tmp79 = or i1 undef, %tmp78 + %tmp80 = tail call i1 @llvm.amdgcn.class.f64(double undef, i32 516) #2 + store float 0x42BD1A94A0000000, float addrspace(1)* undef, align 4 + %tmp81 = fmul double undef, 0x4072DEE148BA83F5 + %tmp82 = fsub double 0x403E56CD60708320, %tmp81 + %tmp83 = fcmp ogt double %tmp82, 0x40862E42FEFA39EF + %tmp84 = select i1 %tmp83, float 0x7FF0000000000000, float undef + %tmp85 = select i1 undef, float 0.000000e+00, float %tmp84 + store float %tmp85, float addrspace(1)* undef, align 4 + store float 0x42BB48EB60000000, float addrspace(1)* undef, align 4 + store float 0x42AB48EB60000000, float addrspace(1)* undef, align 4 + store float 0x42CFD512A0000000, float addrspace(1)* undef, align 4 + %tmp86 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x42A5D3EF80000000, float addrspace(1)* %tmp86, align 4 + %tmp87 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x42A05EF3A0000000, float addrspace(1)* %tmp87, align 4 + %tmp88 = fmul double undef, 0x40571547652B82FE + %tmp89 = fptosi double %tmp88 to i32 + %tmp90 = sitofp i32 %tmp89 to double + 
%tmp91 = ashr i32 %tmp89, 6 + %tmp92 = tail call double @llvm.fma.f64(double 0xBF862E42FEFA0000, double %tmp90, double undef) #2 + %tmp93 = tail call double @llvm.fma.f64(double 0xBD1CF79ABC9E3B39, double %tmp90, double %tmp92) #2 + %tmp94 = tail call double @llvm.fma.f64(double %tmp93, double 0x3F56C16C16C16C17, double 0x3F81111111111111) #2 + %tmp95 = icmp eq i32 %tmp91, -1022 + %tmp96 = fcmp olt double undef, 1.000000e+00 + %tmp97 = and i1 %tmp95, %tmp96 + %tmp98 = icmp sgt i32 undef, -2047 + %tmp99 = select i1 %tmp98, i32 undef, i32 -2047 + %tmp100 = icmp slt i32 %tmp99, 2047 + %tmp101 = select i1 %tmp100, i32 %tmp99, i32 2047 + %tmp102 = select i1 undef, float 0.000000e+00, float undef + %tmp103 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp102, float addrspace(1)* %tmp103, align 4 + %tmp104 = icmp eq i32 undef, -1022 + %tmp105 = and i1 %tmp104, undef + %tmp106 = select i1 undef, float 0x7FF0000000000000, float undef + %tmp107 = select i1 undef, float 0.000000e+00, float %tmp106 + %tmp108 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp107, float addrspace(1)* %tmp108, align 4 + %tmp109 = fadd double 0.000000e+00, 0x4040172079F30B25 + %tmp110 = tail call i1 @llvm.amdgcn.class.f64(double %tmp109, i32 3) #2 + %tmp111 = select i1 %tmp110, double %tmp109, double undef + %tmp112 = fptrunc double %tmp111 to float + %tmp113 = select i1 false, float 0x7FF0000000000000, float %tmp112 + %tmp114 = select i1 false, float 0.000000e+00, float %tmp113 + store float %tmp114, float addrspace(1)* undef, align 4 + %tmp115 = fsub double 0x403F0F3C020ECDF9, undef + %tmp116 = fmul double %tmp115, 0x40571547652B82FE + %tmp117 = fptosi double %tmp116 to i32 + %tmp118 = ashr i32 %tmp117, 6 + %tmp119 = getelementptr inbounds [64 x <2 x double>], [64 x <2 x double>] addrspace(2)* @global, i64 0, i64 undef + %tmp120 = load <2 x double>, <2 x double> addrspace(2)* %tmp119, align 16 + %tmp121 = extractelement 
<2 x double> %tmp120, i64 0 + %tmp122 = fadd double %tmp121, undef + %tmp123 = icmp slt i32 %tmp118, -1022 + %tmp124 = icmp eq i32 %tmp118, -1022 + %tmp125 = fcmp olt double %tmp122, 1.000000e+00 + %tmp126 = and i1 %tmp124, %tmp125 + %tmp127 = or i1 %tmp123, %tmp126 + %tmp128 = select i1 undef, double %tmp122, double undef + %tmp129 = select i1 %tmp127, double undef, double %tmp128 + %tmp130 = tail call i1 @llvm.amdgcn.class.f64(double %tmp115, i32 3) #2 + %tmp131 = select i1 %tmp130, double %tmp115, double %tmp129 + %tmp132 = fcmp ogt double %tmp115, 0x40862E42FEFA39EF + %tmp133 = fptrunc double %tmp131 to float + %tmp134 = select i1 %tmp132, float 0x7FF0000000000000, float %tmp133 + %tmp135 = select i1 undef, float 0.000000e+00, float %tmp134 + %tmp136 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp135, float addrspace(1)* %tmp136, align 4 + %tmp137 = fmul double undef, 0x40B192C1CB6848BF + %tmp138 = fsub double 0x40384E8972DAE8EF, %tmp137 + %tmp139 = fcmp ogt double %tmp138, 0x40862E42FEFA39EF + %tmp140 = select i1 %tmp139, float 0x7FF0000000000000, float undef + %tmp141 = select i1 undef, float 0.000000e+00, float %tmp140 + store float %tmp141, float addrspace(1)* undef, align 4 + store float 0x426D1A94A0000000, float addrspace(1)* undef, align 4 + %tmp142 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + %tmp143 = fmul double undef, 0x40571547652B82FE + %tmp144 = fptosi double %tmp143 to i32 + %tmp145 = ashr i32 %tmp144, 6 + %tmp146 = icmp slt i32 %tmp145, -1022 + %tmp147 = or i1 %tmp146, false + %tmp148 = select i1 %tmp147, double 0.000000e+00, double undef + %tmp149 = select i1 undef, double undef, double %tmp148 + %tmp150 = fptrunc double %tmp149 to float + %tmp151 = select i1 undef, float 0x7FF0000000000000, float %tmp150 + %tmp152 = select i1 undef, float 0.000000e+00, float %tmp151 + store float %tmp152, float addrspace(1)* undef, align 4 + store float 0x429ED99D80000000, float addrspace(1)* 
undef, align 4 + store float 0x42B05EF3A0000000, float addrspace(1)* undef, align 4 + %tmp153 = fptrunc double undef to float + store float %tmp153, float addrspace(1)* undef, align 4 + %tmp154 = fmul double %tmp23, 9.700000e-01 + %tmp155 = fsub double 0x4042CBE022EAE693, %tmp154 + %tmp156 = fmul double undef, 0x40737FE8CAC4B4D0 + %tmp157 = fsub double %tmp155, %tmp156 + %tmp158 = fmul double %tmp157, 0x40571547652B82FE + %tmp159 = fptosi double %tmp158 to i32 + %tmp160 = and i32 %tmp159, 63 + %tmp161 = tail call double @llvm.fma.f64(double 0xBF862E42FEFA0000, double undef, double %tmp157) #2 + %tmp162 = tail call double @llvm.fma.f64(double 0xBD1CF79ABC9E3B39, double undef, double %tmp161) #2 + %tmp163 = tail call double @llvm.fma.f64(double %tmp162, double 0x3F56C16C16C16C17, double 0x3F81111111111111) #2 + %tmp164 = tail call double @llvm.fma.f64(double %tmp162, double %tmp163, double 0x3FA5555555555555) #2 + %tmp165 = tail call double @llvm.fma.f64(double %tmp162, double %tmp164, double 0x3FC5555555555555) #2 + %tmp166 = tail call double @llvm.fma.f64(double %tmp162, double %tmp165, double 5.000000e-01) #2 + %tmp167 = tail call double @llvm.fma.f64(double %tmp162, double %tmp166, double 1.000000e+00) #2 + %tmp168 = fmul double %tmp162, %tmp167 + %tmp169 = zext i32 %tmp160 to i64 + %tmp170 = getelementptr inbounds [64 x <2 x double>], [64 x <2 x double>] addrspace(2)* @global, i64 0, i64 %tmp169 + %tmp171 = load <2 x double>, <2 x double> addrspace(2)* %tmp170, align 16 + %tmp172 = extractelement <2 x double> %tmp171, i64 0 + %tmp173 = tail call double @llvm.fma.f64(double undef, double %tmp168, double undef) #2 + %tmp174 = fadd double %tmp172, %tmp173 + %tmp175 = ashr i32 %tmp159, 8 + %tmp176 = add nsw i32 %tmp175, 1023 + %tmp177 = zext i32 %tmp176 to i64 + %tmp178 = shl i64 %tmp177, 52 + %tmp179 = bitcast i64 %tmp178 to double + %tmp180 = fmul double %tmp179, %tmp174 + %tmp181 = fmul double undef, %tmp180 + %tmp182 = select i1 undef, double %tmp181, double 
undef + %tmp183 = select i1 undef, double %tmp157, double %tmp182 + %tmp184 = fptrunc double %tmp183 to float + %tmp185 = select i1 undef, float 0x7FF0000000000000, float %tmp184 + %tmp186 = select i1 undef, float 0.000000e+00, float %tmp185 + store float %tmp186, float addrspace(1)* undef, align 4 + %tmp187 = fmul double %tmp23, 1.000000e-01 + %tmp188 = fadd double %tmp187, 0x403D3D0B84988095 + %tmp189 = fsub double %tmp188, undef + %tmp190 = tail call i1 @llvm.amdgcn.class.f64(double %tmp189, i32 3) #2 + %tmp191 = select i1 %tmp190, double %tmp189, double undef + %tmp192 = fptrunc double %tmp191 to float + %tmp193 = select i1 undef, float 0x7FF0000000000000, float %tmp192 + %tmp194 = select i1 undef, float 0.000000e+00, float %tmp193 + store float %tmp194, float addrspace(1)* undef, align 4 + store float 0x42AD1A94A0000000, float addrspace(1)* undef, align 4 + store float 0x4292309CE0000000, float addrspace(1)* undef, align 4 + %tmp195 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x42D6BCC420000000, float addrspace(1)* %tmp195, align 4 + store float 0x42C6BCC420000000, float addrspace(1)* undef, align 4 + %tmp196 = fmul double %tmp23, 1.182000e+01 + %tmp197 = fsub double 0x405FDB8F8E7DDCA5, %tmp196 + %tmp198 = fmul double undef, 0x40D18EFB9DB22D0E + %tmp199 = fsub double %tmp197, %tmp198 + %tmp200 = fptosi double undef to i32 + %tmp201 = ashr i32 %tmp200, 6 + %tmp202 = icmp slt i32 %tmp201, -1022 + %tmp203 = icmp eq i32 %tmp201, -1022 + %tmp204 = fcmp olt double undef, 1.000000e+00 + %tmp205 = and i1 %tmp203, %tmp204 + %tmp206 = or i1 %tmp202, %tmp205 + %tmp207 = ashr i32 %tmp200, 8 + %tmp208 = add nsw i32 %tmp207, 1023 + %tmp209 = zext i32 %tmp208 to i64 + %tmp210 = shl i64 %tmp209, 52 + %tmp211 = bitcast i64 %tmp210 to double + %tmp212 = fmul double %tmp211, undef + %tmp213 = fmul double undef, %tmp212 + %tmp214 = select i1 %tmp206, double %tmp213, double undef + %tmp215 = select i1 undef, double %tmp199, double %tmp214 + 
%tmp216 = fptrunc double %tmp215 to float + %tmp217 = select i1 undef, float 0x7FF0000000000000, float %tmp216 + %tmp218 = select i1 undef, float 0.000000e+00, float %tmp217 + store float %tmp218, float addrspace(1)* undef, align 4 + store float 0x42A2309CE0000000, float addrspace(1)* undef, align 4 + %tmp219 = fmul double undef, 0x40853ABD712A0EC7 + %tmp220 = fsub double 0x403C30CD9472E92C, %tmp219 + %tmp221 = fmul double %tmp220, 0x40571547652B82FE + %tmp222 = fptosi double %tmp221 to i32 + %tmp223 = ashr i32 %tmp222, 6 + %tmp224 = load <2 x double>, <2 x double> addrspace(2)* undef, align 16 + %tmp225 = extractelement <2 x double> %tmp224, i64 1 + %tmp226 = tail call double @llvm.fma.f64(double undef, double undef, double %tmp225) #2 + %tmp227 = fadd double undef, %tmp226 + %tmp228 = icmp slt i32 %tmp223, -1022 + %tmp229 = icmp eq i32 %tmp223, -1022 + %tmp230 = fcmp olt double %tmp227, 1.000000e+00 + %tmp231 = and i1 %tmp229, %tmp230 + %tmp232 = or i1 %tmp228, %tmp231 + %tmp233 = fmul double undef, %tmp227 + %tmp234 = select i1 %tmp232, double 0.000000e+00, double undef + %tmp235 = tail call i1 @llvm.amdgcn.class.f64(double %tmp220, i32 3) #2 + %tmp236 = select i1 %tmp235, double %tmp220, double %tmp234 + %tmp237 = fcmp ogt double %tmp220, 0x40862E42FEFA39EF + %tmp238 = fcmp olt double %tmp220, 0xC0874910D52D3051 + %tmp239 = fptrunc double %tmp236 to float + %tmp240 = select i1 %tmp237, float 0x7FF0000000000000, float %tmp239 + %tmp241 = select i1 %tmp238, float 0.000000e+00, float %tmp240 + store float %tmp241, float addrspace(1)* undef, align 4 + %tmp242 = fptrunc double undef to float + %tmp243 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp242, float addrspace(1)* %tmp243, align 4 + %tmp244 = select i1 undef, float 0.000000e+00, float undef + %tmp245 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp244, float addrspace(1)* %tmp245, align 4 + %tmp246 = getelementptr inbounds float, 
float addrspace(1)* %arg1, i64 undef + store float 0x42CB48EB60000000, float addrspace(1)* %tmp246, align 4 + store float 0x42C5D3EF80000000, float addrspace(1)* undef, align 4 + %tmp247 = fmul double undef, 0x40571547652B82FE + %tmp248 = fptosi double %tmp247 to i32 + %tmp249 = sitofp i32 %tmp248 to double + %tmp250 = and i32 %tmp248, 63 + %tmp251 = ashr i32 %tmp248, 6 + %tmp252 = tail call double @llvm.fma.f64(double 0xBF862E42FEFA0000, double %tmp249, double undef) #2 + %tmp253 = tail call double @llvm.fma.f64(double 0xBD1CF79ABC9E3B39, double %tmp249, double %tmp252) #2 + %tmp254 = tail call double @llvm.fma.f64(double %tmp253, double 0x3F56C16C16C16C17, double 0x3F81111111111111) #2 + %tmp255 = tail call double @llvm.fma.f64(double %tmp253, double %tmp254, double 0x3FA5555555555555) #2 + %tmp256 = tail call double @llvm.fma.f64(double %tmp253, double %tmp255, double 0x3FC5555555555555) #2 + %tmp257 = tail call double @llvm.fma.f64(double %tmp253, double %tmp256, double 5.000000e-01) #2 + %tmp258 = tail call double @llvm.fma.f64(double %tmp253, double %tmp257, double 1.000000e+00) #2 + %tmp259 = fmul double %tmp253, %tmp258 + %tmp260 = zext i32 %tmp250 to i64 + %tmp261 = getelementptr inbounds [64 x <2 x double>], [64 x <2 x double>] addrspace(2)* @global, i64 0, i64 %tmp260 + %tmp262 = load <2 x double>, <2 x double> addrspace(2)* %tmp261, align 16 + %tmp263 = extractelement <2 x double> %tmp262, i64 0 + %tmp264 = tail call double @llvm.fma.f64(double undef, double %tmp259, double undef) #2 + %tmp265 = fadd double %tmp263, %tmp264 + %tmp266 = icmp slt i32 %tmp251, -1022 + %tmp267 = icmp eq i32 %tmp251, -1022 + %tmp268 = fcmp olt double %tmp265, 1.000000e+00 + %tmp269 = and i1 %tmp267, %tmp268 + %tmp270 = or i1 %tmp266, %tmp269 + %tmp271 = select i1 %tmp270, double undef, double 0.000000e+00 + %tmp272 = tail call i1 @llvm.amdgcn.class.f64(double undef, i32 3) #2 + %tmp273 = select i1 %tmp272, double undef, double %tmp271 + %tmp274 = fptrunc double %tmp273 to 
float + %tmp275 = select i1 undef, float 0x7FF0000000000000, float %tmp274 + %tmp276 = select i1 undef, float 0.000000e+00, float %tmp275 + %tmp277 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp276, float addrspace(1)* %tmp277, align 4 + %tmp278 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0.000000e+00, float addrspace(1)* %tmp278, align 4 + %tmp279 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x4256D14160000000, float addrspace(1)* %tmp279, align 4 + store float 0x42B6BCC420000000, float addrspace(1)* undef, align 4 + %tmp280 = icmp slt i32 undef, -53 + %tmp281 = select i1 %tmp280, double 0.000000e+00, double undef + %tmp282 = icmp eq i32 undef, 0 + %tmp283 = tail call i1 @llvm.amdgcn.class.f64(double undef, i32 516) #2 + %tmp284 = fcmp oeq double undef, 0.000000e+00 + %tmp285 = or i1 %tmp282, %tmp284 + %tmp286 = or i1 %tmp283, %tmp285 + %tmp287 = select i1 %tmp286, double undef, double %tmp281 + %tmp288 = select i1 undef, double 0.000000e+00, double %tmp287 + %tmp289 = select i1 undef, double undef, double %tmp288 + %tmp290 = fcmp olt double undef, 0xC0874910D52D3051 + %tmp291 = fptrunc double %tmp289 to float + %tmp292 = select i1 undef, float 0x7FF0000000000000, float %tmp291 + %tmp293 = select i1 %tmp290, float 0.000000e+00, float %tmp292 + store float %tmp293, float addrspace(1)* undef, align 4 + %tmp294 = fsub double 0x403C52FCB196E661, undef + %tmp295 = getelementptr inbounds [64 x <2 x double>], [64 x <2 x double>] addrspace(2)* @global, i64 0, i64 undef + %tmp296 = load <2 x double>, <2 x double> addrspace(2)* %tmp295, align 16 + %tmp297 = extractelement <2 x double> %tmp296, i64 1 + %tmp298 = tail call double @llvm.fma.f64(double undef, double 0.000000e+00, double %tmp297) #2 + %tmp299 = fadd double undef, %tmp298 + %tmp300 = fmul double undef, %tmp299 + %tmp301 = fmul double undef, %tmp300 + %tmp302 = select i1 undef, double %tmp301, 
double undef + %tmp303 = tail call i1 @llvm.amdgcn.class.f64(double %tmp294, i32 3) #2 + %tmp304 = select i1 %tmp303, double %tmp294, double %tmp302 + %tmp305 = fcmp ogt double %tmp294, 0x40862E42FEFA39EF + %tmp306 = fcmp olt double %tmp294, 0xC0874910D52D3051 + %tmp307 = fptrunc double %tmp304 to float + %tmp308 = select i1 %tmp305, float 0x7FF0000000000000, float %tmp307 + %tmp309 = select i1 %tmp306, float 0.000000e+00, float %tmp308 + store float %tmp309, float addrspace(1)* undef, align 4 + store float 0x427D1A94A0000000, float addrspace(1)* undef, align 4 + %tmp310 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x42E6BCC420000000, float addrspace(1)* %tmp310, align 4 + store float 0x42835AA2E0000000, float addrspace(1)* undef, align 4 + store float 0x429802BAA0000000, float addrspace(1)* undef, align 4 + %tmp311 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x42CB48EB60000000, float addrspace(1)* %tmp311, align 4 + %tmp312 = fadd double undef, 0x4039EA8D92245A52 + %tmp313 = fmul double undef, 0x40A71DD3F91E646F + %tmp314 = fsub double %tmp312, %tmp313 + %tmp315 = getelementptr inbounds [64 x <2 x double>], [64 x <2 x double>] addrspace(2)* @global, i64 0, i64 undef + %tmp316 = load <2 x double>, <2 x double> addrspace(2)* %tmp315, align 16 + %tmp317 = extractelement <2 x double> %tmp316, i64 1 + %tmp318 = tail call double @llvm.fma.f64(double undef, double undef, double %tmp317) #2 + %tmp319 = fadd double undef, %tmp318 + %tmp320 = select i1 undef, double %tmp319, double undef + %tmp321 = select i1 undef, double undef, double %tmp320 + %tmp322 = tail call i1 @llvm.amdgcn.class.f64(double %tmp314, i32 3) #2 + %tmp323 = select i1 %tmp322, double %tmp314, double %tmp321 + %tmp324 = fcmp ogt double %tmp314, 0x40862E42FEFA39EF + %tmp325 = fcmp olt double %tmp314, 0xC0874910D52D3051 + %tmp326 = fptrunc double %tmp323 to float + %tmp327 = select i1 %tmp324, float 0x7FF0000000000000, float 
%tmp326 + %tmp328 = select i1 %tmp325, float 0.000000e+00, float %tmp327 + %tmp329 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp328, float addrspace(1)* %tmp329, align 4 + %tmp330 = fmul double %tmp23, 2.600000e+00 + %tmp331 = fadd double %tmp330, 0x402256CB1CF45780 + %tmp332 = fmul double undef, 0x40BB57BE6CF41F21 + %tmp333 = fsub double %tmp331, %tmp332 + %tmp334 = load <2 x double>, <2 x double> addrspace(2)* undef, align 16 + %tmp335 = extractelement <2 x double> %tmp334, i64 0 + %tmp336 = fadd double %tmp335, undef + %tmp337 = or i1 undef, undef + %tmp338 = fmul double undef, %tmp336 + %tmp339 = fmul double undef, %tmp338 + %tmp340 = select i1 %tmp337, double %tmp339, double undef + %tmp341 = select i1 undef, double %tmp333, double %tmp340 + %tmp342 = fcmp ogt double %tmp333, 0x40862E42FEFA39EF + %tmp343 = fcmp olt double %tmp333, 0xC0874910D52D3051 + %tmp344 = fptrunc double %tmp341 to float + %tmp345 = select i1 %tmp342, float 0x7FF0000000000000, float %tmp344 + %tmp346 = select i1 %tmp343, float 0.000000e+00, float %tmp345 + store float %tmp346, float addrspace(1)* undef, align 4 + %tmp347 = tail call double @llvm.fma.f64(double undef, double undef, double 0x3FA5555555555555) #2 + %tmp348 = tail call double @llvm.fma.f64(double undef, double %tmp347, double 0x3FC5555555555555) #2 + %tmp349 = tail call double @llvm.fma.f64(double undef, double %tmp348, double 5.000000e-01) #2 + %tmp350 = tail call double @llvm.fma.f64(double undef, double %tmp349, double 1.000000e+00) #2 + %tmp351 = fmul double undef, %tmp350 + %tmp352 = getelementptr inbounds [64 x <2 x double>], [64 x <2 x double>] addrspace(2)* @global, i64 0, i64 undef + %tmp353 = tail call double @llvm.fma.f64(double undef, double %tmp351, double undef) #2 + %tmp354 = fadd double 0.000000e+00, %tmp353 + %tmp355 = icmp slt i32 undef, -1022 + %tmp356 = icmp eq i32 undef, -1022 + %tmp357 = fcmp olt double %tmp354, 1.000000e+00 + %tmp358 = and i1 %tmp356, %tmp357 + 
%tmp359 = or i1 %tmp355, %tmp358 + %tmp360 = bitcast double %tmp354 to i64 + %tmp361 = and i64 %tmp360, -9223372036854775808 + %tmp362 = icmp sgt i32 undef, -2047 + %tmp363 = select i1 %tmp362, i32 undef, i32 -2047 + %tmp364 = icmp slt i32 %tmp363, 2047 + %tmp365 = select i1 %tmp364, i32 %tmp363, i32 2047 + %tmp366 = icmp sgt i32 %tmp363, 0 + %tmp367 = zext i32 %tmp365 to i64 + %tmp368 = shl i64 %tmp367, 52 + %tmp369 = or i64 %tmp368, 0 + %tmp370 = bitcast i64 %tmp369 to double + %tmp371 = select i1 %tmp366, double %tmp370, double undef + %tmp372 = icmp eq i32 %tmp365, 2047 + %tmp373 = or i64 %tmp361, 9218868437227405312 + %tmp374 = bitcast i64 %tmp373 to double + %tmp375 = select i1 %tmp372, double %tmp374, double %tmp371 + %tmp376 = icmp slt i32 %tmp365, -53 + %tmp377 = select i1 %tmp376, double undef, double %tmp375 + %tmp378 = tail call i1 @llvm.amdgcn.class.f64(double %tmp354, i32 516) #2 + %tmp379 = or i1 %tmp378, false + %tmp380 = select i1 %tmp379, double %tmp354, double %tmp377 + %tmp381 = select i1 %tmp359, double 0.000000e+00, double %tmp380 + %tmp382 = select i1 undef, double undef, double %tmp381 + %tmp383 = fcmp ogt double undef, 0x40862E42FEFA39EF + %tmp384 = fptrunc double %tmp382 to float + %tmp385 = select i1 %tmp383, float 0x7FF0000000000000, float %tmp384 + %tmp386 = select i1 undef, float 0.000000e+00, float %tmp385 + store float %tmp386, float addrspace(1)* null, align 4 + %tmp387 = add nuw nsw i64 0, %tmp4 + %tmp388 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp387 + store float 0x42D5D3EF80000000, float addrspace(1)* %tmp388, align 4 + %tmp389 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x42B5D3EF80000000, float addrspace(1)* %tmp389, align 4 + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tidig.z() #1 + +; Function Attrs: nounwind readnone +declare double @llvm.fma.f64(double, double, double) #1 + +; Function Attrs: nounwind readnone +declare float 
@llvm.fma.f32(float, float, float) #1 + +; Function Attrs: nounwind readnone +declare i1 @llvm.amdgcn.class.f64(double, i32) #1 + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind }