Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -571,6 +571,7 @@ MachineFunction *MF = MBB.getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo *FrameInfo = MF->getFrameInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned Size = FrameInfo->getObjectSize(FrameIndex); @@ -587,8 +588,14 @@ // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for spilling // SGPRs. - unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize()); - BuildMI(MBB, MI, DL, get(Opcode)) + const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(RC->getSize())); + + // The SGPR spill/restore instructions only work on numbered SGPRs, so + // we need to make sure we are using the correct register class. + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + MRI.constrainRegClass(SrcReg, RI.getRegClass(OpDesc.OpInfo[0].RegClass)); + + BuildMI(MBB, MI, DL, OpDesc) .addReg(SrcReg) // src .addFrameIndex(FrameIndex) // frame_idx .addMemOperand(MMO); @@ -661,6 +668,7 @@ MachineFunction *MF = MBB.getParent(); const SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo *FrameInfo = MF->getFrameInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); unsigned Size = FrameInfo->getObjectSize(FrameIndex); @@ -672,10 +680,16 @@ PtrInfo, MachineMemOperand::MOLoad, Size, Align); if (RI.isSGPRClass(RC)) { + const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(RC->getSize())); + + // The SGPR spill/restore instructions only work on numbered SGPRs, so + // we need to make sure we are using the correct register class. 
+ if(TargetRegisterInfo::isVirtualRegister(DestReg)) + MRI.constrainRegClass(DestReg, RI.getRegClass(OpDesc.OpInfo[0].RegClass)); + // FIXME: Maybe this should not include a memoperand because it will be // lowered to non-memory instructions. - unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize()); - BuildMI(MBB, MI, DL, get(Opcode), DestReg) + BuildMI(MBB, MI, DL, OpDesc, DestReg) .addFrameIndex(FrameIndex) // frame_idx .addMemOperand(MMO); Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -2033,9 +2033,9 @@ } // End UseNamedOperandTable = 1 } -// It's unclear whether you can use M0 as the output of v_readlane_b32 -// instructions, so use SGPR_32 register class for spills to prevent -// this from happening. +// You cannot use M0 as the output of v_readlane_b32 instructions or use it +// in the sdata operand of SMEM instructions, so use SGPR_32 register class +// for SI_SPILL_S32_* instructions to prevent this from happening. 
defm SI_SPILL_S32 : SI_SPILL_SGPR ; defm SI_SPILL_S64 : SI_SPILL_SGPR ; defm SI_SPILL_S128 : SI_SPILL_SGPR ; Index: test/CodeGen/AMDGPU/sgpr-spill-regclass.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/sgpr-spill-regclass.ll @@ -0,0 +1,468 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s + +@global = external unnamed_addr addrspace(2) constant [64 x <2 x double>], align 16 + +; CHECK: {{^}}hoge: +; CHECK: s_endpgm +; Function Attrs: nounwind uwtable +define spir_kernel void @hoge(float addrspace(1)* nocapture readonly %arg, float addrspace(1)* %arg1, float %arg2) #0 align 2 { +bb: + %tmp = tail call i32 @llvm.r600.read.tidig.z() #2 + %tmp3 = zext i32 %tmp to i64 + %tmp4 = add nuw nsw i64 0, %tmp3 + %tmp5 = getelementptr inbounds float, float addrspace(1)* %arg, i64 %tmp4 + %tmp6 = load float, float addrspace(1)* %tmp5, align 4 + %tmp7 = fmul float %tmp6, %arg2 + %tmp8 = fcmp olt float undef, 6.250000e-02 + %tmp9 = tail call float @llvm.fma.f32(float undef, float undef, float undef) #2 + %tmp10 = fadd float undef, %tmp9 + %tmp11 = fadd float undef, undef + %tmp12 = select i1 %tmp8, float %tmp10, float %tmp11 + %tmp13 = icmp ugt i32 undef, 2139095039 + %tmp14 = select i1 %tmp13, float %tmp7, float %tmp12 + %tmp15 = icmp eq i32 undef, 0 + %tmp16 = icmp sgt i32 undef, -2047 + %tmp17 = select i1 %tmp16, i32 undef, i32 -2047 + %tmp18 = icmp sgt i32 %tmp17, 0 + %tmp19 = select i1 %tmp18, double undef, double undef + %tmp20 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp4 + %tmp21 = fpext float %tmp14 to double + %tmp22 = select i1 undef, double 0x7FF8000000000000, double %tmp21 + %tmp23 = select i1 %tmp15, double 0xFFF0000000000000, double %tmp22 + %tmp24 = fptrunc double undef to float + %tmp25 = fptosi double undef to i32 + %tmp26 = tail call double @llvm.fma.f64(double undef, double undef, double 0x3FA5555555555555) #2 + %tmp27 = tail call 
double @llvm.fma.f64(double undef, double %tmp26, double 0x3FC5555555555555) #2 + %tmp28 = select i1 undef, float 0.000000e+00, float undef + %tmp29 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp28, float addrspace(1)* %tmp29, align 4 + %tmp30 = fmul double undef, 1.200000e+17 + %tmp31 = fptrunc double %tmp30 to float + store float %tmp31, float addrspace(1)* undef, align 4 + %tmp32 = fcmp olt double undef, 0xC0874910D52D3051 + %tmp33 = select i1 %tmp32, float 0.000000e+00, float undef + store float %tmp33, float addrspace(1)* undef, align 4 + %tmp34 = load <2 x double>, <2 x double> addrspace(2)* undef, align 16 + %tmp35 = extractelement <2 x double> %tmp34, i64 0 + %tmp36 = fadd double %tmp35, undef + %tmp37 = fmul double undef, %tmp36 + %tmp38 = fmul double undef, %tmp37 + %tmp39 = select i1 undef, double %tmp38, double undef + %tmp40 = tail call i1 @llvm.amdgcn.class.f64(double undef, i32 3) #2 + %tmp41 = select i1 %tmp40, double undef, double %tmp39 + %tmp42 = fcmp ogt double undef, 0x40862E42FEFA39EF + %tmp43 = fptrunc double %tmp41 to float + %tmp44 = select i1 %tmp42, float 0x7FF0000000000000, float %tmp43 + %tmp45 = select i1 undef, float 0.000000e+00, float %tmp44 + store float %tmp45, float addrspace(1)* undef, align 4 + %tmp46 = fptosi double undef to i32 + %tmp47 = ashr i32 %tmp46, 6 + %tmp48 = icmp eq i32 %tmp47, -1022 + %tmp49 = and i1 %tmp48, undef + %tmp50 = icmp slt i32 undef, -1022 + %tmp51 = or i1 %tmp50, undef + %tmp52 = fptosi double undef to i32 + %tmp53 = ashr i32 %tmp52, 6 + %tmp54 = fadd double undef, undef + %tmp55 = icmp slt i32 %tmp53, -1022 + %tmp56 = or i1 %tmp55, false + %tmp57 = fmul double undef, undef + %tmp58 = bitcast double %tmp54 to i64 + %tmp59 = and i64 %tmp58, -9223372036854775808 + %tmp60 = or i64 %tmp59, 9218868437227405312 + %tmp61 = bitcast i64 %tmp60 to double + %tmp62 = select i1 undef, double %tmp61, double undef + %tmp63 = icmp slt i32 undef, -53 + %tmp64 = select i1 %tmp63, 
double undef, double %tmp62 + %tmp65 = select i1 undef, double %tmp54, double %tmp64 + %tmp66 = select i1 %tmp56, double %tmp57, double %tmp65 + %tmp67 = select i1 undef, double undef, double %tmp66 + %tmp68 = fptrunc double %tmp67 to float + %tmp69 = select i1 undef, float 0x7FF0000000000000, float %tmp68 + %tmp70 = select i1 undef, float 0.000000e+00, float %tmp69 + store float %tmp70, float addrspace(1)* undef, align 4 + store float 0x42BE036940000000, float addrspace(1)* undef, align 4 + %tmp71 = fptosi double undef to i32 + %tmp72 = ashr i32 %tmp71, 8 + %tmp73 = add nsw i32 %tmp72, 1023 + %tmp74 = zext i32 %tmp73 to i64 + %tmp75 = shl i64 %tmp74, 52 + %tmp76 = bitcast i64 %tmp75 to double + %tmp77 = fmul double %tmp76, undef + store float 0x42D0B07140000000, float addrspace(1)* undef, align 4 + %tmp78 = fcmp oeq double undef, 0.000000e+00 + %tmp79 = or i1 undef, %tmp78 + %tmp80 = tail call i1 @llvm.amdgcn.class.f64(double undef, i32 516) #2 + store float 0x42BD1A94A0000000, float addrspace(1)* undef, align 4 + %tmp81 = fmul double undef, 0x4072DEE148BA83F5 + %tmp82 = fsub double 0x403E56CD60708320, %tmp81 + %tmp83 = fcmp ogt double %tmp82, 0x40862E42FEFA39EF + %tmp84 = select i1 %tmp83, float 0x7FF0000000000000, float undef + %tmp85 = select i1 undef, float 0.000000e+00, float %tmp84 + store float %tmp85, float addrspace(1)* undef, align 4 + store float 0x42BB48EB60000000, float addrspace(1)* undef, align 4 + store float 0x42AB48EB60000000, float addrspace(1)* undef, align 4 + store float 0x42CFD512A0000000, float addrspace(1)* undef, align 4 + %tmp86 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x42A5D3EF80000000, float addrspace(1)* %tmp86, align 4 + %tmp87 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x42A05EF3A0000000, float addrspace(1)* %tmp87, align 4 + %tmp88 = fmul double undef, 0x40571547652B82FE + %tmp89 = fptosi double %tmp88 to i32 + %tmp90 = sitofp i32 %tmp89 to double + 
%tmp91 = ashr i32 %tmp89, 6 + %tmp92 = tail call double @llvm.fma.f64(double 0xBF862E42FEFA0000, double %tmp90, double undef) #2 + %tmp93 = tail call double @llvm.fma.f64(double 0xBD1CF79ABC9E3B39, double %tmp90, double %tmp92) #2 + %tmp94 = tail call double @llvm.fma.f64(double %tmp93, double 0x3F56C16C16C16C17, double 0x3F81111111111111) #2 + %tmp95 = icmp eq i32 %tmp91, -1022 + %tmp96 = fcmp olt double undef, 1.000000e+00 + %tmp97 = and i1 %tmp95, %tmp96 + %tmp98 = icmp sgt i32 undef, -2047 + %tmp99 = select i1 %tmp98, i32 undef, i32 -2047 + %tmp100 = icmp slt i32 %tmp99, 2047 + %tmp101 = select i1 %tmp100, i32 %tmp99, i32 2047 + %tmp102 = select i1 undef, float 0.000000e+00, float undef + %tmp103 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp102, float addrspace(1)* %tmp103, align 4 + %tmp104 = icmp eq i32 undef, -1022 + %tmp105 = and i1 %tmp104, undef + %tmp106 = select i1 undef, float 0x7FF0000000000000, float undef + %tmp107 = select i1 undef, float 0.000000e+00, float %tmp106 + %tmp108 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp107, float addrspace(1)* %tmp108, align 4 + %tmp109 = fadd double 0.000000e+00, 0x4040172079F30B25 + %tmp110 = tail call i1 @llvm.amdgcn.class.f64(double %tmp109, i32 3) #2 + %tmp111 = select i1 %tmp110, double %tmp109, double undef + %tmp112 = fptrunc double %tmp111 to float + %tmp113 = select i1 false, float 0x7FF0000000000000, float %tmp112 + %tmp114 = select i1 false, float 0.000000e+00, float %tmp113 + store float %tmp114, float addrspace(1)* undef, align 4 + %tmp115 = fsub double 0x403F0F3C020ECDF9, undef + %tmp116 = fmul double %tmp115, 0x40571547652B82FE + %tmp117 = fptosi double %tmp116 to i32 + %tmp118 = ashr i32 %tmp117, 6 + %tmp119 = getelementptr inbounds [64 x <2 x double>], [64 x <2 x double>] addrspace(2)* @global, i64 0, i64 undef + %tmp120 = load <2 x double>, <2 x double> addrspace(2)* %tmp119, align 16 + %tmp121 = extractelement 
<2 x double> %tmp120, i64 0 + %tmp122 = fadd double %tmp121, undef + %tmp123 = icmp slt i32 %tmp118, -1022 + %tmp124 = icmp eq i32 %tmp118, -1022 + %tmp125 = fcmp olt double %tmp122, 1.000000e+00 + %tmp126 = and i1 %tmp124, %tmp125 + %tmp127 = or i1 %tmp123, %tmp126 + %tmp128 = select i1 undef, double %tmp122, double undef + %tmp129 = select i1 %tmp127, double undef, double %tmp128 + %tmp130 = tail call i1 @llvm.amdgcn.class.f64(double %tmp115, i32 3) #2 + %tmp131 = select i1 %tmp130, double %tmp115, double %tmp129 + %tmp132 = fcmp ogt double %tmp115, 0x40862E42FEFA39EF + %tmp133 = fptrunc double %tmp131 to float + %tmp134 = select i1 %tmp132, float 0x7FF0000000000000, float %tmp133 + %tmp135 = select i1 undef, float 0.000000e+00, float %tmp134 + %tmp136 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp135, float addrspace(1)* %tmp136, align 4 + %tmp137 = fmul double undef, 0x40B192C1CB6848BF + %tmp138 = fsub double 0x40384E8972DAE8EF, %tmp137 + %tmp139 = fcmp ogt double %tmp138, 0x40862E42FEFA39EF + %tmp140 = select i1 %tmp139, float 0x7FF0000000000000, float undef + %tmp141 = select i1 undef, float 0.000000e+00, float %tmp140 + store float %tmp141, float addrspace(1)* undef, align 4 + store float 0x426D1A94A0000000, float addrspace(1)* undef, align 4 + %tmp142 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + %tmp143 = fmul double undef, 0x40571547652B82FE + %tmp144 = fptosi double %tmp143 to i32 + %tmp145 = ashr i32 %tmp144, 6 + %tmp146 = icmp slt i32 %tmp145, -1022 + %tmp147 = or i1 %tmp146, false + %tmp148 = select i1 %tmp147, double 0.000000e+00, double undef + %tmp149 = select i1 undef, double undef, double %tmp148 + %tmp150 = fptrunc double %tmp149 to float + %tmp151 = select i1 undef, float 0x7FF0000000000000, float %tmp150 + %tmp152 = select i1 undef, float 0.000000e+00, float %tmp151 + store float %tmp152, float addrspace(1)* undef, align 4 + store float 0x429ED99D80000000, float addrspace(1)* 
undef, align 4 + store float 0x42B05EF3A0000000, float addrspace(1)* undef, align 4 + %tmp153 = fptrunc double undef to float + store float %tmp153, float addrspace(1)* undef, align 4 + %tmp154 = fmul double %tmp23, 9.700000e-01 + %tmp155 = fsub double 0x4042CBE022EAE693, %tmp154 + %tmp156 = fmul double undef, 0x40737FE8CAC4B4D0 + %tmp157 = fsub double %tmp155, %tmp156 + %tmp158 = fmul double %tmp157, 0x40571547652B82FE + %tmp159 = fptosi double %tmp158 to i32 + %tmp160 = and i32 %tmp159, 63 + %tmp161 = tail call double @llvm.fma.f64(double 0xBF862E42FEFA0000, double undef, double %tmp157) #2 + %tmp162 = tail call double @llvm.fma.f64(double 0xBD1CF79ABC9E3B39, double undef, double %tmp161) #2 + %tmp163 = tail call double @llvm.fma.f64(double %tmp162, double 0x3F56C16C16C16C17, double 0x3F81111111111111) #2 + %tmp164 = tail call double @llvm.fma.f64(double %tmp162, double %tmp163, double 0x3FA5555555555555) #2 + %tmp165 = tail call double @llvm.fma.f64(double %tmp162, double %tmp164, double 0x3FC5555555555555) #2 + %tmp166 = tail call double @llvm.fma.f64(double %tmp162, double %tmp165, double 5.000000e-01) #2 + %tmp167 = tail call double @llvm.fma.f64(double %tmp162, double %tmp166, double 1.000000e+00) #2 + %tmp168 = fmul double %tmp162, %tmp167 + %tmp169 = zext i32 %tmp160 to i64 + %tmp170 = getelementptr inbounds [64 x <2 x double>], [64 x <2 x double>] addrspace(2)* @global, i64 0, i64 %tmp169 + %tmp171 = load <2 x double>, <2 x double> addrspace(2)* %tmp170, align 16 + %tmp172 = extractelement <2 x double> %tmp171, i64 0 + %tmp173 = tail call double @llvm.fma.f64(double undef, double %tmp168, double undef) #2 + %tmp174 = fadd double %tmp172, %tmp173 + %tmp175 = ashr i32 %tmp159, 8 + %tmp176 = add nsw i32 %tmp175, 1023 + %tmp177 = zext i32 %tmp176 to i64 + %tmp178 = shl i64 %tmp177, 52 + %tmp179 = bitcast i64 %tmp178 to double + %tmp180 = fmul double %tmp179, %tmp174 + %tmp181 = fmul double undef, %tmp180 + %tmp182 = select i1 undef, double %tmp181, double 
undef + %tmp183 = select i1 undef, double %tmp157, double %tmp182 + %tmp184 = fptrunc double %tmp183 to float + %tmp185 = select i1 undef, float 0x7FF0000000000000, float %tmp184 + %tmp186 = select i1 undef, float 0.000000e+00, float %tmp185 + store float %tmp186, float addrspace(1)* undef, align 4 + %tmp187 = fmul double %tmp23, 1.000000e-01 + %tmp188 = fadd double %tmp187, 0x403D3D0B84988095 + %tmp189 = fsub double %tmp188, undef + %tmp190 = tail call i1 @llvm.amdgcn.class.f64(double %tmp189, i32 3) #2 + %tmp191 = select i1 %tmp190, double %tmp189, double undef + %tmp192 = fptrunc double %tmp191 to float + %tmp193 = select i1 undef, float 0x7FF0000000000000, float %tmp192 + %tmp194 = select i1 undef, float 0.000000e+00, float %tmp193 + store float %tmp194, float addrspace(1)* undef, align 4 + store float 0x42AD1A94A0000000, float addrspace(1)* undef, align 4 + store float 0x4292309CE0000000, float addrspace(1)* undef, align 4 + %tmp195 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x42D6BCC420000000, float addrspace(1)* %tmp195, align 4 + store float 0x42C6BCC420000000, float addrspace(1)* undef, align 4 + %tmp196 = fmul double %tmp23, 1.182000e+01 + %tmp197 = fsub double 0x405FDB8F8E7DDCA5, %tmp196 + %tmp198 = fmul double undef, 0x40D18EFB9DB22D0E + %tmp199 = fsub double %tmp197, %tmp198 + %tmp200 = fptosi double undef to i32 + %tmp201 = ashr i32 %tmp200, 6 + %tmp202 = icmp slt i32 %tmp201, -1022 + %tmp203 = icmp eq i32 %tmp201, -1022 + %tmp204 = fcmp olt double undef, 1.000000e+00 + %tmp205 = and i1 %tmp203, %tmp204 + %tmp206 = or i1 %tmp202, %tmp205 + %tmp207 = ashr i32 %tmp200, 8 + %tmp208 = add nsw i32 %tmp207, 1023 + %tmp209 = zext i32 %tmp208 to i64 + %tmp210 = shl i64 %tmp209, 52 + %tmp211 = bitcast i64 %tmp210 to double + %tmp212 = fmul double %tmp211, undef + %tmp213 = fmul double undef, %tmp212 + %tmp214 = select i1 %tmp206, double %tmp213, double undef + %tmp215 = select i1 undef, double %tmp199, double %tmp214 + 
%tmp216 = fptrunc double %tmp215 to float + %tmp217 = select i1 undef, float 0x7FF0000000000000, float %tmp216 + %tmp218 = select i1 undef, float 0.000000e+00, float %tmp217 + store float %tmp218, float addrspace(1)* undef, align 4 + store float 0x42A2309CE0000000, float addrspace(1)* undef, align 4 + %tmp219 = fmul double undef, 0x40853ABD712A0EC7 + %tmp220 = fsub double 0x403C30CD9472E92C, %tmp219 + %tmp221 = fmul double %tmp220, 0x40571547652B82FE + %tmp222 = fptosi double %tmp221 to i32 + %tmp223 = ashr i32 %tmp222, 6 + %tmp224 = load <2 x double>, <2 x double> addrspace(2)* undef, align 16 + %tmp225 = extractelement <2 x double> %tmp224, i64 1 + %tmp226 = tail call double @llvm.fma.f64(double undef, double undef, double %tmp225) #2 + %tmp227 = fadd double undef, %tmp226 + %tmp228 = icmp slt i32 %tmp223, -1022 + %tmp229 = icmp eq i32 %tmp223, -1022 + %tmp230 = fcmp olt double %tmp227, 1.000000e+00 + %tmp231 = and i1 %tmp229, %tmp230 + %tmp232 = or i1 %tmp228, %tmp231 + %tmp233 = fmul double undef, %tmp227 + %tmp234 = select i1 %tmp232, double 0.000000e+00, double undef + %tmp235 = tail call i1 @llvm.amdgcn.class.f64(double %tmp220, i32 3) #2 + %tmp236 = select i1 %tmp235, double %tmp220, double %tmp234 + %tmp237 = fcmp ogt double %tmp220, 0x40862E42FEFA39EF + %tmp238 = fcmp olt double %tmp220, 0xC0874910D52D3051 + %tmp239 = fptrunc double %tmp236 to float + %tmp240 = select i1 %tmp237, float 0x7FF0000000000000, float %tmp239 + %tmp241 = select i1 %tmp238, float 0.000000e+00, float %tmp240 + store float %tmp241, float addrspace(1)* undef, align 4 + %tmp242 = fptrunc double undef to float + %tmp243 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp242, float addrspace(1)* %tmp243, align 4 + %tmp244 = select i1 undef, float 0.000000e+00, float undef + %tmp245 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp244, float addrspace(1)* %tmp245, align 4 + %tmp246 = getelementptr inbounds float, 
float addrspace(1)* %arg1, i64 undef + store float 0x42CB48EB60000000, float addrspace(1)* %tmp246, align 4 + store float 0x42C5D3EF80000000, float addrspace(1)* undef, align 4 + %tmp247 = fmul double undef, 0x40571547652B82FE + %tmp248 = fptosi double %tmp247 to i32 + %tmp249 = sitofp i32 %tmp248 to double + %tmp250 = and i32 %tmp248, 63 + %tmp251 = ashr i32 %tmp248, 6 + %tmp252 = tail call double @llvm.fma.f64(double 0xBF862E42FEFA0000, double %tmp249, double undef) #2 + %tmp253 = tail call double @llvm.fma.f64(double 0xBD1CF79ABC9E3B39, double %tmp249, double %tmp252) #2 + %tmp254 = tail call double @llvm.fma.f64(double %tmp253, double 0x3F56C16C16C16C17, double 0x3F81111111111111) #2 + %tmp255 = tail call double @llvm.fma.f64(double %tmp253, double %tmp254, double 0x3FA5555555555555) #2 + %tmp256 = tail call double @llvm.fma.f64(double %tmp253, double %tmp255, double 0x3FC5555555555555) #2 + %tmp257 = tail call double @llvm.fma.f64(double %tmp253, double %tmp256, double 5.000000e-01) #2 + %tmp258 = tail call double @llvm.fma.f64(double %tmp253, double %tmp257, double 1.000000e+00) #2 + %tmp259 = fmul double %tmp253, %tmp258 + %tmp260 = zext i32 %tmp250 to i64 + %tmp261 = getelementptr inbounds [64 x <2 x double>], [64 x <2 x double>] addrspace(2)* @global, i64 0, i64 %tmp260 + %tmp262 = load <2 x double>, <2 x double> addrspace(2)* %tmp261, align 16 + %tmp263 = extractelement <2 x double> %tmp262, i64 0 + %tmp264 = tail call double @llvm.fma.f64(double undef, double %tmp259, double undef) #2 + %tmp265 = fadd double %tmp263, %tmp264 + %tmp266 = icmp slt i32 %tmp251, -1022 + %tmp267 = icmp eq i32 %tmp251, -1022 + %tmp268 = fcmp olt double %tmp265, 1.000000e+00 + %tmp269 = and i1 %tmp267, %tmp268 + %tmp270 = or i1 %tmp266, %tmp269 + %tmp271 = select i1 %tmp270, double undef, double 0.000000e+00 + %tmp272 = tail call i1 @llvm.amdgcn.class.f64(double undef, i32 3) #2 + %tmp273 = select i1 %tmp272, double undef, double %tmp271 + %tmp274 = fptrunc double %tmp273 to 
float + %tmp275 = select i1 undef, float 0x7FF0000000000000, float %tmp274 + %tmp276 = select i1 undef, float 0.000000e+00, float %tmp275 + %tmp277 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp276, float addrspace(1)* %tmp277, align 4 + %tmp278 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0.000000e+00, float addrspace(1)* %tmp278, align 4 + %tmp279 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x4256D14160000000, float addrspace(1)* %tmp279, align 4 + store float 0x42B6BCC420000000, float addrspace(1)* undef, align 4 + %tmp280 = icmp slt i32 undef, -53 + %tmp281 = select i1 %tmp280, double 0.000000e+00, double undef + %tmp282 = icmp eq i32 undef, 0 + %tmp283 = tail call i1 @llvm.amdgcn.class.f64(double undef, i32 516) #2 + %tmp284 = fcmp oeq double undef, 0.000000e+00 + %tmp285 = or i1 %tmp282, %tmp284 + %tmp286 = or i1 %tmp283, %tmp285 + %tmp287 = select i1 %tmp286, double undef, double %tmp281 + %tmp288 = select i1 undef, double 0.000000e+00, double %tmp287 + %tmp289 = select i1 undef, double undef, double %tmp288 + %tmp290 = fcmp olt double undef, 0xC0874910D52D3051 + %tmp291 = fptrunc double %tmp289 to float + %tmp292 = select i1 undef, float 0x7FF0000000000000, float %tmp291 + %tmp293 = select i1 %tmp290, float 0.000000e+00, float %tmp292 + store float %tmp293, float addrspace(1)* undef, align 4 + %tmp294 = fsub double 0x403C52FCB196E661, undef + %tmp295 = getelementptr inbounds [64 x <2 x double>], [64 x <2 x double>] addrspace(2)* @global, i64 0, i64 undef + %tmp296 = load <2 x double>, <2 x double> addrspace(2)* %tmp295, align 16 + %tmp297 = extractelement <2 x double> %tmp296, i64 1 + %tmp298 = tail call double @llvm.fma.f64(double undef, double 0.000000e+00, double %tmp297) #2 + %tmp299 = fadd double undef, %tmp298 + %tmp300 = fmul double undef, %tmp299 + %tmp301 = fmul double undef, %tmp300 + %tmp302 = select i1 undef, double %tmp301, 
double undef + %tmp303 = tail call i1 @llvm.amdgcn.class.f64(double %tmp294, i32 3) #2 + %tmp304 = select i1 %tmp303, double %tmp294, double %tmp302 + %tmp305 = fcmp ogt double %tmp294, 0x40862E42FEFA39EF + %tmp306 = fcmp olt double %tmp294, 0xC0874910D52D3051 + %tmp307 = fptrunc double %tmp304 to float + %tmp308 = select i1 %tmp305, float 0x7FF0000000000000, float %tmp307 + %tmp309 = select i1 %tmp306, float 0.000000e+00, float %tmp308 + store float %tmp309, float addrspace(1)* undef, align 4 + store float 0x427D1A94A0000000, float addrspace(1)* undef, align 4 + %tmp310 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x42E6BCC420000000, float addrspace(1)* %tmp310, align 4 + store float 0x42835AA2E0000000, float addrspace(1)* undef, align 4 + store float 0x429802BAA0000000, float addrspace(1)* undef, align 4 + %tmp311 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x42CB48EB60000000, float addrspace(1)* %tmp311, align 4 + %tmp312 = fadd double undef, 0x4039EA8D92245A52 + %tmp313 = fmul double undef, 0x40A71DD3F91E646F + %tmp314 = fsub double %tmp312, %tmp313 + %tmp315 = getelementptr inbounds [64 x <2 x double>], [64 x <2 x double>] addrspace(2)* @global, i64 0, i64 undef + %tmp316 = load <2 x double>, <2 x double> addrspace(2)* %tmp315, align 16 + %tmp317 = extractelement <2 x double> %tmp316, i64 1 + %tmp318 = tail call double @llvm.fma.f64(double undef, double undef, double %tmp317) #2 + %tmp319 = fadd double undef, %tmp318 + %tmp320 = select i1 undef, double %tmp319, double undef + %tmp321 = select i1 undef, double undef, double %tmp320 + %tmp322 = tail call i1 @llvm.amdgcn.class.f64(double %tmp314, i32 3) #2 + %tmp323 = select i1 %tmp322, double %tmp314, double %tmp321 + %tmp324 = fcmp ogt double %tmp314, 0x40862E42FEFA39EF + %tmp325 = fcmp olt double %tmp314, 0xC0874910D52D3051 + %tmp326 = fptrunc double %tmp323 to float + %tmp327 = select i1 %tmp324, float 0x7FF0000000000000, float 
%tmp326 + %tmp328 = select i1 %tmp325, float 0.000000e+00, float %tmp327 + %tmp329 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float %tmp328, float addrspace(1)* %tmp329, align 4 + %tmp330 = fmul double %tmp23, 2.600000e+00 + %tmp331 = fadd double %tmp330, 0x402256CB1CF45780 + %tmp332 = fmul double undef, 0x40BB57BE6CF41F21 + %tmp333 = fsub double %tmp331, %tmp332 + %tmp334 = load <2 x double>, <2 x double> addrspace(2)* undef, align 16 + %tmp335 = extractelement <2 x double> %tmp334, i64 0 + %tmp336 = fadd double %tmp335, undef + %tmp337 = or i1 undef, undef + %tmp338 = fmul double undef, %tmp336 + %tmp339 = fmul double undef, %tmp338 + %tmp340 = select i1 %tmp337, double %tmp339, double undef + %tmp341 = select i1 undef, double %tmp333, double %tmp340 + %tmp342 = fcmp ogt double %tmp333, 0x40862E42FEFA39EF + %tmp343 = fcmp olt double %tmp333, 0xC0874910D52D3051 + %tmp344 = fptrunc double %tmp341 to float + %tmp345 = select i1 %tmp342, float 0x7FF0000000000000, float %tmp344 + %tmp346 = select i1 %tmp343, float 0.000000e+00, float %tmp345 + store float %tmp346, float addrspace(1)* undef, align 4 + %tmp347 = tail call double @llvm.fma.f64(double undef, double undef, double 0x3FA5555555555555) #2 + %tmp348 = tail call double @llvm.fma.f64(double undef, double %tmp347, double 0x3FC5555555555555) #2 + %tmp349 = tail call double @llvm.fma.f64(double undef, double %tmp348, double 5.000000e-01) #2 + %tmp350 = tail call double @llvm.fma.f64(double undef, double %tmp349, double 1.000000e+00) #2 + %tmp351 = fmul double undef, %tmp350 + %tmp352 = getelementptr inbounds [64 x <2 x double>], [64 x <2 x double>] addrspace(2)* @global, i64 0, i64 undef + %tmp353 = tail call double @llvm.fma.f64(double undef, double %tmp351, double undef) #2 + %tmp354 = fadd double 0.000000e+00, %tmp353 + %tmp355 = icmp slt i32 undef, -1022 + %tmp356 = icmp eq i32 undef, -1022 + %tmp357 = fcmp olt double %tmp354, 1.000000e+00 + %tmp358 = and i1 %tmp356, %tmp357 + 
%tmp359 = or i1 %tmp355, %tmp358 + %tmp360 = bitcast double %tmp354 to i64 + %tmp361 = and i64 %tmp360, -9223372036854775808 + %tmp362 = icmp sgt i32 undef, -2047 + %tmp363 = select i1 %tmp362, i32 undef, i32 -2047 + %tmp364 = icmp slt i32 %tmp363, 2047 + %tmp365 = select i1 %tmp364, i32 %tmp363, i32 2047 + %tmp366 = icmp sgt i32 %tmp363, 0 + %tmp367 = zext i32 %tmp365 to i64 + %tmp368 = shl i64 %tmp367, 52 + %tmp369 = or i64 %tmp368, 0 + %tmp370 = bitcast i64 %tmp369 to double + %tmp371 = select i1 %tmp366, double %tmp370, double undef + %tmp372 = icmp eq i32 %tmp365, 2047 + %tmp373 = or i64 %tmp361, 9218868437227405312 + %tmp374 = bitcast i64 %tmp373 to double + %tmp375 = select i1 %tmp372, double %tmp374, double %tmp371 + %tmp376 = icmp slt i32 %tmp365, -53 + %tmp377 = select i1 %tmp376, double undef, double %tmp375 + %tmp378 = tail call i1 @llvm.amdgcn.class.f64(double %tmp354, i32 516) #2 + %tmp379 = or i1 %tmp378, false + %tmp380 = select i1 %tmp379, double %tmp354, double %tmp377 + %tmp381 = select i1 %tmp359, double 0.000000e+00, double %tmp380 + %tmp382 = select i1 undef, double undef, double %tmp381 + %tmp383 = fcmp ogt double undef, 0x40862E42FEFA39EF + %tmp384 = fptrunc double %tmp382 to float + %tmp385 = select i1 %tmp383, float 0x7FF0000000000000, float %tmp384 + %tmp386 = select i1 undef, float 0.000000e+00, float %tmp385 + store float %tmp386, float addrspace(1)* null, align 4 + %tmp387 = add nuw nsw i64 0, %tmp4 + %tmp388 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp387 + store float 0x42D5D3EF80000000, float addrspace(1)* %tmp388, align 4 + %tmp389 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 undef + store float 0x42B5D3EF80000000, float addrspace(1)* %tmp389, align 4 + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tidig.z() #1 + +; Function Attrs: nounwind readnone +declare double @llvm.fma.f64(double, double, double) #1 + +; Function Attrs: nounwind readnone +declare float 
@llvm.fma.f32(float, float, float) #1 + +; Function Attrs: nounwind readnone +declare i1 @llvm.amdgcn.class.f64(double, i32) #1 + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind }