Index: lib/Target/AMDGPU/SIInsertWaits.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertWaits.cpp
+++ lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -558,14 +558,6 @@
 
     // Wait for everything at the end of the MBB
     Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
-
-    // Functions returning something shouldn't contain S_ENDPGM, because other
-    // bytecode will be appended after it.
-    if (!ReturnsVoid) {
-      MachineBasicBlock::iterator I = MBB.getFirstTerminator();
-      if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
-        I->eraseFromParent();
-    }
   }
 
   return Changes;
Index: lib/Target/AMDGPU/SILowerControlFlow.cpp
===================================================================
--- lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -486,6 +486,7 @@
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
        BI != BE; ++BI) {
 
+    MachineBasicBlock *EmptyMBBAtEnd = NULL;
     MachineBasicBlock &MBB = *BI;
     MachineBasicBlock::iterator I, Next;
     for (I = MBB.begin(); I != MBB.end(); I = Next) {
@@ -562,6 +563,29 @@
         case AMDGPU::SI_INDIRECT_DST_V16:
           IndirectDst(MI);
           break;
+
+        case AMDGPU::S_ENDPGM: {
+          if (MF.getInfo<SIMachineFunctionInfo>()->returnsVoid())
+            break;
+
+          // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
+          // because external bytecode will be appended at the end.
+          if (&MBB != &MF.back() || I != MBB.getFirstTerminator()) {
+            // S_ENDPGM is not the last instruction of the function. Add an
+            // empty block at the end (created on first use) and branch there.
+            if (!EmptyMBBAtEnd) {
+              EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
+              MF.insert(MF.end(), EmptyMBBAtEnd);
+            }
+
+            MBB.addSuccessor(EmptyMBBAtEnd);
+            BuildMI(MBB, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
+                .addMBB(EmptyMBBAtEnd);
+          }
+          // Either way, S_ENDPGM itself is removed for non-void shaders.
+          I->eraseFromParent();
+          break;
+        }
       }
     }
   }
Index: test/CodeGen/AMDGPU/ret_jump.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/ret_jump.ll
@@ -0,0 +1,383 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+target triple = "amdgcn--"
+; The mid-function return must become a branch to an empty final block.
+; GCN-LABEL: {{^}}main:
+; GCN: ; BB#26:
+; GCN-NEXT: s_branch [[LASTBB:BB[0-9]*_[0-9]*]]
+; GCN-NEXT: BB0_
+; GCN: [[LASTBB]]
+; GCN-NEXT: .Lfunc_end0:
+define <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <8 x i32>] addrspace(2)* byval, i32 addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
+main_body:
+  %p22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
+  %p23 = load <16 x i8>, <16 x i8> addrspace(2)* %p22, align 16, !tbaa !0
+  %p24 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 0)
+  %p25 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 4)
+  %p26 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 8)
+  %p27 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 16)
+  %p28 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 20)
+  %p29 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 24)
+  %p30 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 32)
+  %p31 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 36)
+  %p32 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 40)
+  %p33 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 48)
+  %p34 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 64)
+  %p35 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 80)
+  %p36 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 84)
+  %p37 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 96)
+  %p38 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 112)
+  %p39 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 116)
+  %p40 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 128)
+  %p41 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 144)
+  %p42 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 160)
+  %p43 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 176)
+  %p44 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 192)
+  %p45 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 208)
+  %p46 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 224)
+  %p47 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 240)
+  %p48 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 256)
+  %p49 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 272)
+  %p50 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 288)
+  %p51 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 292)
+  %p52 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 296)
+  %p53 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 300)
+  %p54 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 304)
+  %p55 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 308)
+  %p56 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 312)
+  %p57 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 316)
+  %p58 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 320)
+  %p59 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 324)
+  %p60 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 328)
+  %p61 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 332)
+  %p62 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 336)
+  %p63 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 340)
+  %p64 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 344)
+  %p65 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 348)
+  %p66 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 352)
+  %p67 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 356)
+  %p68 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 360)
+  %p69 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 364)
+  %p70 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 368)
+  %p71 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 372)
+  %p72 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 376)
+  %p73 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 380)
+  %p74 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 384)
+  %p75 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 388)
+  %p76 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 392)
+  %p77 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 396)
+  %p78 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 400)
+  %p79 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 404)
+  %p80 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 408)
+  %p81 = call float @llvm.SI.load.const(<16 x i8> %p23, i32 412)
+  %p82 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
+  %p83 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
+  %p84 = fmul float %p24, %p82
+  %p85 = fmul float %p25, %p82
+  %p86 = fmul float %p26, %p82
+  %p87 = fmul float %p27, %p83
+  %p88 = fadd float %p87, %p84
+  %p89 = fmul float %p28, %p83
+  %p90 = fadd float %p89, %p85
+  %p91 = fmul float %p29, %p83
+  %p92 = fadd float %p91, %p86
+  %p93 = fadd float %p88, %p30
+  %p94 = fadd float %p90, %p31
+  %p95 = fadd float %p92, %p32
+  %p96 = fdiv float 1.000000e+00, %p95
+  %p97 = fmul float %p93, %p96
+  %p98 = fmul float %p94, %p96
+  %p99 = fsub float %p40, %p37
+  %p100 = fsub float %p98, %p36
+  %p101 = fsub float %p39, %p36
+  %p102 = fsub float %p97, %p35
+  %p103 = fsub float %p38, %p35
+  %p104 = fmul float %p102, %p103
+  %p105 = fmul float %p100, %p101
+  %p106 = fadd float %p105, %p104
+  %p107 = fmul float %p37, %p99
+  %p108 = fadd float %p107, %p106
+  %p109 = fsub float %p98, %p36
+  %p110 = fsub float %p98, %p36
+  %p111 = fsub float %p97, %p35
+  %p112 = fsub float %p97, %p35
+  %p113 = fmul float %p111, %p112
+  %p114 = fmul float %p109, %p110
+  %p115 = fadd float %p114, %p113
+  %p116 = fmul float %p37, %p37
+  %p117 = fsub float %p115, %p116
+  %p118 = call float @llvm.fabs.f32(float %p34)
+  %p119 = fcmp olt float %p118, 0x3EE4F8B580000000
+  %p120 = sext i1 %p119 to i32
+  %p121 = bitcast i32 %p120 to float
+  br i1 %p119, label %IF, label %ELSE
+
+IF:                                               ; preds = %main_body
+  %p122 = fcmp oeq float %p108, 0.000000e+00
+  br i1 %p122, label %ENDIF, label %ELSE38
+
+ELSE:                                             ; preds = %main_body
+  %p123 = fmul float %p34, %p117
+  %p124 = fmul float %p108, %p108
+  %p125 = fsub float %p124, %p123
+  %p126 = fcmp olt float %p125, 0.000000e+00
+  br i1 %p126, label %ENDIF, label %ELSE41
+
+ENDIF:                                            ; preds = %ELSE41, %ELSE, %ELSE38, %IF
+  %temp12.0 = phi float [ %p117, %IF ], [ %p117, %ELSE38 ], [ %p133, %ELSE41 ], [ %p125, %ELSE ]
+  %temp20.0 = phi float [ 0.000000e+00, %IF ], [ %p129, %ELSE38 ], [ %p121, %ELSE41 ], [ 0.000000e+00, %ELSE ]
+  %temp8.0 = phi float [ 0x36A0000000000000, %IF ], [ 0.000000e+00, %ELSE38 ], [ 0.000000e+00, %ELSE41 ], [ 0x36A0000000000000, %ELSE ]
+  %temp4.0 = phi float [ 0.000000e+00, %IF ], [ %p132, %ELSE38 ], [ %p136, %ELSE41 ], [ 0.000000e+00, %ELSE ]
+  %temp.0 = phi i32 [ 0, %IF ], [ -1, %ELSE38 ], [ -1, %ELSE41 ], [ 0, %ELSE ]
+  %p127 = icmp eq i32 %temp.0, 0
+  br i1 %p127, label %ENDIF42, label %IF43
+
+ELSE38:                                           ; preds = %IF
+  %p128 = sext i1 %p122 to i32
+  %p129 = bitcast i32 %p128 to float
+  %p130 = fmul float %p117, 5.000000e-01
+  %p131 = fdiv float 1.000000e+00, %p108
+  %p132 = fmul float %p130, %p131
+  br label %ENDIF
+
+ELSE41:                                           ; preds = %ELSE
+  %p133 = call float @llvm.sqrt.f32(float %p125)
+  %p134 = fadd float %p108, %p133
+  %p135 = fdiv float 1.000000e+00, %p34
+  %p136 = fmul float %p134, %p135
+  br label %ENDIF
+
+IF43:                                             ; preds = %ENDIF
+  %p137 = bitcast float %p33 to i32
+  %p138 = icmp eq i32 %p137, 0
+  br i1 %p138, label %IF46, label %ELSE47
+
+ENDIF42:                                          ; preds = %ENDIF, %IF67, %ENDIF63, %ENDIF45
+  %temp20.3 = phi float [ %temp20.0, %ENDIF ], [ %temp20.4, %ENDIF45 ], [ %p175, %IF67 ], [ %temp4.6, %ENDIF63 ]
+  %temp8.3 = phi float [ %temp8.0, %ENDIF ], [ %.temp8.0, %ENDIF45 ], [ %.temp8.0, %IF67 ], [ %.temp8.0, %ENDIF63 ]
+  %p139 = bitcast float %temp8.3 to i32
+  %p140 = icmp eq i32 %p139, 1
+  br i1 %p140, label %ENDIF69, label %ELSE71
+
+IF46:                                             ; preds = %IF43
+  %p141 = fcmp ole float %temp4.0, 0.000000e+00
+  %p142 = fcmp ogt float %temp4.0, 1.000000e+00
+  %p143 = or i1 %p141, %p142
+  br i1 %p143, label %IF49, label %ENDIF48
+
+ELSE47:                                           ; preds = %IF43
+  %p144 = fsub float %p40, %p37
+  %p145 = fmul float %temp4.0, %p144
+  %p146 = fsub float -0.000000e+00, %p37
+  %p147 = fcmp ugt float %p145, %p146
+  br i1 %p147, label %ENDIF54, label %IF55
+
+ENDIF45:                                          ; preds = %ENDIF54, %ENDIF48
+  %.sink = phi i1 [ %p155, %ENDIF48 ], [ %p162, %ENDIF54 ]
+  %temp20.4 = phi float [ %temp4.4.temp20.0, %ENDIF48 ], [ %temp4.5.temp20.0, %ENDIF54 ]
+  %temp4.3 = phi float [ %temp4.4, %ENDIF48 ], [ %temp4.5, %ENDIF54 ]
+  %.temp8.0 = select i1 %.sink, float 0x36A0000000000000, float %temp8.0
+  %p148 = icmp ne i32 %temp.0, 0
+  %not..sink = xor i1 %.sink, true
+  %p149 = and i1 %p148, %not..sink
+  br i1 %p149, label %IF61, label %ENDIF42
+
+IF49:                                             ; preds = %IF46
+  %p150 = fsub float %p108, %temp12.0
+  %p151 = fdiv float 1.000000e+00, %p34
+  %p152 = fmul float %p150, %p151
+  br label %ENDIF48
+
+ENDIF48:                                          ; preds = %IF46, %IF49
+  %temp4.4 = phi float [ %p152, %IF49 ], [ %temp4.0, %IF46 ]
+  %p153 = fcmp ole float %temp4.4, 0.000000e+00
+  %p154 = fcmp ogt float %temp4.4, 1.000000e+00
+  %p155 = or i1 %p153, %p154
+  %temp4.4.temp20.0 = select i1 %p155, float %temp4.4, float %temp20.0
+  br label %ENDIF45
+
+IF55:                                             ; preds = %ELSE47
+  %p156 = fsub float %p108, %temp12.0
+  %p157 = fdiv float 1.000000e+00, %p34
+  %p158 = fmul float %p156, %p157
+  br label %ENDIF54
+
+ENDIF54:                                          ; preds = %ELSE47, %IF55
+  %temp4.5 = phi float [ %p158, %IF55 ], [ %temp4.0, %ELSE47 ]
+  %p159 = fsub float %p40, %p37
+  %p160 = fmul float %temp4.5, %p159
+  %p161 = fsub float -0.000000e+00, %p37
+  %p162 = fcmp ole float %p160, %p161
+  %temp4.5.temp20.0 = select i1 %p162, float %temp4.5, float %temp20.0
+  br label %ENDIF45
+
+IF61:                                             ; preds = %ENDIF45
+  %p163 = bitcast float %p33 to i32
+  %p164 = icmp eq i32 %p163, 1
+  br i1 %p164, label %IF64, label %ENDIF63
+
+IF64:                                             ; preds = %IF61
+  %p165 = call float @llvm.floor.f32(float %temp4.3)
+  %p166 = fsub float %temp4.3, %p165
+  br label %ENDIF63
+
+ENDIF63:                                          ; preds = %IF61, %IF64
+  %temp4.6 = phi float [ %p166, %IF64 ], [ %temp4.3, %IF61 ]
+  %p167 = bitcast float %p33 to i32
+  %p168 = icmp eq i32 %p167, 3
+  br i1 %p168, label %IF67, label %ENDIF42
+
+IF67:                                             ; preds = %ENDIF63
+  %p169 = fmul float %temp4.6, 5.000000e-01
+  %p170 = fadd float %p169, 5.000000e-01
+  %p171 = call float @llvm.floor.f32(float %p170)
+  %p172 = fsub float %p170, %p171
+  %p173 = fmul float %p172, 2.000000e+00
+  %p174 = fadd float %p173, -1.000000e+00
+  %p175 = call float @llvm.fabs.f32(float %p174)
+  br label %ENDIF42
+
+ELSE71:                                           ; preds = %ENDIF42
+  %p176 = fcmp olt float %temp20.3, %p42
+  %p177 = bitcast float %p41 to i32
+  %p178 = icmp sgt i32 %p177, 0
+  %p179 = and i1 %p176, %p178
+  br i1 %p179, label %ENDIF72, label %ELSE74
+
+ENDIF69:                                          ; preds = %ENDIF42, %ENDIF96
+  %temp3.0 = phi float [ %p222, %ENDIF96 ], [ 0.000000e+00, %ENDIF42 ]
+  %temp2.0 = phi float [ %p235, %ENDIF96 ], [ 0.000000e+00, %ENDIF42 ]
+  %temp1.0 = phi float [ %p234, %ENDIF96 ], [ 0.000000e+00, %ENDIF42 ]
+  %temp.6 = phi float [ %p233, %ENDIF96 ], [ 0.000000e+00, %ENDIF42 ]
+  %p180 = bitcast float %4 to i32
+  %p181 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %p180, 8
+  %p182 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %p181, float %temp.6, 9
+  %p183 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %p182, float %temp1.0, 10
+  %p184 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %p183, float %temp2.0, 11
+  %p185 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %p184, float %temp3.0, 12
+  %p186 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %p185, float %20, 22
+  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %p186
+
+ELSE74:                                           ; preds = %ELSE71
+  %p187 = fcmp olt float %temp20.3, %p43
+  %p188 = bitcast float %p41 to i32
+  %p189 = icmp sgt i32 %p188, 1
+  %p190 = and i1 %p187, %p189
+  br i1 %p190, label %ENDIF72, label %ELSE77
+
+ENDIF72:                                          ; preds = %ELSE92, %ELSE89, %ELSE86, %ELSE83, %ELSE80, %ELSE77, %ELSE74, %ELSE71
+  %temp12.2 = phi float [ %p42, %ELSE71 ], [ %p43, %ELSE74 ], [ %p44, %ELSE77 ], [ %p45, %ELSE80 ], [ %p46, %ELSE83 ], [ %p47, %ELSE86 ], [ %p49, %ELSE92 ], [ %p48, %ELSE89 ]
+  %temp16.0 = phi float [ %p42, %ELSE71 ], [ %p42, %ELSE74 ], [ %p43, %ELSE77 ], [ %p44, %ELSE80 ], [ %p45, %ELSE83 ], [ %p46, %ELSE86 ], [ %., %ELSE92 ], [ %p47, %ELSE89 ]
+  %temp11.0 = phi float [ %p53, %ELSE71 ], [ %p57, %ELSE74 ], [ %p61, %ELSE77 ], [ %p65, %ELSE80 ], [ %p69, %ELSE83 ], [ %p73, %ELSE86 ], [ %p81, %ELSE92 ], [ %p77, %ELSE89 ]
+  %temp10.0 = phi float [ %p52, %ELSE71 ], [ %p56, %ELSE74 ], [ %p60, %ELSE77 ], [ %p64, %ELSE80 ], [ %p68, %ELSE83 ], [ %p72, %ELSE86 ], [ %p80, %ELSE92 ], [ %p76, %ELSE89 ]
+  %temp9.0 = phi float [ %p51, %ELSE71 ], [ %p55, %ELSE74 ], [ %p59, %ELSE77 ], [ %p63, %ELSE80 ], [ %p67, %ELSE83 ], [ %p71, %ELSE86 ], [ %p79, %ELSE92 ], [ %p75, %ELSE89 ]
+  %temp8.7 = phi float [ %p50, %ELSE71 ], [ %p54, %ELSE74 ], [ %p58, %ELSE77 ], [ %p62, %ELSE80 ], [ %p66, %ELSE83 ], [ %p70, %ELSE86 ], [ %p78, %ELSE92 ], [ %p74, %ELSE89 ]
+  %temp7.0 = phi float [ %p53, %ELSE71 ], [ %p53, %ELSE74 ], [ %p57, %ELSE77 ], [ %p61, %ELSE80 ], [ %p65, %ELSE83 ], [ %p69, %ELSE86 ], [ %.104, %ELSE92 ], [ %p73, %ELSE89 ]
+  %temp6.0 = phi float [ %p52, %ELSE71 ], [ %p52, %ELSE74 ], [ %p56, %ELSE77 ], [ %p60, %ELSE80 ], [ %p64, %ELSE83 ], [ %p68, %ELSE86 ], [ %.105, %ELSE92 ], [ %p72, %ELSE89 ]
+  %temp5.0 = phi float [ %p51, %ELSE71 ], [ %p51, %ELSE74 ], [ %p55, %ELSE77 ], [ %p59, %ELSE80 ], [ %p63, %ELSE83 ], [ %p67, %ELSE86 ], [ %.106, %ELSE92 ], [ %p71, %ELSE89 ]
+  %temp4.8 = phi float [ %p50, %ELSE71 ], [ %p50, %ELSE74 ], [ %p54, %ELSE77 ], [ %p58, %ELSE80 ], [ %p62, %ELSE83 ], [ %p66, %ELSE86 ], [ %.107, %ELSE92 ], [ %p70, %ELSE89 ]
+  %p191 = fsub float %temp12.2, %temp16.0
+  %p192 = fcmp ogt float %p191, 2.000000e+00
+  br i1 %p192, label %ENDIF96, label %ELSE98
+
+ELSE77:                                           ; preds = %ELSE74
+  %p193 = fcmp olt float %temp20.3, %p44
+  %p194 = bitcast float %p41 to i32
+  %p195 = icmp sgt i32 %p194, 2
+  %p196 = and i1 %p193, %p195
+  br i1 %p196, label %ENDIF72, label %ELSE80
+
+ELSE80:                                           ; preds = %ELSE77
+  %p197 = fcmp olt float %temp20.3, %p45
+  %p198 = bitcast float %p41 to i32
+  %p199 = icmp sgt i32 %p198, 3
+  %p200 = and i1 %p197, %p199
+  br i1 %p200, label %ENDIF72, label %ELSE83
+
+ELSE83:                                           ; preds = %ELSE80
+  %p201 = fcmp olt float %temp20.3, %p46
+  %p202 = bitcast float %p41 to i32
+  %p203 = icmp sgt i32 %p202, 4
+  %p204 = and i1 %p201, %p203
+  br i1 %p204, label %ENDIF72, label %ELSE86
+
+ELSE86:                                           ; preds = %ELSE83
+  %p205 = fcmp olt float %temp20.3, %p47
+  %p206 = bitcast float %p41 to i32
+  %p207 = icmp sgt i32 %p206, 5
+  %p208 = and i1 %p205, %p207
+  br i1 %p208, label %ENDIF72, label %ELSE89
+
+ELSE89:                                           ; preds = %ELSE86
+  %p209 = fcmp olt float %temp20.3, %p48
+  %p210 = bitcast float %p41 to i32
+  %p211 = icmp sgt i32 %p210, 6
+  %p212 = and i1 %p209, %p211
+  br i1 %p212, label %ENDIF72, label %ELSE92
+
+ELSE92:                                           ; preds = %ELSE89
+  %p213 = fcmp olt float %temp20.3, %p49
+  %p214 = bitcast float %p41 to i32
+  %p215 = icmp sgt i32 %p214, 7
+  %p216 = and i1 %p213, %p215
+  %. = select i1 %p216, float %p48, float %p49
+  %.104 = select i1 %p216, float %p77, float %p81
+  %.105 = select i1 %p216, float %p76, float %p80
+  %.106 = select i1 %p216, float %p75, float %p79
+  %.107 = select i1 %p216, float %p74, float %p78
+  br label %ENDIF72
+
+ELSE98:                                           ; preds = %ENDIF72
+  %p217 = fsub float %temp12.2, %temp16.0
+  %p218 = fcmp olt float %p217, 0x3EB0C6F7A0000000
+  br i1 %p218, label %ENDIF96, label %ELSE101
+
+ENDIF96:                                          ; preds = %ELSE101, %ELSE98, %ENDIF72
+  %temp24.0 = phi float [ 0.000000e+00, %ENDIF72 ], [ %p239, %ELSE101 ], [ 0.000000e+00, %ELSE98 ]
+  %p219 = fsub float 1.000000e+00, %temp24.0
+  %p220 = fmul float %temp24.0, %temp11.0
+  %p221 = fmul float %p219, %temp7.0
+  %p222 = fadd float %p221, %p220
+  %p223 = fsub float 1.000000e+00, %temp24.0
+  %p224 = fmul float %temp24.0, %temp8.7
+  %p225 = fmul float %temp24.0, %temp9.0
+  %p226 = fmul float %temp24.0, %temp10.0
+  %p227 = fmul float %p223, %temp4.8
+  %p228 = fadd float %p227, %p224
+  %p229 = fmul float %p223, %temp5.0
+  %p230 = fadd float %p229, %p225
+  %p231 = fmul float %p223, %temp6.0
+  %p232 = fadd float %p231, %p226
+  %p233 = fmul float %p228, %p222
+  %p234 = fmul float %p230, %p222
+  %p235 = fmul float %p232, %p222
+  br label %ENDIF69
+
+ELSE101:                                          ; preds = %ELSE98
+  %p236 = fsub float %temp20.3, %temp16.0
+  %p237 = fsub float %temp12.2, %temp16.0
+  %p238 = fdiv float 1.000000e+00, %p237
+  %p239 = fmul float %p236, %p238
+  br label %ENDIF96
+}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare float @llvm.fabs.f32(float) #1
+
+; Function Attrs: nounwind readnone
+declare float @llvm.sqrt.f32(float) #1
+
+; Function Attrs: nounwind readnone
+declare float @llvm.floor.f32(float) #1
+
+attributes #0 = { "InitialPSInputAddr"="36983" "ShaderType"="0" }
+attributes #1 = { nounwind readnone }
+
+!0 = !{!"const", null, i32 1}