diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -985,7 +985,6 @@ void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
   addPass(createLICMPass());
   addPass(createSeparateConstOffsetFromGEPPass());
-  addPass(createSpeculativeExecutionPass());
   // ReassociateGEPs exposes more opportunities for SLSR. See
   // the example in reassociate-geps-and-slsr.ll.
   addPass(createStraightLineStrengthReducePass());
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -30,7 +30,6 @@
 ; ISA-NEXT: .LBB0_1: ; %Flow1
 ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
 ; ISA-NEXT: s_or_b64 exec, exec, s[6:7]
-; ISA-NEXT: s_add_i32 s8, s8, 1
 ; ISA-NEXT: s_mov_b64 s[6:7], 0
 ; ISA-NEXT: .LBB0_2: ; %Flow
 ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
@@ -54,6 +53,7 @@
 ; ISA-NEXT: s_cbranch_execz .LBB0_1
 ; ISA-NEXT: ; %bb.5: ; %endif2
 ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
+; ISA-NEXT: s_add_i32 s8, s8, 1
 ; ISA-NEXT: s_xor_b64 s[4:5], exec, -1
 ; ISA-NEXT: s_branch .LBB0_1
 ; ISA-NEXT: .LBB0_6: ; %Flow2
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -451,7 +451,6 @@
 ; GCN-O1-OPTS-NEXT: Loop Pass Manager
 ; GCN-O1-OPTS-NEXT:   Loop Invariant Code Motion
 ; GCN-O1-OPTS-NEXT: Split GEPs to a variadic base and a constant offset for better CSE
-; GCN-O1-OPTS-NEXT: Speculatively execute instructions
 ; GCN-O1-OPTS-NEXT: Scalar Evolution Analysis
 ; GCN-O1-OPTS-NEXT: Straight line strength reduction
 ; GCN-O1-OPTS-NEXT: Early CSE
@@ -741,7 +740,6 @@
 ; GCN-O2-NEXT: Loop Pass Manager
 ; GCN-O2-NEXT:   Loop Invariant Code Motion
 ; GCN-O2-NEXT: Split GEPs to a variadic base and a constant offset for better CSE
-; GCN-O2-NEXT: Speculatively execute instructions
 ; GCN-O2-NEXT: Scalar Evolution Analysis
 ; GCN-O2-NEXT: Straight line strength reduction
 ; GCN-O2-NEXT: Early CSE
@@ -1034,7 +1032,6 @@
 ; GCN-O3-NEXT: Loop Pass Manager
 ; GCN-O3-NEXT:   Loop Invariant Code Motion
 ; GCN-O3-NEXT: Split GEPs to a variadic base and a constant offset for better CSE
-; GCN-O3-NEXT: Speculatively execute instructions
 ; GCN-O3-NEXT: Scalar Evolution Analysis
 ; GCN-O3-NEXT: Straight line strength reduction
 ; GCN-O3-NEXT: Phi Values Analysis
diff --git a/llvm/test/CodeGen/AMDGPU/mul_int24.ll b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
--- a/llvm/test/CodeGen/AMDGPU/mul_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
@@ -761,7 +761,7 @@
 ; EG: ; %bb.0: ; %bb
 ; EG-NEXT: ALU_PUSH_BEFORE 1, @6, KC0[CB0:0-32], KC1[]
 ; EG-NEXT: JUMP @5 POP:1
-; EG-NEXT: ALU 10, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 14, @8, KC0[CB0:0-32], KC1[]
 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 0
 ; EG-NEXT: POP @5 POP:1
 ; EG-NEXT: CF_END
@@ -769,8 +769,10 @@
 ; EG-NEXT: SETNE_INT * T0.W, KC0[2].Z, 0.0,
 ; EG-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
 ; EG-NEXT: ALU clause starting at 8:
-; EG-NEXT: LSHL T0.W, KC0[2].W, literal.x,
-; EG-NEXT: LSHL * T1.W, KC0[3].Y, literal.x,
+; EG-NEXT: MOV T0.X, KC0[3].Y,
+; EG-NEXT: MOV * T1.X, KC0[2].W,
+; EG-NEXT: LSHL T0.W, PS, literal.x,
+; EG-NEXT: LSHL * T1.W, PV.X, literal.x,
 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
 ; EG-NEXT: ASHR T1.W, PS, literal.x,
 ; EG-NEXT: ASHR * T0.W, PV.W, literal.x,
@@ -778,14 +780,16 @@
 ; EG-NEXT: MOV T2.W, KC0[2].Y,
 ; EG-NEXT: MULLO_INT * T0.X, PS, PV.W,
 ; EG-NEXT: LSHR T1.X, PV.W, literal.x,
-; EG-NEXT: MOV * T0.Y, PS,
+; EG-NEXT: MOV T0.Y, PS,
+; EG-NEXT: MOV T0.W, KC0[3].X,
+; EG-NEXT: MOV * T0.W, KC0[3].Z,
 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
 ;
 ; CM-LABEL: simplify_i24_crash:
 ; CM: ; %bb.0: ; %bb
 ; CM-NEXT: ALU_PUSH_BEFORE 1, @6, KC0[CB0:0-32], KC1[]
 ; CM-NEXT: JUMP @5 POP:1
-; CM-NEXT: ALU 13, @8, KC0[CB0:0-32], KC1[]
+; CM-NEXT: ALU 17, @8, KC0[CB0:0-32], KC1[]
 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
 ; CM-NEXT: POP @5 POP:1
 ; CM-NEXT: CF_END
@@ -793,8 +797,10 @@
 ; CM-NEXT: SETNE_INT * T0.W, KC0[2].Z, 0.0,
 ; CM-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
 ; CM-NEXT: ALU clause starting at 8:
-; CM-NEXT: LSHL T0.Z, KC0[2].W, literal.x,
-; CM-NEXT: LSHL * T0.W, KC0[3].Y, literal.x,
+; CM-NEXT: MOV * T0.X, KC0[3].Y,
+; CM-NEXT: MOV * T1.X, KC0[2].W,
+; CM-NEXT: LSHL T0.Z, PV.X, literal.x,
+; CM-NEXT: LSHL * T0.W, T0.X, literal.x,
 ; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
 ; CM-NEXT: MOV T0.Y, KC0[2].Y,
 ; CM-NEXT: ASHR T1.Z, PV.W, literal.x,
@@ -805,7 +811,9 @@
 ; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T1.Z,
 ; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T1.Z,
 ; CM-NEXT: LSHR T1.X, T0.Y, literal.x,
-; CM-NEXT: MOV * T0.Y, PV.X,
+; CM-NEXT: MOV T0.Y, PV.X,
+; CM-NEXT: MOV T0.Z, KC0[3].X,
+; CM-NEXT: MOV * T0.W, KC0[3].Z,
 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
 bb:
   %cmp = icmp eq i32 %arg0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/select-opt.ll b/llvm/test/CodeGen/AMDGPU/select-opt.ll
--- a/llvm/test/CodeGen/AMDGPU/select-opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-opt.ll
@@ -143,8 +143,6 @@
 ; GCN-LABEL: {{^}}regression:
 ; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 1.0
-; GCN: v_cmp_neq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
-; GCN: v_cmp_eq_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 0
 define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 {
 entry: