Avoids some regressions in the math libraries in a future
patch.
Details
Details
- Reviewers
rampitec foad Pierre-vh sebastian-ne - Group Reviewers
Restricted Project
Diff Detail
Diff Detail
Event Timeline
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | ||
---|---|---|
605–609 |
Comment Actions
This commit is causing some regression for us in some games.
Here's a reproducer (you get a cannot yet select error):
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S 32-A5-G1-ni:7" target triple = "amdgcn--amdpal" define amdgpu_cs void @_amdgpu_cs_main(float %.i2369) { .entry: %0 = fcmp uge float %.i2369, 0.000000e+00 %.i2379 = select i1 %0, i32 1, i32 0 %.i0436 = bitcast i32 %.i2379 to float %.i0440 = fneg float %.i0436 %1 = fcmp uge float %.i0436, 0.000000e+00 %.i2495 = select i1 %1, i32 %.i2379, i32 0 %.i0552 = bitcast i32 %.i2495 to float %.i0592 = fmul float %.i0440, %.i0552 %.i0721 = fcmp ogt float %.i0592, 0.000000e+00 br i1 %.i0721, label %6, label %2 2: ; preds = %.entry %3 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> zeroinitializer, i32 1, i32 0) %4 = shufflevector <2 x i32> %3, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> %5 = bitcast <4 x i32> %4 to <4 x float> %.i0753 = extractelement <4 x float> %5, i64 0 br label %6 6: ; preds = %2, %.entry ret void } ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg) #0 attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
It looks to me that the part that does a "ReplaceAllUsesWith" is causing the problem since it replaces a select returning an i32 with one returning a float, which in this case causes failure in the subsequent bitcast. (See %.i0436 bitcast)