diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -3483,7 +3483,57 @@
     return true;
   }
 
+  /// Fold a bitcast of a select into a select of bitcasts:
+  ///   bitcast (select cond, ptr1, ptr2)
+  ///     => select cond, bitcast(ptr1), bitcast(ptr2)
+  ///
+  /// The operand of \p BC must be a SelectInst; visitBitCastInst checks this
+  /// with isa<> before calling. The replacement casts and select are created
+  /// through an IRBuilderTy constructed on \p BC. \p BC then has its uses
+  /// replaced and is erased, and the new select is marked visited and has its
+  /// users enqueued.
+  ///
+  /// \returns true unconditionally.
+  bool foldBitCastSelect(BitCastInst &BC) {
+    SelectInst *Sel = cast<SelectInst>(BC.getOperand(0));
+
+    LLVM_DEBUG(dbgs() << " Rewriting bitcast(select) -> select(bitcast):"
+                      << "\n original: " << *Sel << "\n "
+                      << BC);
+
+    IRBuilderTy Builder(&BC);
+
+    Type *Ty = BC.getType();
+    Value *True = Sel->getTrueValue();
+    Value *NTrue =
+        Builder.CreateBitCast(True, Ty, True->getName() + ".sroa.cast");
+
+    Value *False = Sel->getFalseValue();
+    Value *NFalse =
+        Builder.CreateBitCast(False, Ty, False->getName() + ".sroa.cast");
+
+    Value *NSel = Builder.CreateSelect(Sel->getCondition(), NTrue, NFalse,
+                                       Sel->getName() + ".sroa.sel");
+    // Forget BC before deleting it. NOTE(review): presumably so that Visited
+    // never holds a pointer to a deleted instruction -- confirm.
+    Visited.erase(&BC);
+    BC.replaceAllUsesWith(NSel);
+    BC.eraseFromParent();
+    Instruction *NSelI = cast<Instruction>(NSel);
+    Visited.insert(NSelI);
+    enqueueUsers(*NSelI);
+
+    LLVM_DEBUG(dbgs() << "\n to: " << *NTrue << "\n "
+                      << *NFalse << "\n " << *NSel << '\n');
+
+    return true;
+  }
+
   bool visitBitCastInst(BitCastInst &BC) {
+    // A bitcast whose operand is a select is rewritten by foldBitCastSelect.
+    if (isa<SelectInst>(BC.getOperand(0)) && foldBitCastSelect(BC))
+      return true;
+
     enqueueUsers(BC);
     return false;
   }
diff --git a/llvm/test/Transforms/SROA/phi-and-select.ll b/llvm/test/Transforms/SROA/phi-and-select.ll
--- a/llvm/test/Transforms/SROA/phi-and-select.ll
+++ b/llvm/test/Transforms/SROA/phi-and-select.ll
@@ -60,23 +60,14 @@
   ret i32 %result
 }
 
-; If bitcast isn't considered a safe phi/select use, the alloca
-; remains as an array.
-; FIXME: Why isn't this identical to test2?
define float @test2_bitcast() { ; CHECK-LABEL: @test2_bitcast( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, i32* [[A_SROA_0]], align 4 -; CHECK-NEXT: store i32 1, i32* [[A_SROA_3]], align 4 -; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_V0:%.*]] = load i32, i32* [[A_SROA_0]], align 4 -; CHECK-NEXT: [[A_SROA_3_0_A_SROA_3_4_V1:%.*]] = load i32, i32* [[A_SROA_3]], align 4 -; CHECK-NEXT: [[COND:%.*]] = icmp sle i32 [[A_SROA_0_0_A_SROA_0_0_V0]], [[A_SROA_3_0_A_SROA_3_4_V1]] -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], i32* [[A_SROA_3]], i32* [[A_SROA_0]] -; CHECK-NEXT: [[SELECT_BC:%.*]] = bitcast i32* [[SELECT]] to float* -; CHECK-NEXT: [[RESULT:%.*]] = load float, float* [[SELECT_BC]], align 4 -; CHECK-NEXT: ret float [[RESULT]] +; CHECK-NEXT: [[COND:%.*]] = icmp sle i32 0, 1 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 1 to float +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 0 to float +; CHECK-NEXT: [[RESULT_SROA_SPECULATED:%.*]] = select i1 [[COND]], float [[TMP0]], float [[TMP1]] +; CHECK-NEXT: ret float [[RESULT_SROA_SPECULATED]] ; entry: %a = alloca [2 x i32] diff --git a/llvm/test/Transforms/SROA/select-bitcast.ll b/llvm/test/Transforms/SROA/select-bitcast.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SROA/select-bitcast.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -sroa < %s | FileCheck %s + +%st.half = type { half } + +define <2 x i16> @test_bitcast_select_arg_alloca(i1 %cond1, i1 %cond2) { +; CHECK-LABEL: @test_bitcast_select_arg_alloca( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast half 0xHFFFF to i16 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast half 0xH0000 to i16 +; CHECK-NEXT: [[LD1_SROA_SPECULATED:%.*]] = select i1 [[COND1:%.*]], i16 [[TMP1]], i16 [[TMP2]] +; CHECK-NEXT: [[V1:%.*]] = insertelement <2 x i16> undef, i16 [[LD1_SROA_SPECULATED]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = 
bitcast half 0xHFFFF to i16 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast half 0xH0000 to i16 +; CHECK-NEXT: [[LD2_SROA_SPECULATED:%.*]] = select i1 [[COND2:%.*]], i16 [[TMP3]], i16 [[TMP4]] +; CHECK-NEXT: [[V2:%.*]] = insertelement <2 x i16> [[V1]], i16 [[LD2_SROA_SPECULATED]], i32 1 +; CHECK-NEXT: ret <2 x i16> [[V2]] +; + %true = alloca half, align 2 + %false = alloca half, align 2 + store half 0xHFFFF, half* %true, align 2 + store half 0xH0000, half* %false, align 2 + %false.cast = bitcast half* %false to %st.half* + %true.cast = bitcast half* %true to %st.half* + %sel1 = select i1 %cond1, %st.half* %true.cast, %st.half* %false.cast + %cast1 = bitcast %st.half* %sel1 to i16* + %ld1 = load i16, i16* %cast1, align 2 + %v1 = insertelement <2 x i16> undef, i16 %ld1, i32 0 + %sel2 = select i1 %cond2, %st.half* %true.cast, %st.half* %false.cast + %cast2 = bitcast %st.half* %sel2 to i16* + %ld2 = load i16, i16* %cast2, align 2 + %v2 = insertelement <2 x i16> %v1, i16 %ld2, i32 1 + ret <2 x i16> %v2 +}