Index: llvm/lib/Transforms/Scalar/SROA.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SROA.cpp +++ llvm/lib/Transforms/Scalar/SROA.cpp @@ -162,27 +162,34 @@ /// The ending offset, not included in the range. uint64_t EndOffset = 0; - /// Storage for both the use of this slice and whether it can be - /// split. - PointerIntPair<Use *, 1, bool> UseAndIsSplittable; + /// The use of this slice; null for a dead slice. + Use *U = nullptr; + + /// True if this slice can be split. + bool IsSplittable : 1; + + /// A sequentially growing number to make sure sort is stable if all the + /// rest compares are equal. + unsigned Ordinal; public: Slice() = default; - Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable) + Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable, + unsigned Ordinal) : BeginOffset(BeginOffset), EndOffset(EndOffset), - UseAndIsSplittable(U, IsSplittable) {} + U(U), IsSplittable(IsSplittable), Ordinal(Ordinal) {} uint64_t beginOffset() const { return BeginOffset; } uint64_t endOffset() const { return EndOffset; } - bool isSplittable() const { return UseAndIsSplittable.getInt(); } - void makeUnsplittable() { UseAndIsSplittable.setInt(false); } + bool isSplittable() const { return IsSplittable; } + void makeUnsplittable() { IsSplittable = false; } - Use *getUse() const { return UseAndIsSplittable.getPointer(); } + Use *getUse() const { return U; } bool isDead() const { return getUse() == nullptr; } - void kill() { UseAndIsSplittable.setPointer(nullptr); } + void kill() { U = nullptr; } /// Support for ordering ranges. /// @@ -199,7 +206,9 @@ return !isSplittable(); if (endOffset() > RHS.endOffset()) return true; - return false; + if (endOffset() < RHS.endOffset()) + return false; + return Ordinal < RHS.Ordinal; } /// Support comparison with a single offset to allow binary searches. @@ -286,6 +295,9 @@ /// need to replace with undef. 
ArrayRef<Instruction *> getDeadOperands() const { return DeadOperands; } + /// Return and increment ordinal to be used for Slices sorting on tie. + unsigned getOrdinal() { return Ordinal++; } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const; void printSlice(raw_ostream &OS, const_iterator I, @@ -341,6 +353,9 @@ /// want to swap this particular input for undef to simplify the use lists of /// the alloca. SmallVector<Instruction *, 8> DeadOperands; + + /// Incrementing ordinal to be used for Slices sorting on tie. + unsigned Ordinal; }; /// A partition of the slices. @@ -704,7 +719,8 @@ EndOffset = AllocSize; } - AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable)); + AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable, + AS.getOrdinal())); } void visitBitCastInst(BitCastInst &BC) { @@ -1055,7 +1071,7 @@ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) AI(AI), #endif - PointerEscapingInstr(nullptr) { + PointerEscapingInstr(nullptr), Ordinal(0) { SliceBuilder PB(DL, AI, *this); SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); if (PtrI.isEscaped() || PtrI.isAborted()) { @@ -3971,12 +3987,12 @@ int Idx = 0, Size = Offsets.Splits.size(); for (;;) { auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); - auto AS = LI->getPointerAddressSpace(); - auto *PartPtrTy = PartTy->getPointerTo(AS); + auto PtrAS = LI->getPointerAddressSpace(); + auto *PartPtrTy = PartTy->getPointerTo(PtrAS); LoadInst *PLoad = IRB.CreateAlignedLoad( PartTy, getAdjustedPtr(IRB, DL, BasePtr, - APInt(DL.getIndexSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(PtrAS), PartOffset), PartPtrTy, BasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset), /*IsVolatile*/ false, LI->getName()); @@ -3991,7 +4007,7 @@ NewSlices.push_back( Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, &PLoad->getOperandUse(PLoad->getPointerOperandIndex()), - /*IsSplittable*/ false)); + /*IsSplittable*/ false, 
AS.getOrdinal())); LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() << ", " << NewSlices.back().endOffset() << "): " << *PLoad << "\n"); @@ -4127,11 +4143,11 @@ // And store this partition. IRB.SetInsertPoint(SI); - auto AS = SI->getPointerAddressSpace(); + auto PtrAS = SI->getPointerAddressSpace(); StoreInst *PStore = IRB.CreateAlignedStore( PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getIndexSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(PtrAS), PartOffset), StorePartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset), /*IsVolatile*/ false); @@ -4140,7 +4156,7 @@ NewSlices.push_back( Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, &PStore->getOperandUse(PStore->getPointerOperandIndex()), - /*IsSplittable*/ false)); + /*IsSplittable*/ false, AS.getOrdinal())); LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() << ", " << NewSlices.back().endOffset() << "): " << *PStore << "\n"); Index: llvm/test/Transforms/SROA/phi-gep.ll =================================================================== --- llvm/test/Transforms/SROA/phi-gep.ll +++ llvm/test/Transforms/SROA/phi-gep.ll @@ -367,6 +367,55 @@ unreachable } +define i32 @test_sroa_gep_cast_phi_gep(i1 %cond) { +; CHECK-LABEL: @test_sroa_gep_cast_phi_gep( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A_SROA_0_0_GEP_A_CAST_TO_I32_SROA_CAST:%.*]] = bitcast i32* [[A_SROA_0]] to float* +; CHECK-NEXT: [[A_SROA_0_0_GEP_A_CAST_TO_I32_SROA_CAST2:%.*]] = bitcast i32* [[A_SROA_0]] to float* +; CHECK-NEXT: [[A_SROA_0_0_GEP_SROA_CAST:%.*]] = bitcast i32* [[A_SROA_0]] to float* +; CHECK-NEXT: store i32 1065353216, i32* [[A_SROA_0]], align 4 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[FOR:%.*]], label [[END:%.*]] +; CHECK: for: +; CHECK-NEXT: [[PHI_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I:%.*]], [[FOR]] ] +; CHECK-NEXT: [[PHI:%.*]] = phi float* [ 
[[A_SROA_0_0_GEP_A_CAST_TO_I32_SROA_CAST]], [[ENTRY]] ], [ [[GEP_FOR_CAST_TO_I32:%.*]], [[FOR]] ] +; CHECK-NEXT: [[PHI_SROA_PHI:%.*]] = phi float* [ [[A_SROA_0_0_GEP_SROA_CAST]], [[ENTRY]] ], [ [[GEP_FOR_CAST_TO_I32_SROA_GEP:%.*]], [[FOR]] ] +; CHECK-NEXT: [[I]] = add i32 [[PHI_I]], 1 +; CHECK-NEXT: [[GEP_FOR_CAST:%.*]] = bitcast float* [[PHI_SROA_PHI]] to i32* +; CHECK-NEXT: [[GEP_FOR_CAST_TO_I32]] = bitcast i32* [[GEP_FOR_CAST]] to float* +; CHECK-NEXT: [[GEP_FOR_CAST_TO_I32_SROA_GEP]] = getelementptr inbounds float, float* [[GEP_FOR_CAST_TO_I32]], i32 0 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[I]], 10 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[FOR]], label [[END]] +; CHECK: end: +; CHECK-NEXT: [[PHI_END:%.*]] = phi float* [ [[A_SROA_0_0_GEP_A_CAST_TO_I32_SROA_CAST2]], [[ENTRY]] ], [ [[PHI]], [[FOR]] ] +; CHECK-NEXT: [[PHI_END_1:%.*]] = bitcast float* [[PHI_END]] to i32* +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[PHI_END_1]], align 4 +; CHECK-NEXT: ret i32 [[LOAD]] +; +entry: + %a = alloca %pair, align 4 + %gep_a = getelementptr inbounds %pair, %pair* %a, i32 0, i32 1 + %gep_a_cast_to_float = bitcast i32* %gep_a to float* + store float 1.0, float* %gep_a_cast_to_float, align 4 + br i1 %cond, label %for, label %end + +for: + %phi_i = phi i32 [ 0, %entry ], [ %i, %for ] + %phi = phi float* [ %gep_a_cast_to_float, %entry], [ %gep_for_cast_to_float, %for ] + %i = add i32 %phi_i, 1 + %gep_for = getelementptr inbounds float, float* %phi, i32 0 + %gep_for_cast = bitcast float* %gep_for to i32* + %gep_for_cast_to_float = bitcast i32* %gep_for_cast to float* + %loop.cond = icmp ult i32 %i, 10 + br i1 %loop.cond, label %for, label %end + +end: + %phi_end = phi float* [ %gep_a_cast_to_float, %entry], [ %phi, %for ] + %phi_end.1 = bitcast float* %phi_end to i32* + %load = load i32, i32* %phi_end.1, align 4 + ret i32 %load +} + declare %pair* @foo() declare i32 @__gxx_personality_v0(...)