Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -453,15 +453,24 @@
                                    const Twine &Suffix = "") {
   assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
          "can't fold an atomic load to requested type");
-
+
   Value *Ptr = LI.getPointerOperand();
   unsigned AS = LI.getPointerAddressSpace();
   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
   LI.getAllMetadata(MD);
 
+  // Reuse an existing bitcast of the pointer operand when it already points at
+  // NewTy in the same address space, instead of emitting a fresh bitcast that
+  // a sibling transform would fold back (infinite InstCombine loop, PR37526).
+  Value *NewPtr = nullptr;
+  if (auto *BC = dyn_cast<BitCastInst>(Ptr)) {
+    Value *Op0 = BC->getOperand(0);
+    if (Op0->getType()->getPointerElementType() == NewTy &&
+        Op0->getType()->getPointerAddressSpace() == AS)
+      NewPtr = Op0;
+  }
+  if (!NewPtr)
+    NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
+
   LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
-      IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
-      LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
+      NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
   NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
   MDBuilder MDB(NewLoad->getContext());
   for (const auto &MDPair : MD) {
Index: test/Transforms/InstCombine/PR37526.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/PR37526.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+%struct.x5 = type { i32 (...)**, i32, i32 }
+
+define void @repro(%struct.x5*, %struct.x5* %x12, %struct.x5* %x13) {
+; CHECK-LABEL: @repro(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X37_I_I:%.*]] = getelementptr inbounds [[STRUCT_X5:%.*]], %struct.x5* [[X12:%.*]], i64 0, i32 1
+; CHECK-NEXT:    [[X37_I:%.*]] = getelementptr inbounds [[STRUCT_X5]], %struct.x5* [[X13:%.*]], i64 0, i32 1
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[X14_0:%.*]] = phi %struct.x5* [ undef, [[ENTRY:%.*]] ], [ [[INCDEC_PTR:%.*]], [[FOR_COND]] ]
+; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds [[STRUCT_X5]], %struct.x5* [[X14_0]], i64 -1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X37_I_I]], align 4
+; CHECK-NEXT:    [[X37_I2_I:%.*]] = getelementptr inbounds [[STRUCT_X5]], %struct.x5* [[X14_0]], i64 -1, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[X37_I2_I]], align 4
+; CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[CMP_I]], i32* [[X37_I2_I]], i32* [[X37_I_I]]
+; CHECK-NEXT:    [[STOREMERGE1:%.*]] = load i32, i32* [[SPEC_SELECT]], align 8
+; CHECK-NEXT:    store i32 [[STOREMERGE1]], i32* [[X37_I]], align 8
+; CHECK-NEXT:    br label [[FOR_COND]]
+;
+entry:
+  %x37.i.i = getelementptr inbounds %struct.x5, %struct.x5* %x12, i64 0, i32 1
+  %x37.i = getelementptr inbounds %struct.x5, %struct.x5* %x13, i64 0, i32 1
+  %1 = bitcast i32* %x37.i to i64*
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %x14.0 = phi %struct.x5* [ undef, %entry ], [ %incdec.ptr, %for.cond ]
+  %incdec.ptr = getelementptr inbounds %struct.x5, %struct.x5* %x14.0, i64 -1
+  %2 = load i32, i32* %x37.i.i, align 4
+  %x37.i2.i = getelementptr inbounds %struct.x5, %struct.x5* %x14.0, i64 -1, i32 1
+  %3 = load i32, i32* %x37.i2.i, align 4
+  %cmp.i = icmp slt i32 %2, %3
+  %spec.select = select i1 %cmp.i, i32* %x37.i2.i, i32* %x37.i.i
+  %storemerge.in = bitcast i32* %spec.select to i64*
+  %storemerge = load i64, i64* %storemerge.in, align 8
+  store i64 %storemerge, i64* %1, align 8
+  br label %for.cond
+}
+