InstCombine: when a load is rewritten to a new type, reuse the source of an
existing pointer bitcast if it already has the requested pointee type and
address space, instead of emitting another bitcast. Adds test PR37526.ll.

Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -453,15 +453,20 @@
                                       const Twine &Suffix = "") {
   assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
          "can't fold an atomic load to requested type");
-  
+
   Value *Ptr = LI.getPointerOperand();
   unsigned AS = LI.getPointerAddressSpace();
   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
   LI.getAllMetadata(MD);
 
+  Value *NewPtr = nullptr;
+  if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
+        NewPtr->getType()->getPointerElementType() == NewTy &&
+        NewPtr->getType()->getPointerAddressSpace() == AS))
+    NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
+
   LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
-      IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
-      LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
+      NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
   NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
   MDBuilder MDB(NewLoad->getContext());
   for (const auto &MDPair : MD) {
Index: llvm/trunk/test/Transforms/InstCombine/PR37526.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/PR37526.ll
+++ llvm/trunk/test/Transforms/InstCombine/PR37526.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define void @PR37526(i32* %pz, i32* %px, i32* %py) {
+; CHECK-LABEL: @PR37526(
+; CHECK-NEXT:    [[T2:%.*]] = load i32, i32* [[PY:%.*]], align 4
+; CHECK-NEXT:    [[T3:%.*]] = load i32, i32* [[PX:%.*]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[T2]], [[T3]]
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[CMP]], i32 [[T3]], i32 [[T2]]
+; CHECK-NEXT:    store i32 [[R1]], i32* [[PZ:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  %t1 = bitcast i32* %pz to i64*
+  %t2 = load i32, i32* %py
+  %t3 = load i32, i32* %px
+  %cmp = icmp slt i32 %t2, %t3
+  %select = select i1 %cmp, i32* %px, i32* %py
+  %bc = bitcast i32* %select to i64*
+  %r = load i64, i64* %bc
+  store i64 %r, i64* %t1
+  ret void
+}