Index: lib/Transforms/Scalar/LoopStrengthReduce.cpp =================================================================== --- lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -822,7 +822,7 @@ /// Return the type of the memory being accessed. static MemAccessTy getAccessType(const TargetTransformInfo &TTI, - Instruction *Inst) { + Instruction *Inst, Value *OperandVal) { MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace); if (const StoreInst *SI = dyn_cast(Inst)) { AccessTy.MemTy = SI->getOperand(0)->getType(); @@ -836,7 +836,14 @@ } else if (IntrinsicInst *II = dyn_cast(Inst)) { switch (II->getIntrinsicID()) { case Intrinsic::prefetch: + case Intrinsic::memset: AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace(); + AccessTy.MemTy = OperandVal->getType(); + break; + case Intrinsic::memmove: + case Intrinsic::memcpy: + AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace(); + AccessTy.MemTy = OperandVal->getType(); break; default: { MemIntrinsicInfo IntrInfo; @@ -2408,7 +2415,8 @@ C->getValue().isMinSignedValue()) goto decline_post_inc; // Check for possible scaled-address reuse. - MemAccessTy AccessTy = getAccessType(TTI, UI->getUser()); + MemAccessTy AccessTy = + getAccessType(TTI, UI->getUser(), UI->getOperandValToReplace()); int64_t Scale = C->getSExtValue(); if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr, /*BaseOffset=*/0, @@ -3082,7 +3090,7 @@ if (IncConst->getAPInt().getMinSignedBits() > 64) return false; - MemAccessTy AccessTy = getAccessType(TTI, UserInst); + MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand); int64_t IncOffset = IncConst->getValue()->getSExtValue(); if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr, IncOffset, /*HaseBaseReg=*/false)) @@ -3210,7 +3218,7 @@ MemAccessTy AccessTy; if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) { Kind = LSRUse::Address; - AccessTy = getAccessType(TTI, UserInst); + AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace()); } const SCEV *S = IU.getExpr(U); Index: test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll =================================================================== --- test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll +++ test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll @@ -10,7 +10,7 @@ ; CHECK-LABEL: @lsr_crash_preserve_addrspace_unknown_type( ; CHECK: %tmp4 = bitcast %0 addrspace(3)* %tmp to double addrspace(3)* ; CHECK: %scevgep5 = getelementptr double, double addrspace(3)* %tmp4, i32 1 -; CHEC: load double, double addrspace(3)* %scevgep5 +; CHECK: load double, double addrspace(3)* %scevgep5 ; CHECK: %scevgep = getelementptr i32, i32 addrspace(3)* %tmp1, i32 4 ; CHECK:%tmp14 = load i32, i32 addrspace(3)* %scevgep @@ -50,5 +50,39 @@ br label %bb1 } +; CHECK-LABEL: @lsr_crash_preserve_addrspace_unknown_type2( +; CHECK: %scevgep3 = getelementptr i8, i8 addrspace(5)* %array, i32 %j +; CHECK: %scevgep2 = getelementptr i8, i8 addrspace(5)* %array, i32 %j +; CHECK: %n8 = load i8, i8 addrspace(5)* %scevgep2, align 4 +; CHECK: call void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* %scevgep3, i8 addrspace(5)* %scevgep3, i64 12, i1 false) +define void @lsr_crash_preserve_addrspace_unknown_type2(i8 addrspace(5)* %array) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.inc + %j = phi i32 [ %add, %for.inc ], [ 0, %entry ] + %idx = getelementptr inbounds i8, i8 addrspace(5)* %array, i32 %j + %t = getelementptr inbounds i8, i8 addrspace(5)* %array, i32 %j + %n8 = load i8, i8 addrspace(5)* %t, align 4 + %n7 = getelementptr inbounds i8, i8 addrspace(5)* %t, i32 42 + %n9 = load i8, i8 addrspace(5)* %n7, align 4 + %cmp = icmp sgt i32 %j, 42 + %add = add nuw nsw i32 %j, 1 + br i1 %cmp, label %if.then17, label %for.inc + +if.then17: ; preds = %for.body + call void @llvm.memcpy(i8 addrspace(5)* %idx, i8 addrspace(5)* %idx, i64 12, i1 false) + br label %for.inc + +for.inc: ; preds = %for.body, %if.then17 + %exitcond = icmp eq i1 %cmp, 1 + br i1 %exitcond, label %end, label %for.body + +end: ; preds = %for.inc + ret void +} + +declare void @llvm.memcpy(i8 addrspace(5)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1) + attributes #0 = { nounwind } attributes #1 = { nounwind readnone }