diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -111,6 +111,13 @@
   // TODO: Extend this to match GEP with constant offsets.
   Value *PtrOp = Load->getPointerOperand()->stripPointerCasts();
   assert(isa<PointerType>(PtrOp->getType()) && "Expected a pointer type");
+  unsigned AS = Load->getPointerAddressSpace();
+
+  // If original AS != Load's AS, we can't bitcast the original pointer and have
+  // to use Load's operand instead. Ideally we would want to strip pointer casts
+  // without changing AS, but there's no API to do that ATM.
+  if (AS != PtrOp->getType()->getPointerAddressSpace())
+    PtrOp = Load->getPointerOperand();
 
   Type *ScalarTy = Scalar->getType();
   uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
@@ -126,7 +133,6 @@
   if (!isSafeToLoadUnconditionally(PtrOp, MinVecTy, Alignment, DL, Load, &DT))
     return false;
 
-  unsigned AS = Load->getPointerAddressSpace();
   // Original pattern: insertelt undef, load [free casts of] ScalarPtr, 0
   int OldCost = TTI.getMemoryOpCost(Instruction::Load, ScalarTy, Alignment, AS);
 
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -vector-combine -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=CHECK
+
+; ModuleID = 'load-as-transition.ll'
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
+target triple = "amdgcn-amd-amdhsa"
+
+%struct.hoge = type { float }
+
+define protected amdgpu_kernel void @load_from_other_as(<4 x float>* nocapture nonnull %resultptr) local_unnamed_addr #0 {
+; CHECK-LABEL: @load_from_other_as(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
+; CHECK-NEXT:    [[B:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to %struct.hoge*
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_HOGE]], %struct.hoge* [[B]], i64 0, i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[C]] to <1 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x float>, <1 x float>* [[TMP0]], align 4
+; CHECK-NEXT:    [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    store <4 x float> [[E]], <4 x float>* [[RESULTPTR:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+bb:
+  %a = alloca %struct.hoge, align 4, addrspace(5)
+  %b = addrspacecast %struct.hoge addrspace(5)* %a to %struct.hoge*
+  %c = getelementptr inbounds %struct.hoge, %struct.hoge* %b, i64 0, i32 0
+  %d = load float, float* %c, align 4
+  %e = insertelement <4 x float> undef, float %d, i32 0
+  store <4 x float> %e, <4 x float>* %resultptr, align 16
+  ret void
+}
+
+attributes #0 = { "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 12.0.0"}
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/lit.local.cfg b/llvm/test/Transforms/VectorCombine/AMDGPU/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True