diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -1297,10 +1297,11 @@ CV->replaceAllUsesWith(V); } - // Bitcast might not be an Instruction, if the value being loaded is a - // constant. In that case, no need to reorder anything. - if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast)) - reorder(BitcastInst); + // Since we might have opaque pointers we might end up using the pointer + // operand of the first load (wrt. memory loaded) for the vector load. Since + // this first load might not be the first in the block we potentially need to + // reorder the pointer operand (and its operands). + reorder(LI); eraseInstructions(Chain); diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/opaque_ptr.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/opaque_ptr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/opaque_ptr.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=amdgcn-amd-amdhsa -basic-aa -load-store-vectorizer -S -o - %s | FileCheck %s + +; Vectorize and emit valid code (Issue #54896). 
+ +%S = type { i64, i64 } +@S = external global %S + +define i64 @order() { +; CHECK-LABEL: @order( +; CHECK-NEXT: [[IDX0:%.*]] = getelementptr inbounds [[S:%.*]], ptr @S, i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[IDX0]], align 8 +; CHECK-NEXT: [[L01:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[L12:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[L01]], [[L12]] +; CHECK-NEXT: ret i64 [[ADD]] +; + %idx1 = getelementptr inbounds %S, ptr @S, i32 0, i32 1 + %l1 = load i64, ptr %idx1, align 8 + %idx0 = getelementptr inbounds %S, ptr @S, i32 0, i32 0 + %l0 = load i64, ptr %idx0, align 8 + %add = add i64 %l0, %l1 + ret i64 %add +}