diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1297,10 +1297,8 @@
       CV->replaceAllUsesWith(V);
     }
 
-    // Bitcast might not be an Instruction, if the value being loaded is a
-    // constant. In that case, no need to reorder anything.
-    if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
-      reorder(BitcastInst);
+    // Since we might have opaque pointers, we might end up using the pointer
+    // operand of the first load (wrt. the loaded memory) for the vector load.
+    // Since this first load might not be the first in the block, we
+    // potentially need to reorder the pointer operand (and its operands).
+    reorder(LI);
 
     eraseInstructions(Chain);
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/opaque_ptr.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/opaque_ptr.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/opaque_ptr.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -basic-aa -load-store-vectorizer -S -o - %s | FileCheck %s
+
+; Vectorize and emit valid code (Issue #54896).
+
+%S = type { i64, i64 }
+@S = external global %S
+
+define i64 @order() {
+; CHECK-LABEL: @order(
+; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds [[S:%.*]], ptr @S, i32 0, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[IDX0]], align 8
+; CHECK-NEXT:    [[L01:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT:    [[L12:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[L01]], [[L12]]
+; CHECK-NEXT:    ret i64 [[ADD]]
+;
+  %idx1 = getelementptr inbounds %S, ptr @S, i32 0, i32 1
+  %l1 = load i64, ptr %idx1, align 8
+  %idx0 = getelementptr inbounds %S, ptr @S, i32 0, i32 0
+  %l0 = load i64, ptr %idx0, align 8
+  %add = add i64 %l0, %l1
+  ret i64 %add
+}
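
For context (not part of the patch): with opaque pointers, the ptr-to-ptr bitcast the old code inserted folds away, so at most the GEP's own operands were reordered and the GEP itself was never hoisted above the new vector load. The snippet below is a hypothetical reconstruction of the invalid IR from Issue #54896 that the old code could produce for the @order test above: the <2 x i64> load is inserted at the position of the first load in program order (%l1), while its pointer operand %idx0 is defined only later. Calling reorder(LI) on the new vector load hoists %idx0 (and its operands) above it, yielding the CHECK lines in the test.

; Hypothetical pre-fix output (invalid IR -- rejected by the verifier):
%S = type { i64, i64 }
@S = external global %S

define i64 @order() {
  ; The vector load uses %idx0 before %idx0 is defined below.
  %1 = load <2 x i64>, ptr %idx0, align 8
  %l01 = extractelement <2 x i64> %1, i32 0
  %l12 = extractelement <2 x i64> %1, i32 1
  %idx0 = getelementptr inbounds %S, ptr @S, i32 0, i32 0
  %add = add i64 %l01, %l12
  ret i64 %add
}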