# Patch summary (preamble text; everything from the first "diff --git" header onward is unchanged).
#
# Three files are touched:
#
# 1. llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (hunk @@ -19692,8 +19692,10 @@)
#    After `int64_t Offset = checkElem(Op0);` the early `return SDValue();` is
#    now taken when `Offset < 0` OR when `Offset` is not a multiple of
#    `VT.getVectorNumElements()`. Previously only `Offset < 0` bailed out.
#    A two-line comment stating the multiple-of requirement is added above it.
#
# 2. llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll (new file)
#    Run with `llc -mtriple=aarch64-none-linux-gnu` and FileCheck.
#    - test1: zero-extends elements 1 and 2 of a <4 x i32> to i64 and inserts
#      them into lanes 0 and 1 of a <2 x i64>.
#    - test2: zero-extends elements 1 and 2 of the result of a <4 x i32> add
#      and inserts them into lanes 2 and 3 of a <4 x i64>.
#    In both, the first extracted index (1) is not a multiple of the lane
#    count of the result vector.
#
# 3. llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
#    GFX8 expectations: the three-instruction sequence
#    (v_alignbit_b32 / v_lshrrev_b32_e32 / v_and_b32_e32) is replaced by two
#    instructions (v_lshrrev_b32_e32 v0, 16, v5 / v_and_b32_e32 v1, 0xffff, v6).
#
# NOTE(review): the hunk headers describe multi-line hunks (e.g. +1,40 for the
# new test), but the text below sits on two physical lines, so line breaks
# appear to have been collapsed; it presumably needs them restored before
# `git apply` / `patch` will accept it -- confirm against the original patch.
# NOTE(review): in test2, `%1 = add <4 x i32> %0,` has no second operand here;
# it looks like the constant vector was lost in extraction -- restore it from
# the original test file rather than guessing.
#
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19692,8 +19692,10 @@ // Make sure the first element matches // (zext (extract_vector_elt X, C)) + // Offset must be a constant multiple of the + // known-minimum vector length of the result type. int64_t Offset = checkElem(Op0); - if (Offset < 0) + if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0) return SDValue(); unsigned NumElems = N->getNumOperands(); diff --git a/llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll b/llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o -| FileCheck %s +define <2 x i64> @test1(<4 x i32> %x) #0 { +; CHECK-LABEL: test1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: mov w9, v0.s[2] +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ret + %i1 = extractelement <4 x i32> %x, i32 1 + %zi1 = zext i32 %i1 to i64 + %i2 = extractelement <4 x i32> %x, i32 2 + %zi2 = zext i32 %i2 to i64 + %v1 = insertelement <2 x i64> undef, i64 %zi1, i32 0 + %v2 = insertelement <2 x i64> %v1, i64 %zi2, i32 1 + ret <2 x i64> %v2 +} + +define <4 x i64> @test2(<4 x i32> %0) { +; CHECK-LABEL: test2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mov w8, v0.s[1] +; CHECK-NEXT: mov w9, v0.s[2] +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov v1.d[1], x9 +; CHECK-NEXT: ret +entry: + %1 = add <4 x i32> %0, + %2 = extractelement <4 x i32> %1, i32 1 + %zext1 = zext i32 %2 to i64 + %3 = extractelement <4 x i32> 
%1, i32 2 + %zext2 = zext i32 %3 to i64 + %4 = insertelement <4 x i64> undef, i64 %zext1, i32 2 + %5 = insertelement <4 x i64> %4, i64 %zext2, i32 3 + ret <4 x i64> %5 +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll @@ -79,9 +79,8 @@ ; GFX8-NEXT: ; %bb.2: ; GFX8-NEXT: s_mov_b64 exec, s[2:3] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_alignbit_b32 v0, v6, v5, 16 -; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v5 +; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v6 ; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: s_mov_b32 m0, -1 ; GFX8-NEXT: ds_write2_b32 v2, v0, v1 offset0:7 offset1:8