diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17658,7 +17658,11 @@
       bool IsFastSt = false;
       bool IsFastLd = false;
-      if (TLI.isTypeLegal(StoreTy) &&
+      // Don't try vector types if we need a rotate. We may still fail the
+      // legality checks for the integer type, but we can't handle the rotate
+      // case with vectors.
+      // FIXME: We could use a shuffle in place of the rotate.
+      if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG.getMachineFunction()) &&
           TLI.allowsMemoryAccess(Context, DL, StoreTy,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 -mattr=+experimental-zbb,+experimental-v \
+; RUN:   -riscv-v-vector-bits-min=128 | FileCheck %s
+
+; This test loads two values and stores them in reversed order. This previously
+; asserted because part of DAGCombiner::tryStoreMergeOfLoads thought we could
+; use an i64 rotate, but the loads aren't sufficiently aligned. It then fell
+; back to a vector type, which can't handle the swapped case.
+
+@foo = global [2 x i32] zeroinitializer, align 4
+@bar = global [2 x i32] zeroinitializer, align 4
+
+define void @baz() nounwind {
+; CHECK-LABEL: baz:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lui a0, %hi(foo)
+; CHECK-NEXT:    addi a1, a0, %lo(foo)
+; CHECK-NEXT:    lw a1, 4(a1)
+; CHECK-NEXT:    lw a0, %lo(foo)(a0)
+; CHECK-NEXT:    lui a2, %hi(bar)
+; CHECK-NEXT:    sw a1, %lo(bar)(a2)
+; CHECK-NEXT:    addi a1, a2, %lo(bar)
+; CHECK-NEXT:    sw a0, 4(a1)
+; CHECK-NEXT:    ret
+entry:
+  %0 = load i32, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @foo, i64 0, i64 1), align 4
+  store i32 %0, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @bar, i64 0, i64 0), align 4
+  %1 = load i32, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @foo, i64 0, i64 0), align 4
+  store i32 %1, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @bar, i64 0, i64 1), align 4
+  ret void
+}
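
Note for reviewers: the transform being fenced off here merges two adjacent i32 loads whose values are stored to adjacent slots in swapped order into a single i64 load, a 32-bit rotate, and a single i64 store, when the wide accesses are allowed. A minimal standalone sketch of that equivalence in plain C++ (illustrative only; swappedStoreViaRotate is a made-up name, not DAGCombiner code, and it assumes a little-endian target such as riscv64):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  // Model of the rotate-based merge: store {src[1], src[0]} into
  // {dst[0], dst[1]} as one 64-bit load, a rotate, and one 64-bit store.
  static void swappedStoreViaRotate(uint32_t *dst, const uint32_t *src) {
    uint64_t v;
    std::memcpy(&v, src, sizeof(v)); // wide load: low half = src[0], high half = src[1]
    v = (v >> 32) | (v << 32);       // rotating by 32 bits swaps the halves
    std::memcpy(dst, &v, sizeof(v)); // wide store: dst[0] = src[1], dst[1] = src[0]
  }

  int main() {
    uint32_t foo[2] = {1, 2}, bar[2] = {0, 0};
    swappedStoreViaRotate(bar, foo);
    std::printf("%u %u\n", bar[0], bar[1]); // prints "2 1"
  }

In the test above the i64 accesses are only 4-byte aligned, so the integer form is rejected by the alignment checks; before this patch the combiner then fell through to a vector StoreTy, which cannot express the swap, and hit an assertion. The !NeedRotate guard makes it give up instead, and the FIXME records that a vector shuffle could recover this case later.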