diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6164,7 +6164,7 @@ // This can be a pure constant or a vector splat, in which case we treat the // vector as a scalar and use the splat value. APInt Constant = APInt::getZero(1); - if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + if (const ConstantSDNode *C = isConstOrConstSplat(N1)) { Constant = C->getAPIntValue(); } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { APInt SplatValue, SplatUndef; diff --git a/llvm/test/CodeGen/AArch64/sve-fold-loadext-and-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-fold-loadext-and-splat-vector.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fold-loadext-and-splat-vector.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; Ensure that a no-op 'and' after an extending load gets removed when the and is +; constructed via a splat_vector node. +define <vscale x 2 x i64> @fold_loadext_and(ptr %ptr, i32 %needle, <vscale x 2 x i64> %b) #0 { +; CHECK-LABEL: fold_loadext_and: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %load = load <vscale x 2 x i32>, ptr %ptr, align 4 + %ext = zext <vscale x 2 x i32> %load to <vscale x 2 x i64> + %splatinsert = insertelement <vscale x 2 x i64> poison, i64 4294967295, i64 0 + %splat = shufflevector <vscale x 2 x i64> %splatinsert, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %and = and <vscale x 2 x i64> %ext, %splat + ret <vscale x 2 x i64> %and +} + +; Same as above but testing the case we care about. Here the vscale x 2 x i32 +; types get legalized into vscale x 2 x i64 types which introduces the extending +; load and 'and' nodes similar to the above case.
+define <vscale x 2 x i1> @fold_loadext_and_legalize(ptr %ptr, <vscale x 2 x i32> %a) #0 { +; CHECK-LABEL: fold_loadext_and_legalize: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and z0.d, z0.d, #0xffffffff +; CHECK-NEXT: ld1w { z1.d }, p0/z, [x0] +; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z0.d +; CHECK-NEXT: ret + %load = load <vscale x 2 x i32>, ptr %ptr + %cmp = icmp eq <vscale x 2 x i32> %load, %a + ret <vscale x 2 x i1> %cmp +} + +attributes #0 = { "target-features"="+sve" }