Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -6394,6 +6394,7 @@
       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
       LLVMContext *Ctx = DAG.getContext();
       MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
+      SDValue Brdcst;
       if (Subtarget.hasAVX()) {
         if (SplatBitSize <= 64 && Subtarget.hasAVX2() &&
             !(SplatBitSize == 64 && Subtarget.is32Bit())) {
@@ -6410,9 +6411,8 @@
               CVT, dl, DAG.getEntryNode(), CP,
               MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
               Alignment);
-          SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
+          Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
                                        MVT::getVectorVT(CVT, Repeat), Ld);
-          return DAG.getBitcast(VT, Brdcst);
         } else if (SplatBitSize == 32 || SplatBitSize == 64) {
           // Splatted value can fit in one FLOAT constant in constant pool.
           // Load the constant and broadcast it.
@@ -6432,9 +6432,8 @@
               CVT, dl, DAG.getEntryNode(), CP,
               MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
               Alignment);
-          SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
+          Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
                                        MVT::getVectorVT(CVT, Repeat), Ld);
-          return DAG.getBitcast(VT, Brdcst);
         } else if (SplatBitSize > 64) {
           // Load the vector of constants and broadcast it.
           MVT CVT = VT.getScalarType();
@@ -6447,9 +6446,15 @@
               MVT::getVectorVT(CVT, NumElm), dl, DAG.getEntryNode(), VCP,
               MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
               Alignment);
-          SDValue Brdcst = DAG.getNode(X86ISD::SUBV_BROADCAST, dl, VT, Ld);
-          return DAG.getBitcast(VT, Brdcst);
+          Brdcst = DAG.getNode(X86ISD::SUBV_BROADCAST, dl, VT, Ld);
+        } else {
+          return SDValue();
         }
+        // If the load already had a use, we can't fold it into the broadcast.
+        // So fall back to the original lowering.
+        if (!Ld.hasOneUse())
+          return SDValue();
+        return DAG.getBitcast(VT, Brdcst);
       }
     }
     return SDValue();
Index: test/CodeGen/X86/pr31306.ll
===================================================================
--- test/CodeGen/X86/pr31306.ll
+++ test/CodeGen/X86/pr31306.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -O2 -mattr=avx512f | FileCheck %s
+
+%struct.i = type { %struct.a, %struct.a }
+%struct.a = type { i48, i40, i48, i40 }
+
+@m = local_unnamed_addr global %struct.i zeroinitializer, align 8
+@n = local_unnamed_addr global %struct.i zeroinitializer, align 8
+@_ZN1i1lE = external local_unnamed_addr global %struct.a, align 8
+
+; Function Attrs: norecurse nounwind uwtable
+define void @_Z1ov() local_unnamed_addr {
+; CHECK-LABEL: _Z1ov:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    vmovups {{.*}}(%rip), %ymm0
+; CHECK-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT:    vorps {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT:    vmovups %ymm0, {{.*}}(%rip)
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [18446744073709547520,18446742974197923840,18446744069414584320,18446744065119617024,18446744073709547520,18446742974197923840,18446744069414584320,18446744065119617024]
+; CHECK-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm1
+; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm2 = mem[0,1,2,3,0,1,2,3]
+; CHECK-NEXT:    vporq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vmovdqu64 %zmm1, {{.*}}(%rip)
+; CHECK-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    vporq %zmm2, %zmm0, %zmm0
+; CHECK-NEXT:    vmovdqu64 %zmm0, {{.*}}(%rip)
+; CHECK-NEXT:    retq
+entry:
+  %0 = load <4 x i64>, <4 x i64>* bitcast (%struct.a* @_ZN1i1lE to <4 x i64>*), align 8
+  %1 = and <4 x i64> %0,
+  %2 = or <4 x i64> %1,
+  store <4 x i64> %2, <4 x i64>* bitcast (%struct.a* @_ZN1i1lE to <4 x i64>*), align 8
+  %3 = load <8 x i64>, <8 x i64>* bitcast (%struct.i* @n to <8 x i64>*), align 8
+  %4 = and <8 x i64> %3,
+  %5 = or <8 x i64> %4,
+  store <8 x i64> %5, <8 x i64>* bitcast (%struct.i* @n to <8 x i64>*), align 8
+  %6 = load <8 x i64>, <8 x i64>* bitcast (%struct.i* @m to <8 x i64>*), align 8
+  %7 = and <8 x i64> %6,
+  %8 = or <8 x i64> %7,
+  store <8 x i64> %8, <8 x i64>* bitcast (%struct.i* @m to <8 x i64>*), align 8
+  ret void
+}
+