Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -437,6 +437,12 @@ return false; } + /// Return true if the target has a quick way to compare values of the given + /// type. By default, assume that any legal type can be compared efficiently. + virtual bool hasFastEqualityCompare(EVT VT) const { + return isTypeLegal(VT); + } + /// Return true if the target should transform: /// (X & Y) == Y ---> (~X & Y) == 0 /// (X & Y) != Y ---> (~X & Y) != 0 Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6056,13 +6056,11 @@ LoadVT = MVT::i64; LoadTy = Type::getInt64Ty(CSize->getContext()); break; - /* case 16: + // Arbitrarily choosing a commonly supported vector type for 16-byte loads. LoadVT = MVT::v4i32; - LoadTy = Type::getInt32Ty(CSize->getContext()); - LoadTy = VectorType::get(LoadTy, 4); + LoadTy = VectorType::get(Type::getInt32Ty(CSize->getContext()), 4); break; - */ } // This turns into unaligned loads. We only do this if the target natively @@ -6085,8 +6083,20 @@ return false; } + // For a vector type, we need to do a scalar comparison using an integer type + // that has the same size as the vector. + EVT CmpVT = + LoadVT.isVector() ? LoadVT.getIntegerVT(LoadVT.getSizeInBits()) : LoadVT; + if (!TLI.hasFastEqualityCompare(CmpVT)) + return false; + SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); + + // Bitcast to integer type if the loads are vectors. 
+ LHSVal = DAG.getBitcast(CmpVT, LHSVal); + RHSVal = DAG.getBitcast(CmpVT, RHSVal); + SDValue SetCC = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, ISD::SETNE); processIntegerCallValue(I, SetCC, false); Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -815,6 +815,9 @@ bool hasAndNotCompare(SDValue Y) const override; + /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST. + bool hasFastEqualityCompare(EVT VT) const override; + /// Return the value type to use for ISD::SETCC. EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -4637,6 +4637,12 @@ return true; } +bool X86TargetLowering::hasFastEqualityCompare(EVT VT) const { + // TODO: 256- and 512-bit types should be allowed, but make sure that those + // cases are handled in combineVectorSizedSetCCEquality(). + return isTypeLegal(VT) || (Subtarget.hasSSE2() && VT == MVT::i128); +} + /// Val is the undef sentinel value or equal to the specified value. 
static bool isUndefOrEqual(int Val, int CmpVal) { return ((Val == SM_SentinelUndef) || (Val == CmpVal)); Index: test/CodeGen/X86/memcmp.ll =================================================================== --- test/CodeGen/X86/memcmp.ll +++ test/CodeGen/X86/memcmp.ll @@ -97,12 +97,12 @@ define i1 @length16(i8* %x, i8* %y) nounwind { ; CHECK-LABEL: length16: ; CHECK: # BB#0: -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: movl $16, %edx -; CHECK-NEXT: callq memcmp -; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: movdqu (%rsi), %xmm0 +; CHECK-NEXT: movdqu (%rdi), %xmm1 +; CHECK-NEXT: pcmpeqb %xmm0, %xmm1 +; CHECK-NEXT: pmovmskb %xmm1, %eax +; CHECK-NEXT: cmpl $65535, %eax # imm = 0xFFFF ; CHECK-NEXT: setne %al -; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind %cmp = icmp ne i32 %call, 0 @@ -112,13 +112,11 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind { ; CHECK-LABEL: length16_const: ; CHECK: # BB#0: -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: movl $.L.str, %esi -; CHECK-NEXT: movl $16, %edx -; CHECK-NEXT: callq memcmp -; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: movdqu (%rdi), %xmm0 +; CHECK-NEXT: pcmpeqb {{.*}}(%rip), %xmm0 +; CHECK-NEXT: pmovmskb %xmm0, %eax +; CHECK-NEXT: cmpl $65535, %eax # imm = 0xFFFF ; CHECK-NEXT: sete %al -; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 16) nounwind %c = icmp eq i32 %m, 0