Index: lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- lib/Target/X86/X86TargetTransformInfo.cpp +++ lib/Target/X86/X86TargetTransformInfo.cpp @@ -2537,8 +2537,10 @@ } bool X86TTIImpl::enableMemCmpExpansion(unsigned &MaxLoadSize) { - // TODO: We can increase these based on available vector ops. - MaxLoadSize = ST->is64Bit() ? 8 : 4; + MaxLoadSize = getRegisterBitWidth(true) / 8; + if (MaxLoadSize == 0) { + MaxLoadSize = getRegisterBitWidth(false) / 8; + } return true; } Index: test/Transforms/CodeGenPrepare/X86/memcmp.ll =================================================================== --- test/Transforms/CodeGenPrepare/X86/memcmp.ll +++ test/Transforms/CodeGenPrepare/X86/memcmp.ll @@ -363,35 +363,18 @@ ; X32-NEXT: ret i32 [[CALL]] ; ; X64-LABEL: @cmp16( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) -; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]] -; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]] -; X64: res_block: -; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ] -; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]] -; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 -; X64-NEXT: br label [[ENDBLOCK:%.*]] -; X64: loadbb1: -; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i64* -; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i64* -; X64-NEXT: [[TMP11:%.*]] = getelementptr i64, i64* [[TMP9]], i64 1 -; X64-NEXT: [[TMP12:%.*]] = getelementptr i64, i64* [[TMP10]], i64 1 -; X64-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP11]] -; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]] -; X64-NEXT: [[TMP15]] = call i64 @llvm.bswap.i64(i64 [[TMP13]]) -; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]]) -; X64-NEXT: [[TMP17:%.*]] = icmp eq i64 [[TMP15]], [[TMP16]] -; X64-NEXT: br i1 [[TMP17]], label [[ENDBLOCK]], label [[RES_BLOCK]] -; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ] -; X64-NEXT: ret i32 [[PHI_RES]] +; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i128* +; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i128* +; X64-NEXT: [[TMP3:%.*]] = load i128, i128* [[TMP1]] +; X64-NEXT: [[TMP4:%.*]] = load i128, i128* [[TMP2]] +; X64-NEXT: [[TMP5:%.*]] = call i128 @llvm.bswap.i128(i128 [[TMP3]]) +; X64-NEXT: [[TMP6:%.*]] = call i128 @llvm.bswap.i128(i128 [[TMP4]]) +; X64-NEXT: [[TMP7:%.*]] = icmp ugt i128 [[TMP5]], [[TMP6]] +; X64-NEXT: [[TMP8:%.*]] = icmp ult i128 [[TMP5]], [[TMP6]] +; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32 +; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32 +; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]] +; X64-NEXT: ret i32 [[TMP11]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) ret i32 %call @@ -753,27 +736,13 @@ ; X32-NEXT: ret i32 [[CONV]] ; ; X64-LABEL: @cmp_eq16( -; X64-NEXT: loadbb: -; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]] -; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] -; X64: res_block: -; X64-NEXT: br label [[ENDBLOCK:%.*]] -; X64: loadbb1: -; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i64* -; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i64* -; X64-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[TMP5]], i64 1 -; X64-NEXT: [[TMP8:%.*]] = getelementptr i64, i64* [[TMP6]], i64 1 -; X64-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP7]] -; X64-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]] -; X64-NEXT: [[TMP11:%.*]] = icmp ne i64 [[TMP9]], [[TMP10]] -; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[ENDBLOCK]] -; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] -; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0 +; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i128* +; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i128* +; X64-NEXT: [[TMP3:%.*]] = load i128, i128* [[TMP1]] +; X64-NEXT: [[TMP4:%.*]] = load i128, i128* [[TMP2]] +; X64-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 ; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 ; X64-NEXT: ret i32 [[CONV]] ;