# Patch summary (descriptive text placed before the first "Index:" header).
#
# 1) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp, hunk at line 747:
#    In the `else` branch that precedes the DAG.getExtLoad(ISD::EXTLOAD, ...)
#    call, the removed code started from LoadVT = WideVT and halved LoadBytes
#    in a loop while RemainingBytes < LoadBytes, rebuilding LoadVT as an
#    integer type of (LoadBytes << 3) bits on each step. The added code sets
#    LoadBytes = RemainingBytes directly and builds LoadVT once as an integer
#    type of (LoadBytes << 3) bits. LoadVT is then passed to getExtLoad.
#
# 2) llvm/test/CodeGen/X86/load-local-v4i5.ll (new file, 77 lines):
#    An llc test for -mtriple=x86_64-unknown-unknown checked with FileCheck;
#    its NOTE line says the CHECK lines were autogenerated by
#    utils/update_llc_test_checks.py. @_start memcpy's the constant
#    [4 x i5] [2, 0, 2, -1] into a local array, builds a <4 x i5> from four
#    i5 loads, stores and reloads the vector, extracts element 3, round-trips
#    it through an i5 stack slot, and calls @llvm.debugtrap() when the value
#    is not -1.
#
# NOTE(review): the hunk headers declare 11/8 and 0/77 lines, but the diff
# text below occupies only two physical lines, so its line breaks appear to
# have been lost -- presumably they must be restored before this can be
# applied; confirm against the original review.
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -747,11 +747,8 @@ LD->getPointerInfo().getWithOffset(Offset), LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); } else { - EVT LoadVT = WideVT; - while (RemainingBytes < LoadBytes) { - LoadBytes >>= 1; // Reduce the load size by half. - LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); - } + LoadBytes = RemainingBytes; + EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LoadVT, Index: llvm/test/CodeGen/X86/load-local-v4i5.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/load-local-v4i5.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s +@0 = internal unnamed_addr constant [4 x i5] [i5 2, i5 0, i5 2, i5 -1], align 1 + +; Function Attrs: nobuiltin nounwind +define void @_start() { +; CHECK-LABEL: _start: +; CHECK: # %bb.0: # %Entry +; CHECK-NEXT: movl {{.*}}(%rip), %eax +; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx +; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi +; CHECK-NEXT: andl $31, %eax +; CHECK-NEXT: andl $31, %esi +; CHECK-NEXT: shll $5, %esi +; CHECK-NEXT: orl %eax, %esi +; CHECK-NEXT: andl $31, %edx +; CHECK-NEXT: shll $10, %edx +; CHECK-NEXT: orl %esi, %edx +; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shll $15, %ecx +; CHECK-NEXT: orl %edx, %ecx +; CHECK-NEXT: movw %cx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: shrl $16, %ecx +; CHECK-NEXT: andl $15, %ecx +; CHECK-NEXT: movb 
%cl, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: cmpb $31, %al +; CHECK-NEXT: je .LBB0_2 +; CHECK-NEXT: # %bb.1: # %Then +; CHECK-NEXT: int3 +; CHECK-NEXT: .LBB0_2: # %EndIf +; CHECK-NEXT: retq +Entry: + %x = alloca [4 x i5], align 1 + %y = alloca <4 x i5>, align 4 + %z = alloca i5, align 1 + %0 = bitcast [4 x i5]* %x to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 bitcast ([4 x i5]* @0 to i8*), i64 4, i1 false) + %1 = getelementptr inbounds [4 x i5], [4 x i5]* %x, i64 0, i64 0 + %2 = load i5, i5* %1 + %3 = insertelement <4 x i5> undef, i5 %2, i32 0 + %4 = getelementptr inbounds [4 x i5], [4 x i5]* %x, i64 0, i64 1 + %5 = load i5, i5* %4 + %6 = insertelement <4 x i5> %3, i5 %5, i32 1 + %7 = getelementptr inbounds [4 x i5], [4 x i5]* %x, i64 0, i64 2 + %8 = load i5, i5* %7 + %9 = insertelement <4 x i5> %6, i5 %8, i32 2 + %10 = getelementptr inbounds [4 x i5], [4 x i5]* %x, i64 0, i64 3 + %11 = load i5, i5* %10 + %12 = insertelement <4 x i5> %9, i5 %11, i32 3 + store <4 x i5> %12, <4 x i5>* %y, align 4 + %13 = load <4 x i5>, <4 x i5>* %y + %14 = extractelement <4 x i5> %13, i32 3 + store i5 %14, i5* %z, align 1 + %15 = load i5, i5* %z, align 1 + %16 = icmp ne i5 %15, -1 + br i1 %16, label %Then, label %Else + +Then: ; preds = %Entry + call void @llvm.debugtrap() + br label %EndIf + +Else: ; preds = %Entry + br label %EndIf + +EndIf: ; preds = %Else, %Then + ret void +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) + +; Function Attrs: nounwind +declare void @llvm.debugtrap()