diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -40,6 +40,10 @@
     cl::desc("Enable setting constant bits to reduce size of mask immediates"),
     cl::Hidden);
 
+static cl::opt<bool> EnablePromoteAnyextLoad(
+    "x86-promote-anyext-load", cl::init(true),
+    cl::desc("Enable promoting aligned anyext load to wider load"), cl::Hidden);
+
 //===----------------------------------------------------------------------===//
 //                      Pattern Matcher Implementation
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -1122,7 +1122,7 @@
   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::NON_EXTLOAD)
     return true;
-  if (ExtType == ISD::EXTLOAD)
+  if (ExtType == ISD::EXTLOAD && EnablePromoteAnyextLoad)
     return LD->getAlignment() >= 2 && LD->isSimple();
   return false;
 }]>;
@@ -1132,7 +1132,7 @@
   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::NON_EXTLOAD)
     return true;
-  if (ExtType == ISD::EXTLOAD)
+  if (ExtType == ISD::EXTLOAD && EnablePromoteAnyextLoad)
     return LD->getAlignment() >= 4 && LD->isSimple();
   return false;
 }]>;
diff --git a/llvm/test/CodeGen/X86/no-wide-load.ll b/llvm/test/CodeGen/X86/no-wide-load.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/no-wide-load.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -x86-promote-anyext-load=false | FileCheck %s
+
+%struct.S = type { i32, i16, i16 }
+
+define void @foo(%struct.S* %p, i16 signext %s) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzwl 4(%rdi), %eax
+; CHECK-NEXT:    andl $-1121, %eax # imm = 0xFB9F
+; CHECK-NEXT:    orl $1024, %eax # imm = 0x400
+; CHECK-NEXT:    movw %ax, 4(%rdi)
+; CHECK-NEXT:    retq
+entry:
+  %f2 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1
+  %0 = load i16, i16* %f2, align 4
+  %1 = and i16 %0, -1121
+  %2 = or i16 %1, 1024
+  store i16 %2, i16* %f2, align 4
+  ret void
+}
+