Index: llvm/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -1132,7 +1132,7 @@
   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::NON_EXTLOAD)
     return true;
-  if (ExtType == ISD::EXTLOAD)
+  if (ExtType == ISD::EXTLOAD && Subtarget->enableWideLoad())
     return LD->getAlignment() >= 4 && LD->isSimple();
   return false;
 }]>;
Index: llvm/lib/Target/X86/X86Subtarget.h
===================================================================
--- llvm/lib/Target/X86/X86Subtarget.h
+++ llvm/lib/Target/X86/X86Subtarget.h
@@ -907,6 +907,10 @@
   }
 
   bool enableAdvancedRASplitCost() const override { return true; }
+
+  /// Returns the value of the -x86-enable-wide-load option: whether an 8-bit
+  /// or 16-bit any-extending load from a 4-byte-aligned address may be
+  /// selected as a 32-bit load.
+  bool enableWideLoad() const;
 };
 
 } // end namespace llvm
Index: llvm/lib/Target/X86/X86Subtarget.cpp
===================================================================
--- llvm/lib/Target/X86/X86Subtarget.cpp
+++ llvm/lib/Target/X86/X86Subtarget.cpp
@@ -45,6 +45,10 @@
 #define GET_SUBTARGETINFO_CTOR
 #include "X86GenSubtargetInfo.inc"
 
+static cl::opt<bool> EnableWideLoad("x86-enable-wide-load", cl::init(true),
+    cl::desc("If load an 8b or 16b value from 32b aligned address, and any "
+             "extended value is expected, change it to 32b load."), cl::Hidden);
+
 // Temporary option to control early if-conversion for x86 while adding machine
 // models.
 static cl::opt<bool>
@@ -384,3 +388,7 @@
 bool X86Subtarget::isPositionIndependent() const {
   return TM.isPositionIndependent();
 }
+
+bool X86Subtarget::enableWideLoad() const {
+  return EnableWideLoad;
+}
Index: llvm/test/CodeGen/X86/no-wide-load.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/no-wide-load.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -x86-enable-wide-load=false | FileCheck %s
+
+%struct.S = type { i32, i16, i16 }
+
+define void @foo(%struct.S* %p, i16 signext %s) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzwl 4(%rdi), %eax
+; CHECK-NEXT:    andl $-1121, %eax # imm = 0xFB9F
+; CHECK-NEXT:    orl $1024, %eax # imm = 0x400
+; CHECK-NEXT:    movw %ax, 4(%rdi)
+; CHECK-NEXT:    retq
+entry:
+  %f2 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1
+  %0 = load i16, i16* %f2, align 4
+  %1 = and i16 %0, -1121
+  %2 = or i16 %1, 1024
+  store i16 %2, i16* %f2, align 4
+  ret void
+}
+