Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -125,6 +125,16 @@
     cl::desc("Limit the number of times for the same StoreNode and RootNode "
              "to bail out in store merging dependence check"));
 
+static cl::opt<bool> EnableReduceLoadOpStoreWidth(
+    "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
+    cl::desc("DAG combiner enable reducing the width of load/op/store "
+             "sequence"));
+
+static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
+    "combiner-shrink-load-replace-store-with-store", cl::Hidden,
+    cl::init(true),
+    cl::desc("DAG combiner enable load/<replace bytes>/store with "
+             "a narrower store"));
+
 namespace {
 
   class DAGCombiner {
@@ -15424,7 +15434,7 @@
   // Y is known to provide just those bytes. If so, we try to replace the
   // load + replace + store sequence with a single (narrower) store, which makes
   // the load dead.
-  if (Opc == ISD::OR) {
+  if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
     std::pair<SDValue, unsigned> MaskedLoad;
     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
     if (MaskedLoad.first)
@@ -15440,6 +15450,9 @@
       return NewST;
   }
 
+  if (!EnableReduceLoadOpStoreWidth)
+    return SDValue();
+
   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
       Value.getOperand(1).getOpcode() != ISD::Constant)
     return SDValue();
Index: llvm/test/CodeGen/X86/clear-bitfield.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/clear-bitfield.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -combiner-reduce-load-op-store-width=false | FileCheck %s
+
+%struct.bit_fields = type { i32 }
+
+define void @clear_b1(%struct.bit_fields* %ptr) {
+; CHECK-LABEL: clear_b1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andl $-2, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+  %0 = bitcast %struct.bit_fields* %ptr to i32*
+  %bf.load = load i32, i32* %0
+  %bf.clear = and i32 %bf.load, -2
+  store i32 %bf.clear, i32* %0
+  ret void
+}
+
+define void @clear16(%struct.bit_fields* %ptr) {
+; CHECK-LABEL: clear16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andw $-2, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+  %0 = bitcast %struct.bit_fields* %ptr to i16*
+  %bf.load = load i16, i16* %0
+  %bf.clear = and i16 %bf.load, -2
+  store i16 %bf.clear, i16* %0
+  ret void
+}
Index: llvm/test/CodeGen/X86/disable-shrink-store.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/disable-shrink-store.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -combiner-shrink-load-replace-store-with-store=false | FileCheck %s
+
+define void @shrink(i16* %ptr) {
+; CHECK-LABEL: shrink:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movzbl (%rdi), %eax
+; CHECK-NEXT:    orl $25600, %eax # imm = 0x6400
+; CHECK-NEXT:    movw %ax, (%rdi)
+; CHECK-NEXT:    retq
+entry:
+  %val = load i16, i16* %ptr
+  %masked_val = and i16 %val, 255
+  %replaced_val = or i16 %masked_val, 25600
+  store i16 %replaced_val, i16* %ptr
+  ret void
+}
+
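
Usage note (illustrative, not part of the patch): both options default to true, so codegen is unchanged unless a flag is passed explicitly. A minimal sketch of how one might compare the gated path against the default path locally, assuming an llc build that includes this patch; the default-path behavior described in the comments is the expected outcome, not verified output:

  # Flag disabled (as in the new RUN lines above): the full-width
  # load/op/store survives, e.g. the andl/andw checked by the tests.
  llc < llvm/test/CodeGen/X86/clear-bitfield.ll -mtriple=x86_64-- \
      -combiner-reduce-load-op-store-width=false

  # Default (flag enabled): ReduceLoadOpStoreWidth is free to narrow
  # the load/op/store sequence instead.
  llc < llvm/test/CodeGen/X86/clear-bitfield.ll -mtriple=x86_64--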