Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7396,6 +7396,9 @@ /// See if the specified operand can be simplified with the knowledge that only /// the bits specified by Mask are used. If so, return the simpler operand, /// otherwise return a null SDValue. +/// +/// (This exists alongside SimplifyDemandedBits because GetDemandedBits can +/// simplify nodes with multiple uses more aggressively.) SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { switch (V.getOpcode()) { default: break; @@ -7431,6 +7434,18 @@ return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); } + break; + case ISD::AND: { + // X & -1 -> X (ignoring bits which aren't demanded). + APInt KnownZero, KnownOne; + DAG.computeKnownBits(V.getOperand(0), KnownZero, KnownOne); + if ((KnownOne & Mask) == Mask) + return V.getOperand(1); + DAG.computeKnownBits(V.getOperand(1), KnownZero, KnownOne); + if ((KnownOne & Mask) == Mask) + return V.getOperand(0); + break; + } } return SDValue(); } Index: test/CodeGen/ARM/illegal-bitfield-loadstore.ll =================================================================== --- test/CodeGen/ARM/illegal-bitfield-loadstore.ll +++ test/CodeGen/ARM/illegal-bitfield-loadstore.ll @@ -29,31 +29,24 @@ define void @i24_and_or(i24* %a) { ; LE-LABEL: i24_and_or: ; LE: @ BB#0: -; LE-NEXT: ldrb r1, [r0, #2] -; LE-NEXT: ldrh r2, [r0] -; LE-NEXT: orr r1, r2, r1, lsl #16 -; LE-NEXT: ldr r2, .LCPI1_0 +; LE-NEXT: ldrh r1, [r0] +; LE-NEXT: mov r2, #16256 +; LE-NEXT: orr r2, r2, #49152 ; LE-NEXT: orr r1, r1, #384 ; LE-NEXT: and r1, r1, r2 ; LE-NEXT: strh r1, [r0] -; LE-NEXT: lsr r1, r1, #16 -; LE-NEXT: strb r1, [r0, #2] ; LE-NEXT: mov pc, lr -; LE-NEXT: .p2align 2 -; LE-NEXT: @ BB#1: -; LE-NEXT: .LCPI1_0: -; LE-NEXT: .long 16777088 @ 0xffff80 ; ; BE-LABEL: i24_and_or: ; BE: @ BB#0: 
; BE-NEXT: ldrh r1, [r0] ; BE-NEXT: mov r2, #384 ; BE-NEXT: orr r1, r2, r1, lsl #8 +; BE-NEXT: lsr r2, r1, #8 +; BE-NEXT: strh r2, [r0] ; BE-NEXT: ldr r2, .LCPI1_0 ; BE-NEXT: and r1, r1, r2 ; BE-NEXT: strb r1, [r0, #2] -; BE-NEXT: lsr r1, r1, #8 -; BE-NEXT: strh r1, [r0] ; BE-NEXT: mov pc, lr ; BE-NEXT: .p2align 2 ; BE-NEXT: @ BB#1: @@ -69,37 +62,24 @@ define void @i24_insert_bit(i24* %a, i1 zeroext %bit) { ; LE-LABEL: i24_insert_bit: ; LE: @ BB#0: -; LE-NEXT: ldrb r2, [r0, #2] -; LE-NEXT: ldrh r3, [r0] -; LE-NEXT: orr r2, r3, r2, lsl #16 -; LE-NEXT: ldr r3, .LCPI2_0 +; LE-NEXT: ldrh r2, [r0] +; LE-NEXT: mov r3, #255 +; LE-NEXT: orr r3, r3, #57088 ; LE-NEXT: and r2, r2, r3 -; LE-NEXT: lsr r3, r2, #16 ; LE-NEXT: orr r1, r2, r1, lsl #13 -; LE-NEXT: strb r3, [r0, #2] ; LE-NEXT: strh r1, [r0] ; LE-NEXT: mov pc, lr -; LE-NEXT: .p2align 2 -; LE-NEXT: @ BB#1: -; LE-NEXT: .LCPI2_0: -; LE-NEXT: .long 16769023 @ 0xffdfff ; ; BE-LABEL: i24_insert_bit: ; BE: @ BB#0: ; BE-NEXT: ldrh r2, [r0] -; BE-NEXT: ldrb r3, [r0, #2] -; BE-NEXT: orr r2, r3, r2, lsl #8 -; BE-NEXT: ldr r3, .LCPI2_0 -; BE-NEXT: and r2, r2, r3 +; BE-NEXT: mov r3, #57088 +; BE-NEXT: orr r3, r3, #16711680 +; BE-NEXT: and r2, r3, r2, lsl #8 ; BE-NEXT: orr r1, r2, r1, lsl #13 -; BE-NEXT: strb r2, [r0, #2] ; BE-NEXT: lsr r1, r1, #8 ; BE-NEXT: strh r1, [r0] ; BE-NEXT: mov pc, lr -; BE-NEXT: .p2align 2 -; BE-NEXT: @ BB#1: -; BE-NEXT: .LCPI2_0: -; BE-NEXT: .long 16769023 @ 0xffdfff %extbit = zext i1 %bit to i24 %b = load i24, i24* %a, align 1 %extbit.shl = shl nuw nsw i24 %extbit, 13 @@ -186,14 +166,14 @@ ; BE-NEXT: .save {r11, lr} ; BE-NEXT: push {r11, lr} ; BE-NEXT: mov r2, r0 -; BE-NEXT: ldr lr, [r0] ; BE-NEXT: ldrh r12, [r2, #4]! 
; BE-NEXT: ldrb r3, [r2, #2] +; BE-NEXT: strb r3, [r2, #2] ; BE-NEXT: orr r12, r3, r12, lsl #8 +; BE-NEXT: ldr lr, [r0] ; BE-NEXT: orr r3, r12, lr, lsl #24 ; BE-NEXT: bic r3, r3, #8192 ; BE-NEXT: orr r1, r3, r1, lsl #13 -; BE-NEXT: strb r3, [r2, #2] ; BE-NEXT: lsr r3, r1, #8 ; BE-NEXT: strh r3, [r2] ; BE-NEXT: bic r2, lr, #255 Index: test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- test/CodeGen/X86/avx512-insert-extract.ll +++ test/CodeGen/X86/avx512-insert-extract.ll @@ -329,7 +329,7 @@ ; KNL-LABEL: test13: ; KNL: ## BB#0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: setb %al +; KNL-NEXT: sbbb %al, %al ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k0 ; KNL-NEXT: movw $-4, %ax @@ -343,7 +343,7 @@ ; SKX-LABEL: test13: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: setb %al +; SKX-NEXT: sbbb %al, %al ; SKX-NEXT: andl $1, %eax ; SKX-NEXT: kmovw %eax, %k0 ; SKX-NEXT: movw $-4, %ax @@ -1260,7 +1260,7 @@ ; SKX-LABEL: test_insertelement_v32i1: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: setb %al +; SKX-NEXT: sbbb %al, %al ; SKX-NEXT: andl $1, %eax ; SKX-NEXT: kmovw %eax, %k0 ; SKX-NEXT: vpcmpltud %zmm2, %zmm0, %k1 @@ -1284,7 +1284,7 @@ ; KNL-LABEL: test_iinsertelement_v4i1: ; KNL: ## BB#0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: setb %al +; KNL-NEXT: sbbb %al, %al ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 @@ -1324,7 +1324,7 @@ ; SKX-LABEL: test_iinsertelement_v4i1: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: setb %al +; SKX-NEXT: sbbb %al, %al ; SKX-NEXT: andl $1, %eax ; SKX-NEXT: kmovw %eax, %k0 ; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k1 @@ -1347,7 +1347,7 @@ ; KNL-LABEL: test_iinsertelement_v2i1: ; KNL: ## BB#0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: setb %al +; KNL-NEXT: sbbb %al, %al ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = 
[9223372036854775808,9223372036854775808] @@ -1369,7 +1369,7 @@ ; SKX-LABEL: test_iinsertelement_v2i1: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi -; SKX-NEXT: setb %al +; SKX-NEXT: sbbb %al, %al ; SKX-NEXT: andl $1, %eax ; SKX-NEXT: kmovw %eax, %k0 ; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 Index: test/CodeGen/X86/illegal-bitfield-loadstore.ll =================================================================== --- test/CodeGen/X86/illegal-bitfield-loadstore.ll +++ test/CodeGen/X86/illegal-bitfield-loadstore.ll @@ -23,13 +23,12 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movzwl (%rdi), %eax ; CHECK-NEXT: movzbl 2(%rdi), %ecx +; CHECK-NEXT: movb %cl, 2(%rdi) ; CHECK-NEXT: shll $16, %ecx ; CHECK-NEXT: orl %eax, %ecx ; CHECK-NEXT: orl $384, %ecx # imm = 0x180 ; CHECK-NEXT: andl $16777088, %ecx # imm = 0xFFFF80 ; CHECK-NEXT: movw %cx, (%rdi) -; CHECK-NEXT: shrl $16, %ecx -; CHECK-NEXT: movb %cl, 2(%rdi) ; CHECK-NEXT: retq %b = load i24, i24* %a, align 1 %c = and i24 %b, -128 @@ -44,14 +43,13 @@ ; CHECK-NEXT: movzbl %sil, %eax ; CHECK-NEXT: movzwl (%rdi), %ecx ; CHECK-NEXT: movzbl 2(%rdi), %edx +; CHECK-NEXT: movb %dl, 2(%rdi) ; CHECK-NEXT: shll $16, %edx ; CHECK-NEXT: orl %ecx, %edx ; CHECK-NEXT: shll $13, %eax ; CHECK-NEXT: andl $16769023, %edx # imm = 0xFFDFFF -; CHECK-NEXT: orl %edx, %eax -; CHECK-NEXT: shrl $16, %edx -; CHECK-NEXT: movb %dl, 2(%rdi) -; CHECK-NEXT: movw %ax, (%rdi) +; CHECK-NEXT: orl %eax, %edx +; CHECK-NEXT: movw %dx, (%rdi) ; CHECK-NEXT: retq %extbit = zext i1 %bit to i24 %b = load i24, i24* %a, align 1 @@ -90,20 +88,19 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movzwl 4(%rdi), %eax ; CHECK-NEXT: movzbl 6(%rdi), %ecx +; CHECK-NEXT: movl (%rdi), %edx +; CHECK-NEXT: movb %cl, 6(%rdi) +; CHECK-NEXT: # kill: %ECX %ECX %RCX %RCX ; CHECK-NEXT: shll $16, %ecx ; CHECK-NEXT: orl %eax, %ecx ; CHECK-NEXT: shlq $32, %rcx -; CHECK-NEXT: movl (%rdi), %eax -; CHECK-NEXT: orq %rcx, %rax -; CHECK-NEXT: orq $384, %rax # imm = 0x180 -; CHECK-NEXT: movabsq $72057594037927808, %rcx # imm = 
0xFFFFFFFFFFFF80 -; CHECK-NEXT: andq %rax, %rcx -; CHECK-NEXT: movl %ecx, (%rdi) -; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: orq %rcx, %rdx +; CHECK-NEXT: orq $384, %rdx # imm = 0x180 +; CHECK-NEXT: movabsq $72057594037927808, %rax # imm = 0xFFFFFFFFFFFF80 +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: movl %eax, (%rdi) ; CHECK-NEXT: shrq $32, %rax ; CHECK-NEXT: movw %ax, 4(%rdi) -; CHECK-NEXT: shrq $48, %rcx -; CHECK-NEXT: movb %cl, 6(%rdi) ; CHECK-NEXT: retq %b = load i56, i56* %a, align 1 %c = and i56 %b, -128 @@ -118,20 +115,20 @@ ; CHECK-NEXT: movzbl %sil, %eax ; CHECK-NEXT: movzwl 4(%rdi), %ecx ; CHECK-NEXT: movzbl 6(%rdi), %edx +; CHECK-NEXT: movl (%rdi), %esi +; CHECK-NEXT: movb %dl, 6(%rdi) +; CHECK-NEXT: # kill: %EDX %EDX %RDX %RDX ; CHECK-NEXT: shll $16, %edx ; CHECK-NEXT: orl %ecx, %edx ; CHECK-NEXT: shlq $32, %rdx -; CHECK-NEXT: movl (%rdi), %ecx -; CHECK-NEXT: orq %rdx, %rcx +; CHECK-NEXT: orq %rdx, %rsi ; CHECK-NEXT: shlq $13, %rax -; CHECK-NEXT: movabsq $72057594037919743, %rdx # imm = 0xFFFFFFFFFFDFFF -; CHECK-NEXT: andq %rcx, %rdx -; CHECK-NEXT: orq %rdx, %rax -; CHECK-NEXT: movl %eax, (%rdi) -; CHECK-NEXT: shrq $48, %rdx -; CHECK-NEXT: movb %dl, 6(%rdi) -; CHECK-NEXT: shrq $32, %rax -; CHECK-NEXT: movw %ax, 4(%rdi) +; CHECK-NEXT: movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF +; CHECK-NEXT: andq %rsi, %rcx +; CHECK-NEXT: orq %rax, %rcx +; CHECK-NEXT: movl %ecx, (%rdi) +; CHECK-NEXT: shrq $32, %rcx +; CHECK-NEXT: movw %cx, 4(%rdi) ; CHECK-NEXT: retq %extbit = zext i1 %bit to i56 %b = load i56, i56* %a, align 1 Index: test/CodeGen/X86/xaluo.ll =================================================================== --- test/CodeGen/X86/xaluo.ll +++ test/CodeGen/X86/xaluo.ll @@ -1409,7 +1409,6 @@ ; KNL-NEXT: subq %rdi, %rax ; KNL-NEXT: sbbb %dl, %dl ; KNL-NEXT: orb %cl, %dl -; KNL-NEXT: andb $1, %dl ; KNL-NEXT: retq %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %a) %v0 = extractvalue {i64, i1} %t0, 0