diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1673,6 +1673,16 @@ if ((ShAmt < DemandedBits.getActiveBits()) && ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; + } else { + // This is a variable shift, so we can't shift the demand mask by a known + // amount. But if we are not demanding high bits, then we are not + // demanding those bits from the pre-shifted operand either. + if (unsigned CTLZ = DemandedBits.countLeadingZeros()) { + APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ)); + if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO, Depth + 1)) + return true; + Known.resetAll(); + } } // If we are only demanding sign bits then we can use the shift source diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll --- a/llvm/test/CodeGen/AMDGPU/shl.ll +++ b/llvm/test/CodeGen/AMDGPU/shl.ll @@ -589,7 +589,7 @@ ; EG-NEXT: TEX 0 @8 ; EG-NEXT: ALU 0, @15, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 0 @10 -; EG-NEXT: ALU 12, @16, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 11, @16, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T7.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD @@ -604,13 +604,12 @@ ; EG-NEXT: ALU clause starting at 15: ; EG-NEXT: MOV * T7.X, KC0[2].Z, ; EG-NEXT: ALU clause starting at 16: -; EG-NEXT: AND_INT T0.Y, T0.X, literal.x, -; EG-NEXT: AND_INT T0.Z, T7.X, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: AND_INT T0.Z, T0.X, literal.x, ; EG-NEXT: LSHR T0.W, T0.X, literal.y, ; EG-NEXT: LSHR * T1.W, T7.X, literal.y, ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) ; EG-NEXT: LSHL T0.W, PS, PV.W, -; EG-NEXT: LSHL * T1.W, PV.Z, PV.Y, +; EG-NEXT: LSHL * T1.W, T7.X, PV.Z, ; EG-NEXT: AND_INT T1.W, PS, literal.x, ; EG-NEXT: LSHL * T0.W, PV.W, literal.y, ; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) @@ -685,7 +684,7 @@ ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[] ; EG-NEXT: TEX 0 @6 -; EG-NEXT: ALU 53, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: ALU 51, @11, KC0[CB0:0-32], KC1[] ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XY, T0.X, 1 ; EG-NEXT: CF_END ; EG-NEXT: PAD @@ -704,10 +703,9 @@ ; EG-NEXT: MOV T3.X, T10.W, ; EG-NEXT: MOV * T0.Z, T6.X, ; EG-NEXT: MOV * T1.Y, T2.X, -; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, -; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, +; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x, ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) -; EG-NEXT: LSHL * T1.W, PS, PV.W, +; EG-NEXT: LSHL * T1.W, T0.X, PV.W, ; EG-NEXT: AND_INT T1.W, PV.W, literal.x, ; EG-NEXT: AND_INT * T2.W, T0.Z, literal.y, ; EG-NEXT: 65535(9.183409e-41), -65536(nan) @@ -726,10 +724,9 @@ ; EG-NEXT: OR_INT * T1.W, T2.W, PV.W, ; EG-NEXT: MOV T6.X, PV.W, ; EG-NEXT: MOV * T0.X, T7.X, -; EG-NEXT: AND_INT T1.W, T0.Z, literal.x, -; EG-NEXT: AND_INT * T2.W, T0.Y, literal.x, +; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x, ; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) -; EG-NEXT: LSHL T1.W, PS, PV.W, +; EG-NEXT: LSHL T1.W, T0.Y, PV.W, ; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, ; EG-NEXT: -65536(nan), 0(0.000000e+00) ; EG-NEXT: AND_INT * T1.W, PV.W, literal.x, diff --git a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll --- a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll @@ -21,18 +21,16 @@ ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: s_mov_b32 s6, 0xffff ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_and_b32 s7, s4, s6 -; VI-NEXT: s_lshr_b32 s4, s4, 16 -; VI-NEXT: s_lshr_b32 s8, s5, 16 -; VI-NEXT: s_lshl_b32 s4, s4, s8 -; VI-NEXT: s_lshl_b32 s5, s7, s5 -; VI-NEXT: s_lshl_b32 s4, s4, 16 -; VI-NEXT: s_and_b32 s5, s5, s6 -; VI-NEXT: s_or_b32 s4, s5, s4 +; VI-NEXT: s_lshr_b32 s6, s4, 16 +; VI-NEXT: s_lshr_b32 s7, s5, 16 +; VI-NEXT: s_lshl_b32 s6, s6, s7 +; VI-NEXT: s_lshl_b32 s4, s4, s5 +; VI-NEXT: s_lshl_b32 s5, s6, 16 +; VI-NEXT: s_and_b32 s4, s4, 0xffff +; VI-NEXT: s_or_b32 s4, s4, s5 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/RISCV/alu16.ll b/llvm/test/CodeGen/RISCV/alu16.ll --- a/llvm/test/CodeGen/RISCV/alu16.ll +++ b/llvm/test/CodeGen/RISCV/alu16.ll @@ -213,16 +213,12 @@ define void @sll_ext(i16 %a, i16 %b, i16* %p) nounwind { ; RV32I-LABEL: sll_ext: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a0, a0, 16 -; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: sll a0, a0, a1 ; RV32I-NEXT: sh a0, 0(a2) ; RV32I-NEXT: ret ; ; RV64I-LABEL: sll_ext: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a0, a0, 48 -; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: sllw a0, a0, a1 ; RV64I-NEXT: sh a0, 0(a2) ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/alu8.ll b/llvm/test/CodeGen/RISCV/alu8.ll --- a/llvm/test/CodeGen/RISCV/alu8.ll +++ b/llvm/test/CodeGen/RISCV/alu8.ll @@ -211,14 +211,12 @@ define void @sll_ext(i8 %a, i8 %b, i8* %p) nounwind { ; RV32I-LABEL: sll_ext: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a0, a0, 255 ; RV32I-NEXT: sll a0, a0, a1 ; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: ret ; ; RV64I-LABEL: sll_ext: ; RV64I: # %bb.0: -; RV64I-NEXT: andi a0, a0, 255 ; RV64I-NEXT: sllw a0, a0, a1 ; RV64I-NEXT: sb a0, 0(a2) ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll @@ -160,80 +160,81 @@ ; CHECK-NEXT: add r7, sp, #12 ; CHECK-NEXT: .save {r8, r9, r10, r11} ; CHECK-NEXT: push.w {r8, r9, r10, r11} -; CHECK-NEXT: .pad #12 -; CHECK-NEXT: sub sp, #12 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: wls lr, r1, .LBB2_3 ; CHECK-NEXT: @ %bb.1: @ %while.body.preheader -; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: adds r2, r3, #4 -; CHECK-NEXT: add.w r9, r0, #4 -; CHECK-NEXT: mvn r11, #1 -; CHECK-NEXT: @ implicit-def: $r6 -; CHECK-NEXT: @ implicit-def: $r12 -; CHECK-NEXT: str r4, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r12, r0 +; CHECK-NEXT: add.w r10, r3, #4 +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: mvn r9, #1 +; CHECK-NEXT: @ implicit-def: $r8 +; CHECK-NEXT: @ implicit-def: $r4 +; CHECK-NEXT: str r2, [sp] @ 4-byte Spill ; CHECK-NEXT: .LBB2_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr r1, [r9, #-4] -; CHECK-NEXT: ldr.w r10, [r2] -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: muls r1, r3, r1 -; CHECK-NEXT: adds.w r8, r1, #-2147483648 -; CHECK-NEXT: asr.w r5, r1, #31 -; CHECK-NEXT: adc r1, r5, #0 -; CHECK-NEXT: mul r5, r10, r0 -; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: ldr.w r2, [r11, #4] -; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: add.w r5, r5, #-2147483648 -; CHECK-NEXT: asrl r8, r1, r5 -; CHECK-NEXT: smull r4, r5, r10, r8 -; CHECK-NEXT: lsll r4, r5, #30 -; CHECK-NEXT: asrs r1, r5, #31 -; CHECK-NEXT: mov r4, r5 -; CHECK-NEXT: lsll r4, r1, r10 -; CHECK-NEXT: lsll r4, r1, #30 -; CHECK-NEXT: ldr.w r4, [r11] -; CHECK-NEXT: asrs r5, r1, #31 -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: muls r4, r6, r4 -; CHECK-NEXT: adds r4, #2 -; CHECK-NEXT: lsll r8, r5, r4 -; CHECK-NEXT: ldr r4, [r9], #4 -; CHECK-NEXT: asr.w r5, r12, #31 -; CHECK-NEXT: add.w r8, r8, #-2147483648 +; CHECK-NEXT: ldr r2, [r0] +; CHECK-NEXT: asrs r5, r4, #31 +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: muls r2, r3, r2 +; CHECK-NEXT: adds r4, r4, r2 +; CHECK-NEXT: adc.w r2, r5, r2, asr #31 +; CHECK-NEXT: ldr.w r5, [r9, #4] +; CHECK-NEXT: adds.w r4, r4, #-2147483648 +; CHECK-NEXT: adc r1, r2, #0 +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: smull r5, r6, r5, r8 +; CHECK-NEXT: ldr.w r2, [r9] +; CHECK-NEXT: asrs r4, r1, #31 +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: subs r5, r1, r5 +; CHECK-NEXT: sbcs r4, r6 +; CHECK-NEXT: adds.w r6, r5, #-2147483648 +; CHECK-NEXT: adc r5, r4, #0 +; CHECK-NEXT: ldr r4, [r0, #-4] ; CHECK-NEXT: muls r4, r3, r4 ; CHECK-NEXT: adds r3, #4 -; CHECK-NEXT: adds.w r1, r12, r4 -; CHECK-NEXT: adc.w r5, r5, r4, asr #31 -; CHECK-NEXT: smull r6, r4, r2, r6 -; CHECK-NEXT: adds.w r1, r1, #-2147483648 -; CHECK-NEXT: adc r1, r5, #0 -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: asrs r5, r1, #31 -; CHECK-NEXT: subs r6, r1, r6 -; CHECK-NEXT: sbcs r5, r4 -; CHECK-NEXT: adds.w r6, r6, #-2147483648 -; CHECK-NEXT: adc r5, r5, #0 -; CHECK-NEXT: asrl r6, r5, r8 +; CHECK-NEXT: adds.w r0, r4, #-2147483648 +; CHECK-NEXT: asr.w r1, r4, #31 +; CHECK-NEXT: ldr.w r4, [r10] +; CHECK-NEXT: adc r1, r1, #0 +; CHECK-NEXT: mul r2, r4, r12 +; CHECK-NEXT: add.w r12, r12, #4 +; CHECK-NEXT: add.w r2, r2, #-2147483648 +; CHECK-NEXT: asrl r0, r1, r2 +; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload +; CHECK-NEXT: smull r0, r1, r4, r0 +; CHECK-NEXT: lsll r0, r1, #30 +; CHECK-NEXT: asr.w r11, r1, #31 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: lsll r0, r11, r4 +; CHECK-NEXT: lsrl r0, r11, #2 +; CHECK-NEXT: mul r1, r1, r8 +; CHECK-NEXT: adds r1, #2 +; CHECK-NEXT: lsll r0, r11, r1 +; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: add.w r0, r0, #-2147483648 +; CHECK-NEXT: asrl r6, r5, r0 +; CHECK-NEXT: movs r0, #2 ; CHECK-NEXT: lsrl r6, r5, #2 -; CHECK-NEXT: movs r5, #2 -; CHECK-NEXT: str r6, [r5] -; CHECK-NEXT: ldr r5, [r11], #-4 -; CHECK-NEXT: mls r1, r5, r10, r1 -; CHECK-NEXT: adds.w r12, r1, #-2147483648 -; CHECK-NEXT: asr.w r4, r1, #31 -; CHECK-NEXT: adc r1, r4, #0 -; CHECK-NEXT: ldrd r4, r0, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: lsrl r12, r1, #2 -; CHECK-NEXT: rsb.w r1, r12, #0 +; CHECK-NEXT: str r6, [r0] +; CHECK-NEXT: mov r8, r6 +; CHECK-NEXT: ldr r0, [r9], #-4 +; CHECK-NEXT: mls r0, r0, r4, r1 +; CHECK-NEXT: adds.w r4, r0, #-2147483648 +; CHECK-NEXT: asr.w r1, r0, #31 +; CHECK-NEXT: adc r1, r1, #0 +; CHECK-NEXT: lsrl r4, r1, #2 +; CHECK-NEXT: rsbs r0, r4, #0 +; CHECK-NEXT: str r0, [r2] +; CHECK-NEXT: str r0, [r10, #-4] +; CHECK-NEXT: add.w r10, r10, #4 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: adds r0, #4 -; CHECK-NEXT: str r1, [r4] -; CHECK-NEXT: str r1, [r2, #-4] -; CHECK-NEXT: adds r2, #4 ; CHECK-NEXT: le lr, .LBB2_2 ; CHECK-NEXT: .LBB2_3: @ %while.end -; CHECK-NEXT: add sp, #12 +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: pop.w {r8, r9, r10, r11} ; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: