diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1761,6 +1761,14 @@ } APInt InDemandedMask = DemandedBits.lshr(ShAmt); + + // If the shift is NUW/NSW, then it does demand the high bits. + SDNodeFlags Flags = Op.getNode()->getFlags(); + if (Flags.hasNoSignedWrap()) + InDemandedMask.setHighBits(ShAmt + 1); + else if (Flags.hasNoUnsignedWrap()) + InDemandedMask.setHighBits(ShAmt); + if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1)) return true; diff --git a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll --- a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll +++ b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll @@ -195,8 +195,9 @@ define i32 @extendedLeftShiftshortTointBy16(i16 signext %a) nounwind readnone ssp { ; CHECK-LABEL: extendedLeftShiftshortTointBy16: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: lsl w8, w0, #16 -; CHECK-NEXT: add w0, w8, #16, lsl #12 ; =65536 +; CHECK-NEXT: add w8, w0, #1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: lsl w0, w8, #16 ; CHECK-NEXT: ret entry: %inc = add i16 %a, 1 diff --git a/llvm/test/CodeGen/AArch64/load-combine.ll b/llvm/test/CodeGen/AArch64/load-combine.ll --- a/llvm/test/CodeGen/AArch64/load-combine.ll +++ b/llvm/test/CodeGen/AArch64/load-combine.ll @@ -578,7 +578,7 @@ ; CHECK-NEXT: umov w10, v0.h[3] ; CHECK-NEXT: lsl w8, w8, #16 ; CHECK-NEXT: bfi w8, w9, #8, #8 -; CHECK-NEXT: orr w8, w8, w10, lsl #24 +; CHECK-NEXT: bfi w8, w10, #24, #8 ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %ld = load <4 x i8>, ptr %in, align 4 @@ -609,8 +609,8 @@ ; CHECK-NEXT: ldrh w9, [x0] ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: orr w8, w9, w8, lsl #16 -; CHECK-NEXT: str w8, [x1] +; CHECK-NEXT: bfi w9, w8, #16, #8 +; CHECK-NEXT: str w9, [x1] ; 
CHECK-NEXT: ret %ld = load <4 x i8>, ptr %in, align 4 @@ -640,7 +640,7 @@ ; CHECK-NEXT: umov w8, v0.h[3] ; CHECK-NEXT: umov w9, v0.h[2] ; CHECK-NEXT: lsl w8, w8, #24 -; CHECK-NEXT: orr w8, w8, w9, lsl #16 +; CHECK-NEXT: bfi w8, w9, #16, #8 ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %ld = load <4 x i8>, ptr %in, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll --- a/llvm/test/CodeGen/AMDGPU/shl.ll +++ b/llvm/test/CodeGen/AMDGPU/shl.ll @@ -489,6 +489,7 @@ ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s4, s4, 15 ; VI-NEXT: s_lshl_b32 s4, s4, 12 ; VI-NEXT: v_mov_b32_e32 v0, s4 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -45,9 +45,7 @@ ; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41] ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 ; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[40:41], v3, v4 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[40:41] -; GFX9-O0-NEXT: s_mov_b32 s35, 1 -; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s35, v3 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[40:41] ; GFX9-O0-NEXT: s_mov_b32 s35, 2 ; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s35 ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[36:39], s34 offset:4 @@ -93,7 +91,6 @@ ; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 ; GFX9-O3-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX9-O3-NEXT: v_lshlrev_b32_e32 v4, 1, v4 -; GFX9-O3-NEXT: v_and_b32_e32 v4, 2, v4 ; GFX9-O3-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:4 ; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload @@ -208,12 +205,11 @@ ; GFX9-O0-NEXT: v_readlane_b32 s39, v3, 3 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[34:35], v0, v4 -; GFX9-O0-NEXT: v_cndmask_b32_e64 
v0, 0, 1, s[34:35] -; GFX9-O0-NEXT: s_mov_b32 s34, 1 -; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s34, v0 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[34:35] ; GFX9-O0-NEXT: s_mov_b32 s34, 2 ; GFX9-O0-NEXT: v_and_b32_e64 v0, v0, s34 ; GFX9-O0-NEXT: s_mov_b32 s34, 0 +; GFX9-O0-NEXT: s_nop 0 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4 ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload @@ -266,7 +262,6 @@ ; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 ; GFX9-O3-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX9-O3-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX9-O3-NEXT: v_and_b32_e32 v0, 2, v0 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4 ; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -21,20 +21,18 @@ ; P9LE-NEXT: add 5, 3, 4 ; P9LE-NEXT: lfdx 0, 3, 4 ; P9LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha -; P9LE-NEXT: xxlxor 2, 2, 2 +; P9LE-NEXT: xxlxor 3, 3, 3 ; P9LE-NEXT: vspltisw 4, 8 -; P9LE-NEXT: lxsd 3, 4(5) ; P9LE-NEXT: addi 3, 3, .LCPI0_0@toc@l -; P9LE-NEXT: vadduwm 4, 4, 4 ; P9LE-NEXT: lxv 1, 0(3) -; P9LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha -; P9LE-NEXT: addi 3, 3, .LCPI0_1@toc@l +; P9LE-NEXT: xxlxor 2, 2, 2 +; P9LE-NEXT: vadduwm 4, 4, 4 +; P9LE-NEXT: xxperm 3, 0, 1 +; P9LE-NEXT: lfd 0, 4(5) ; P9LE-NEXT: xxperm 2, 0, 1 -; P9LE-NEXT: lxv 0, 0(3) -; P9LE-NEXT: xxperm 3, 3, 0 -; P9LE-NEXT: vnegw 3, 3 -; P9LE-NEXT: vslw 3, 3, 4 -; P9LE-NEXT: vsubuwm 2, 3, 2 +; P9LE-NEXT: vnegw 2, 2 +; P9LE-NEXT: vslw 2, 2, 4 +; P9LE-NEXT: vsubuwm 2, 2, 3 ; P9LE-NEXT: xxswapd 0, 2 ; P9LE-NEXT: stxv 0, 0(3) ; P9LE-NEXT: blr @@ -42,22 +40,20 @@ ; P9BE-LABEL: test64: ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: add 
5, 3, 4 -; P9BE-NEXT: lxsdx 2, 3, 4 +; P9BE-NEXT: lfdx 0, 3, 4 ; P9BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha -; P9BE-NEXT: xxlxor 1, 1, 1 +; P9BE-NEXT: xxlxor 3, 3, 3 ; P9BE-NEXT: vspltisw 4, 8 -; P9BE-NEXT: lxsd 3, 4(5) ; P9BE-NEXT: addi 3, 3, .LCPI0_0@toc@l +; P9BE-NEXT: lxv 1, 0(3) +; P9BE-NEXT: xxlxor 2, 2, 2 ; P9BE-NEXT: vadduwm 4, 4, 4 -; P9BE-NEXT: lxv 0, 0(3) -; P9BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha -; P9BE-NEXT: addi 3, 3, .LCPI0_1@toc@l -; P9BE-NEXT: xxperm 2, 1, 0 -; P9BE-NEXT: lxv 0, 0(3) -; P9BE-NEXT: xxperm 3, 3, 0 -; P9BE-NEXT: vnegw 3, 3 -; P9BE-NEXT: vslw 3, 3, 4 -; P9BE-NEXT: vsubuwm 2, 3, 2 +; P9BE-NEXT: xxperm 3, 0, 1 +; P9BE-NEXT: lfd 0, 4(5) +; P9BE-NEXT: xxperm 2, 0, 1 +; P9BE-NEXT: vnegw 2, 2 +; P9BE-NEXT: vslw 2, 2, 4 +; P9BE-NEXT: vsubuwm 2, 2, 3 ; P9BE-NEXT: xxswapd 0, 2 ; P9BE-NEXT: stxv 0, 0(3) ; P9BE-NEXT: blr @@ -65,20 +61,19 @@ ; P9BE-AIX-LABEL: test64: ; P9BE-AIX: # %bb.0: # %entry ; P9BE-AIX-NEXT: add 5, 3, 4 -; P9BE-AIX-NEXT: lxsdx 2, 3, 4 +; P9BE-AIX-NEXT: lfdx 0, 3, 4 ; P9BE-AIX-NEXT: ld 3, L..C0(2) # %const.0 -; P9BE-AIX-NEXT: xxlxor 1, 1, 1 +; P9BE-AIX-NEXT: xxlxor 3, 3, 3 +; P9BE-AIX-NEXT: xxlxor 2, 2, 2 ; P9BE-AIX-NEXT: vspltisw 4, 8 -; P9BE-AIX-NEXT: lxsd 3, 4(5) -; P9BE-AIX-NEXT: lxv 0, 0(3) -; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.1 ; P9BE-AIX-NEXT: vadduwm 4, 4, 4 -; P9BE-AIX-NEXT: xxperm 2, 1, 0 -; P9BE-AIX-NEXT: lxv 0, 0(3) -; P9BE-AIX-NEXT: xxperm 3, 3, 0 -; P9BE-AIX-NEXT: vnegw 3, 3 -; P9BE-AIX-NEXT: vslw 3, 3, 4 -; P9BE-AIX-NEXT: vsubuwm 2, 3, 2 +; P9BE-AIX-NEXT: lxv 1, 0(3) +; P9BE-AIX-NEXT: xxperm 3, 0, 1 +; P9BE-AIX-NEXT: lfd 0, 4(5) +; P9BE-AIX-NEXT: xxperm 2, 0, 1 +; P9BE-AIX-NEXT: vnegw 2, 2 +; P9BE-AIX-NEXT: vslw 2, 2, 4 +; P9BE-AIX-NEXT: vsubuwm 2, 2, 3 ; P9BE-AIX-NEXT: xxswapd 0, 2 ; P9BE-AIX-NEXT: stxv 0, 0(3) ; P9BE-AIX-NEXT: blr @@ -86,10 +81,9 @@ ; P9BE-AIX32-LABEL: test64: ; P9BE-AIX32: # %bb.0: # %entry ; P9BE-AIX32-NEXT: lwzux 4, 3, 4 -; P9BE-AIX32-NEXT: xxlxor 2, 2, 2 -; P9BE-AIX32-NEXT: vspltisw 4, 8 
+; P9BE-AIX32-NEXT: xxlxor 4, 4, 4 +; P9BE-AIX32-NEXT: xxlxor 3, 3, 3 ; P9BE-AIX32-NEXT: stw 4, -48(1) -; P9BE-AIX32-NEXT: vadduwm 4, 4, 4 ; P9BE-AIX32-NEXT: lwz 4, 4(3) ; P9BE-AIX32-NEXT: lxv 0, -48(1) ; P9BE-AIX32-NEXT: stw 4, -32(1) @@ -97,17 +91,17 @@ ; P9BE-AIX32-NEXT: lxv 1, -32(1) ; P9BE-AIX32-NEXT: lwz 3, 8(3) ; P9BE-AIX32-NEXT: stw 3, -16(1) -; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.1 ; P9BE-AIX32-NEXT: xxmrghw 2, 0, 1 ; P9BE-AIX32-NEXT: lxv 0, 0(4) -; P9BE-AIX32-NEXT: xxperm 2, 2, 0 -; P9BE-AIX32-NEXT: lxv 0, -16(1) -; P9BE-AIX32-NEXT: xxmrghw 3, 1, 0 -; P9BE-AIX32-NEXT: lxv 0, 0(3) -; P9BE-AIX32-NEXT: xxperm 3, 3, 0 -; P9BE-AIX32-NEXT: vnegw 3, 3 -; P9BE-AIX32-NEXT: vslw 3, 3, 4 -; P9BE-AIX32-NEXT: vsubuwm 2, 3, 2 +; P9BE-AIX32-NEXT: lxv 2, -16(1) +; P9BE-AIX32-NEXT: xxperm 4, 2, 0 +; P9BE-AIX32-NEXT: xxmrghw 2, 1, 2 +; P9BE-AIX32-NEXT: xxperm 3, 2, 0 +; P9BE-AIX32-NEXT: vnegw 2, 3 +; P9BE-AIX32-NEXT: vspltisw 3, 8 +; P9BE-AIX32-NEXT: vadduwm 3, 3, 3 +; P9BE-AIX32-NEXT: vslw 2, 2, 3 +; P9BE-AIX32-NEXT: vsubuwm 2, 2, 4 ; P9BE-AIX32-NEXT: xxswapd 0, 2 ; P9BE-AIX32-NEXT: stxv 0, 0(3) ; P9BE-AIX32-NEXT: blr @@ -182,7 +176,7 @@ ; P9BE-AIX: # %bb.0: # %entry ; P9BE-AIX-NEXT: add 5, 3, 4 ; P9BE-AIX-NEXT: lfiwzx 0, 3, 4 -; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.0 +; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.0 ; P9BE-AIX-NEXT: xxlxor 3, 3, 3 ; P9BE-AIX-NEXT: xxlxor 2, 2, 2 ; P9BE-AIX-NEXT: vspltisw 4, 8 @@ -203,7 +197,7 @@ ; P9BE-AIX32: # %bb.0: # %entry ; P9BE-AIX32-NEXT: add 5, 3, 4 ; P9BE-AIX32-NEXT: lfiwzx 0, 3, 4 -; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0 +; P9BE-AIX32-NEXT: lwz 3, L..C1(2) # %const.0 ; P9BE-AIX32-NEXT: xxlxor 3, 3, 3 ; P9BE-AIX32-NEXT: xxlxor 2, 2, 2 ; P9BE-AIX32-NEXT: vspltisw 4, 8 @@ -302,7 +296,7 @@ ; P9BE-AIX-NEXT: li 7, 16 ; P9BE-AIX-NEXT: add 6, 3, 4 ; P9BE-AIX-NEXT: lxsihzx 0, 6, 7 -; P9BE-AIX-NEXT: ld 6, L..C3(2) # %const.0 +; P9BE-AIX-NEXT: ld 6, L..C2(2) # %const.0 ; P9BE-AIX-NEXT: lxv 1, 0(6) ; P9BE-AIX-NEXT: li 6, 0 ; 
P9BE-AIX-NEXT: mtvsrwz 2, 6 @@ -310,7 +304,7 @@ ; P9BE-AIX-NEXT: vsplth 4, 2, 3 ; P9BE-AIX-NEXT: xxperm 3, 0, 1 ; P9BE-AIX-NEXT: lxsihzx 0, 3, 4 -; P9BE-AIX-NEXT: ld 3, L..C4(2) # %const.1 +; P9BE-AIX-NEXT: ld 3, L..C3(2) # %const.1 ; P9BE-AIX-NEXT: xxperm 2, 0, 1 ; P9BE-AIX-NEXT: lxv 0, 0(3) ; P9BE-AIX-NEXT: li 3, 0 @@ -334,7 +328,7 @@ ; P9BE-AIX32-NEXT: sth 4, -48(1) ; P9BE-AIX32-NEXT: lxv 4, -48(1) ; P9BE-AIX32-NEXT: sth 3, -32(1) -; P9BE-AIX32-NEXT: lwz 3, L..C3(2) # %const.0 +; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0 ; P9BE-AIX32-NEXT: lxv 3, -32(1) ; P9BE-AIX32-NEXT: vmrghh 4, 2, 4 ; P9BE-AIX32-NEXT: lxv 0, 0(3) @@ -446,16 +440,16 @@ ; P9BE-AIX-NEXT: add 6, 3, 4 ; P9BE-AIX-NEXT: li 7, 8 ; P9BE-AIX-NEXT: lxsibzx 0, 6, 7 -; P9BE-AIX-NEXT: ld 6, L..C5(2) # %const.0 +; P9BE-AIX-NEXT: ld 6, L..C4(2) # %const.0 ; P9BE-AIX-NEXT: lxv 1, 0(6) ; P9BE-AIX-NEXT: li 6, 0 ; P9BE-AIX-NEXT: mtvsrwz 2, 6 ; P9BE-AIX-NEXT: vspltb 3, 2, 7 ; P9BE-AIX-NEXT: xxperm 0, 2, 1 ; P9BE-AIX-NEXT: lxsibzx 1, 3, 4 -; P9BE-AIX-NEXT: ld 3, L..C6(2) # %const.1 +; P9BE-AIX-NEXT: ld 3, L..C5(2) # %const.1 ; P9BE-AIX-NEXT: lxv 2, 0(3) -; P9BE-AIX-NEXT: ld 3, L..C7(2) # %const.2 +; P9BE-AIX-NEXT: ld 3, L..C6(2) # %const.2 ; P9BE-AIX-NEXT: xxperm 2, 1, 2 ; P9BE-AIX-NEXT: xxspltw 1, 3, 0 ; P9BE-AIX-NEXT: vmrghh 2, 2, 3 @@ -475,16 +469,16 @@ ; P9BE-AIX32-NEXT: add 6, 3, 4 ; P9BE-AIX32-NEXT: li 7, 8 ; P9BE-AIX32-NEXT: lxsibzx 0, 6, 7 -; P9BE-AIX32-NEXT: lwz 6, L..C4(2) # %const.0 +; P9BE-AIX32-NEXT: lwz 6, L..C3(2) # %const.0 ; P9BE-AIX32-NEXT: lxv 1, 0(6) ; P9BE-AIX32-NEXT: li 6, 0 ; P9BE-AIX32-NEXT: mtvsrwz 2, 6 ; P9BE-AIX32-NEXT: vspltb 3, 2, 7 ; P9BE-AIX32-NEXT: xxperm 0, 2, 1 ; P9BE-AIX32-NEXT: lxsibzx 1, 3, 4 -; P9BE-AIX32-NEXT: lwz 3, L..C5(2) # %const.1 +; P9BE-AIX32-NEXT: lwz 3, L..C4(2) # %const.1 ; P9BE-AIX32-NEXT: lxv 2, 0(3) -; P9BE-AIX32-NEXT: lwz 3, L..C6(2) # %const.2 +; P9BE-AIX32-NEXT: lwz 3, L..C5(2) # %const.2 ; P9BE-AIX32-NEXT: xxperm 2, 1, 2 ; P9BE-AIX32-NEXT: xxspltw 1, 3, 0 
; P9BE-AIX32-NEXT: vmrghh 2, 2, 3 diff --git a/llvm/test/CodeGen/RISCV/aext-to-zext.ll b/llvm/test/CodeGen/RISCV/aext-to-zext.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/aext-to-zext.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I + +; We prefer to zero extend for zextload. The default behavior in +; TargetLowering::SimplifyDemandedBits is to convert zero_extend into any_extend. +define zeroext i16 @read(ptr nocapture noundef readonly %adr) { +; RV64I-LABEL: read: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lbu a1, 1(a0) +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +entry: + %0 = load i8, ptr %adr, align 1 + %conv = zext i8 %0 to i16 + %arrayidx1 = getelementptr inbounds i8, ptr %adr, i64 1 + %1 = load i8, ptr %arrayidx1, align 1 + %conv2 = zext i8 %1 to i16 + %shl = shl nuw i16 %conv2, 8 + %or = or i16 %shl, %conv + ret i16 %or +} diff --git a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll --- a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll @@ -20,6 +20,7 @@ ; CHECK-NEXT: mv a0, s0 ; CHECK-NEXT: mv a1, s1 ; CHECK-NEXT: call __addsf3@plt +; CHECK-NEXT: andi a0, a0, -1 ; CHECK-NEXT: slli a0, a0, 32 ; CHECK-NEXT: slli s2, s2, 32 ; CHECK-NEXT: srli a1, s2, 32 diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll --- a/llvm/test/CodeGen/X86/fp128-cast.ll +++ b/llvm/test/CodeGen/X86/fp128-cast.ll @@ -1221,14 +1221,14 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: addl $3, %ecx -; X32-NEXT: adcl $0, %edx -; X32-NEXT: adcl $0, %esi +; X32-NEXT: addl $3, %esi ; X32-NEXT: adcl $0, %edi -; X32-NEXT: movl %esi, 8(%eax) -; 
X32-NEXT: movl %edx, 4(%eax) -; X32-NEXT: movl %ecx, (%eax) -; X32-NEXT: movl %edi, 12(%eax) +; X32-NEXT: adcl $0, %ecx +; X32-NEXT: adcl $0, %edx +; X32-NEXT: movl %ecx, 8(%eax) +; X32-NEXT: movl %edi, 4(%eax) +; X32-NEXT: movl %esi, (%eax) +; X32-NEXT: movl %edx, 12(%eax) ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: retl $4 diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll --- a/llvm/test/CodeGen/X86/parity.ll +++ b/llvm/test/CodeGen/X86/parity.ll @@ -404,21 +404,46 @@ define i16 @parity_16_shift(i16 %0) { ; X86-NOPOPCNT-LABEL: parity_16_shift: ; X86-NOPOPCNT: # %bb.0: -; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOPOPCNT-NEXT: xorl %eax, %eax -; X86-NOPOPCNT-NEXT: xorb %ch, %cl -; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOPOPCNT-NEXT: movl %eax, %ecx +; X86-NOPOPCNT-NEXT: shrl %ecx +; X86-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555 +; X86-NOPOPCNT-NEXT: subl %ecx, %eax +; X86-NOPOPCNT-NEXT: movl %eax, %ecx +; X86-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333 +; X86-NOPOPCNT-NEXT: shrl $2, %eax +; X86-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333 +; X86-NOPOPCNT-NEXT: addl %ecx, %eax +; X86-NOPOPCNT-NEXT: movl %eax, %ecx +; X86-NOPOPCNT-NEXT: shrl $4, %ecx +; X86-NOPOPCNT-NEXT: addl %eax, %ecx +; X86-NOPOPCNT-NEXT: movl %ecx, %eax +; X86-NOPOPCNT-NEXT: shrl $8, %eax +; X86-NOPOPCNT-NEXT: addl %ecx, %eax ; X86-NOPOPCNT-NEXT: addl %eax, %eax +; X86-NOPOPCNT-NEXT: andl $2, %eax ; X86-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NOPOPCNT-NEXT: retl ; ; X64-NOPOPCNT-LABEL: parity_16_shift: ; X64-NOPOPCNT: # %bb.0: -; X64-NOPOPCNT-NEXT: movl %edi, %ecx -; X64-NOPOPCNT-NEXT: xorl %eax, %eax -; X64-NOPOPCNT-NEXT: xorb %ch, %cl -; X64-NOPOPCNT-NEXT: setnp %al +; X64-NOPOPCNT-NEXT: movl %edi, %eax +; X64-NOPOPCNT-NEXT: shrl %eax +; X64-NOPOPCNT-NEXT: andl $21845, %eax # imm = 0x5555 +; X64-NOPOPCNT-NEXT: subl %eax, %edi +; X64-NOPOPCNT-NEXT: movl %edi, 
%eax +; X64-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333 +; X64-NOPOPCNT-NEXT: shrl $2, %edi +; X64-NOPOPCNT-NEXT: andl $13107, %edi # imm = 0x3333 +; X64-NOPOPCNT-NEXT: addl %edi, %eax +; X64-NOPOPCNT-NEXT: movl %eax, %ecx +; X64-NOPOPCNT-NEXT: shrl $4, %ecx +; X64-NOPOPCNT-NEXT: addl %eax, %ecx +; X64-NOPOPCNT-NEXT: movl %ecx, %eax +; X64-NOPOPCNT-NEXT: shrl $8, %eax +; X64-NOPOPCNT-NEXT: addl %ecx, %eax ; X64-NOPOPCNT-NEXT: addl %eax, %eax +; X64-NOPOPCNT-NEXT: andl $2, %eax ; X64-NOPOPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NOPOPCNT-NEXT: retq ; @@ -426,8 +451,8 @@ ; X86-POPCNT: # %bb.0: ; X86-POPCNT-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-POPCNT-NEXT: popcntl %eax, %eax -; X86-POPCNT-NEXT: andl $1, %eax ; X86-POPCNT-NEXT: addl %eax, %eax +; X86-POPCNT-NEXT: andl $2, %eax ; X86-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X86-POPCNT-NEXT: retl ; @@ -435,8 +460,8 @@ ; X64-POPCNT: # %bb.0: ; X64-POPCNT-NEXT: movzwl %di, %eax ; X64-POPCNT-NEXT: popcntl %eax, %eax -; X64-POPCNT-NEXT: andl $1, %eax ; X64-POPCNT-NEXT: addl %eax, %eax +; X64-POPCNT-NEXT: andl $2, %eax ; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax ; X64-POPCNT-NEXT: retq %2 = tail call i16 @llvm.ctpop.i16(i16 %0) @@ -510,37 +535,55 @@ ; X86-NOPOPCNT: # %bb.0: ; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOPOPCNT-NEXT: movl %eax, %ecx -; X86-NOPOPCNT-NEXT: shrl $16, %ecx -; X86-NOPOPCNT-NEXT: xorl %eax, %ecx -; X86-NOPOPCNT-NEXT: xorl %eax, %eax -; X86-NOPOPCNT-NEXT: xorb %ch, %cl -; X86-NOPOPCNT-NEXT: setnp %al -; X86-NOPOPCNT-NEXT: addl %eax, %eax +; X86-NOPOPCNT-NEXT: shrl %ecx +; X86-NOPOPCNT-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NOPOPCNT-NEXT: subl %ecx, %eax +; X86-NOPOPCNT-NEXT: movl %eax, %ecx +; X86-NOPOPCNT-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NOPOPCNT-NEXT: shrl $2, %eax +; X86-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X86-NOPOPCNT-NEXT: addl %ecx, %eax +; X86-NOPOPCNT-NEXT: movl %eax, %ecx 
+; X86-NOPOPCNT-NEXT: shrl $4, %ecx +; X86-NOPOPCNT-NEXT: addl %eax, %ecx +; X86-NOPOPCNT-NEXT: andl $17764111, %ecx # imm = 0x10F0F0F +; X86-NOPOPCNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 +; X86-NOPOPCNT-NEXT: shrl $23, %eax +; X86-NOPOPCNT-NEXT: andl $2, %eax ; X86-NOPOPCNT-NEXT: retl ; ; X64-NOPOPCNT-LABEL: parity_32_shift: ; X64-NOPOPCNT: # %bb.0: -; X64-NOPOPCNT-NEXT: movl %edi, %ecx -; X64-NOPOPCNT-NEXT: shrl $16, %ecx -; X64-NOPOPCNT-NEXT: xorl %edi, %ecx -; X64-NOPOPCNT-NEXT: xorl %eax, %eax -; X64-NOPOPCNT-NEXT: xorb %ch, %cl -; X64-NOPOPCNT-NEXT: setnp %al -; X64-NOPOPCNT-NEXT: addl %eax, %eax +; X64-NOPOPCNT-NEXT: movl %edi, %eax +; X64-NOPOPCNT-NEXT: shrl %eax +; X64-NOPOPCNT-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; X64-NOPOPCNT-NEXT: subl %eax, %edi +; X64-NOPOPCNT-NEXT: movl %edi, %eax +; X64-NOPOPCNT-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X64-NOPOPCNT-NEXT: shrl $2, %edi +; X64-NOPOPCNT-NEXT: andl $858993459, %edi # imm = 0x33333333 +; X64-NOPOPCNT-NEXT: addl %eax, %edi +; X64-NOPOPCNT-NEXT: movl %edi, %eax +; X64-NOPOPCNT-NEXT: shrl $4, %eax +; X64-NOPOPCNT-NEXT: addl %edi, %eax +; X64-NOPOPCNT-NEXT: andl $17764111, %eax # imm = 0x10F0F0F +; X64-NOPOPCNT-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 +; X64-NOPOPCNT-NEXT: shrl $23, %eax +; X64-NOPOPCNT-NEXT: andl $2, %eax ; X64-NOPOPCNT-NEXT: retq ; ; X86-POPCNT-LABEL: parity_32_shift: ; X86-POPCNT: # %bb.0: ; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax -; X86-POPCNT-NEXT: andl $1, %eax ; X86-POPCNT-NEXT: addl %eax, %eax +; X86-POPCNT-NEXT: andl $2, %eax ; X86-POPCNT-NEXT: retl ; ; X64-POPCNT-LABEL: parity_32_shift: ; X64-POPCNT: # %bb.0: ; X64-POPCNT-NEXT: popcntl %edi, %eax -; X64-POPCNT-NEXT: andl $1, %eax ; X64-POPCNT-NEXT: addl %eax, %eax +; X64-POPCNT-NEXT: andl $2, %eax ; X64-POPCNT-NEXT: retq %2 = tail call i32 @llvm.ctpop.i32(i32 %0) %3 = shl nuw nsw i32 %2, 1 @@ -615,14 +658,22 @@ ; X86-NOPOPCNT-LABEL: parity_64_shift: ; X86-NOPOPCNT: # %bb.0: ; 
X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOPOPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax -; X86-NOPOPCNT-NEXT: movl %eax, %ecx -; X86-NOPOPCNT-NEXT: shrl $16, %ecx -; X86-NOPOPCNT-NEXT: xorl %eax, %ecx +; X86-NOPOPCNT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOPOPCNT-NEXT: movl %ecx, %edx +; X86-NOPOPCNT-NEXT: shrl $16, %edx +; X86-NOPOPCNT-NEXT: xorl %ecx, %edx +; X86-NOPOPCNT-NEXT: xorl %ecx, %ecx +; X86-NOPOPCNT-NEXT: xorb %dh, %dl +; X86-NOPOPCNT-NEXT: setnp %cl +; X86-NOPOPCNT-NEXT: movl %eax, %edx +; X86-NOPOPCNT-NEXT: shrl $16, %edx +; X86-NOPOPCNT-NEXT: xorl %eax, %edx ; X86-NOPOPCNT-NEXT: xorl %eax, %eax -; X86-NOPOPCNT-NEXT: xorb %ch, %cl +; X86-NOPOPCNT-NEXT: xorb %dh, %dl ; X86-NOPOPCNT-NEXT: setnp %al +; X86-NOPOPCNT-NEXT: addl %ecx, %eax ; X86-NOPOPCNT-NEXT: addl %eax, %eax +; X86-NOPOPCNT-NEXT: andl $2, %eax ; X86-NOPOPCNT-NEXT: xorl %edx, %edx ; X86-NOPOPCNT-NEXT: retl ; @@ -637,16 +688,16 @@ ; X64-NOPOPCNT-NEXT: xorl %eax, %eax ; X64-NOPOPCNT-NEXT: xorb %ch, %cl ; X64-NOPOPCNT-NEXT: setnp %al -; X64-NOPOPCNT-NEXT: addq %rax, %rax +; X64-NOPOPCNT-NEXT: addl %eax, %eax ; X64-NOPOPCNT-NEXT: retq ; ; X86-POPCNT-LABEL: parity_64_shift: ; X86-POPCNT: # %bb.0: -; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-POPCNT-NEXT: xorl {{[0-9]+}}(%esp), %eax -; X86-POPCNT-NEXT: popcntl %eax, %eax -; X86-POPCNT-NEXT: andl $1, %eax +; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx +; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax +; X86-POPCNT-NEXT: addl %ecx, %eax ; X86-POPCNT-NEXT: addl %eax, %eax +; X86-POPCNT-NEXT: andl $2, %eax ; X86-POPCNT-NEXT: xorl %edx, %edx ; X86-POPCNT-NEXT: retl ; @@ -654,7 +705,7 @@ ; X64-POPCNT: # %bb.0: ; X64-POPCNT-NEXT: popcntq %rdi, %rax ; X64-POPCNT-NEXT: andl $1, %eax -; X64-POPCNT-NEXT: addq %rax, %rax +; X64-POPCNT-NEXT: addl %eax, %eax ; X64-POPCNT-NEXT: retq %2 = tail call i64 @llvm.ctpop.i64(i64 %0) %3 = shl nuw nsw i64 %2, 1 diff --git a/llvm/test/CodeGen/X86/setcc.ll b/llvm/test/CodeGen/X86/setcc.ll --- 
a/llvm/test/CodeGen/X86/setcc.ll +++ b/llvm/test/CodeGen/X86/setcc.ll @@ -76,19 +76,21 @@ define i32 @t4(i32 %a) { ; X86-LABEL: t4: ; X86: ## %bb.0: -; X86-NEXT: movl L_v4$non_lazy_ptr, %ecx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl $1, (%ecx) -; X86-NEXT: adcw $1, %ax +; X86-NEXT: movl L_v4$non_lazy_ptr, %eax +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: cmpl $1, (%eax) +; X86-NEXT: adcw $1, %cx +; X86-NEXT: movzwl %cx, %eax ; X86-NEXT: shll $16, %eax ; X86-NEXT: retl ; ; X64-LABEL: t4: ; X64: ## %bb.0: -; X64-NEXT: movq _v4@GOTPCREL(%rip), %rcx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl $1, (%rcx) -; X64-NEXT: adcw $1, %ax +; X64-NEXT: movq _v4@GOTPCREL(%rip), %rax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: cmpl $1, (%rax) +; X64-NEXT: adcw $1, %cx +; X64-NEXT: movzwl %cx, %eax ; X64-NEXT: shll $16, %eax ; X64-NEXT: retq %t0 = load i32, ptr @v4, align 4 diff --git a/llvm/test/CodeGen/X86/split-store.ll b/llvm/test/CodeGen/X86/split-store.ll --- a/llvm/test/CodeGen/X86/split-store.ll +++ b/llvm/test/CodeGen/X86/split-store.ll @@ -176,7 +176,7 @@ define void @int12_int12_pair(i12 signext %tmp1, i12 signext %tmp2, ptr %ref.tmp) { ; CHECK-LABEL: int12_int12_pair: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movzwl %si, %eax ; CHECK-NEXT: shll $12, %eax ; CHECK-NEXT: andl $4095, %edi # imm = 0xFFF ; CHECK-NEXT: orl %eax, %edi @@ -197,9 +197,10 @@ define void @int7_int7_pair(i7 signext %tmp1, i7 signext %tmp2, ptr %ref.tmp) { ; CHECK-LABEL: int7_int7_pair: ; CHECK: # %bb.0: -; CHECK-NEXT: shll $7, %esi +; CHECK-NEXT: movzbl %sil, %eax +; CHECK-NEXT: shll $7, %eax ; CHECK-NEXT: andl $127, %edi -; CHECK-NEXT: orl %esi, %edi +; CHECK-NEXT: orl %eax, %edi ; CHECK-NEXT: andl $16383, %edi # imm = 0x3FFF ; CHECK-NEXT: movw %di, (%rdx) ; CHECK-NEXT: retq @@ -216,6 +217,7 @@ define void @int1_int1_pair(i1 signext %tmp1, i1 signext %tmp2, ptr %ref.tmp) { ; CHECK-LABEL: int1_int1_pair: ; CHECK: # %bb.0: +; CHECK-NEXT: andb $1, %sil ; CHECK-NEXT: addb %sil, 
%sil ; CHECK-NEXT: subb %dil, %sil ; CHECK-NEXT: andb $3, %sil diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll @@ -151,7 +151,7 @@ define <8 x i32> @PR46393(<8 x i16> %a0, i8 %a1) { ; X86-LABEL: PR46393: ; X86: # %bb.0: -; X86-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; X86-NEXT: vpmovsxwd %xmm0, %ymm0 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpslld $16, %ymm0, %ymm0 {%k1} {z} @@ -159,7 +159,7 @@ ; ; X64-LABEL: PR46393: ; X64: # %bb.0: -; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; X64-NEXT: vpmovsxwd %xmm0, %ymm0 ; X64-NEXT: kmovd %edi, %k1 ; X64-NEXT: vpslld $16, %ymm0, %ymm0 {%k1} {z} ; X64-NEXT: retq