Index: test/CodeGen/AArch64/bmi-bzhi.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/bmi-bzhi.ll @@ -0,0 +1,816 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=36419 +; https://bugs.llvm.org/show_bug.cgi?id=37603 +; https://bugs.llvm.org/show_bug.cgi?id=37610 + +; Patterns: +; a) x & (1 << nbits) - 1 +; b) x & ~(-1 << nbits) +; c) x & (-1 >> (32 - y)) +; d) x << (32 - y) >> (32 - y) +; are equivalent. + +; ---------------------------------------------------------------------------- ; +; Pattern a. 32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_a0: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: sub w8, w8, #1 // =1 +; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: ret + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) { +; CHECK-LABEL: bzhi32_a1_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: sub w8, w8, #1 // =1 +; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: ret + %conv = zext i8 %numlowbits to i32 + %onebit = shl i32 1, %conv + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a2_load(i32* %w, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_a2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x1 +; CHECK-NEXT: lsl w9, w9, w1 +; CHECK-NEXT: sub w9, w9, #1 // =1 +; CHECK-NEXT: and w0, w9, w8 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a3_load_indexzext(i32* %w, i8 zeroext %numlowbits) { +; CHECK-LABEL: bzhi32_a3_load_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x1 +; CHECK-NEXT: lsl w9, w9, w1 +; CHECK-NEXT: sub w9, w9, #1 // =1 +; CHECK-NEXT: and w0, w9, w8 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %conv = zext i8 %numlowbits to i32 + %onebit = shl i32 1, %conv + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_a4_commutative: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: sub w8, w8, #1 // =1 +; CHECK-NEXT: and w0, w0, w8 +; CHECK-NEXT: ret + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %val, %mask ; swapped order + ret i32 %masked +} + +define i16 @bzhi32_a5_trunc16(i32 %val, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_a5_trunc16: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: sub w8, w8, #1 // =1 +; CHECK-NEXT: and w0, w0, w8 +; CHECK-NEXT: ret + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %val, %mask ; swapped order + %ret = trunc i32 %masked to i16 + ret i16 %ret +} + +; 64-bit + +define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_a0: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: lsl x8, x8, x1 +; 
CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: and x0, x8, x0 +; CHECK-NEXT: ret + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) { +; CHECK-LABEL: bzhi64_a1_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: and x0, x8, x0 +; CHECK-NEXT: ret + %conv = zext i8 %numlowbits to i64 + %onebit = shl i64 1, %conv + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_a2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x1 +; CHECK-NEXT: lsl x9, x9, x1 +; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) { +; CHECK-LABEL: bzhi64_a3_load_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x1 +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: lsl x9, x9, x1 +; CHECK-NEXT: sub x9, x9, #1 // =1 +; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %conv = zext i8 %numlowbits to i64 + %onebit = shl i64 1, %conv + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_a4_commutative: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: and x0, x0, x8 +; CHECK-NEXT: ret + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %val, %mask ; swapped order + ret i64 %masked +} + +define i32 @bzhi64_a5_trunc32(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_a5_trunc32: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: sub w8, w8, #1 // =1 +; CHECK-NEXT: and w0, w0, w8 +; CHECK-NEXT: ret + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %val, %mask ; swapped order + %ret = trunc i64 %masked to i32 + ret i32 %ret +} + +define i16 @bzhi64_a6_trunc16(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_a6_trunc16: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: sub w8, w8, #1 // =1 +; CHECK-NEXT: and w0, w0, w8 +; CHECK-NEXT: ret + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %val, %mask ; swapped order + %ret = trunc i64 %masked to i16 + ret i16 %ret +} + +; ---------------------------------------------------------------------------- ; +; Pattern b. 
32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_b0: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: bic w0, w0, w8 +; CHECK-NEXT: ret + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) { +; CHECK-LABEL: bzhi32_b1_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: bic w0, w0, w8 +; CHECK-NEXT: ret + %conv = zext i8 %numlowbits to i32 + %notmask = shl i32 -1, %conv + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_b2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: lsl w9, w9, w1 +; CHECK-NEXT: bic w0, w8, w9 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) { +; CHECK-LABEL: bzhi32_b3_load_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: lsl w9, w9, w1 +; CHECK-NEXT: bic w0, w8, w9 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %conv = zext i8 %numlowbits to i32 + %notmask = shl i32 -1, %conv + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_b4_commutative: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: bic w0, w0, w8 +; CHECK-NEXT: ret + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %val, %mask ; swapped order + ret i32 %masked +} + +define i16 @bzhi32_b5_trunc16(i32 %val, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_b5_trunc16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: lsl w8, w8, w1 +; CHECK-NEXT: bic w0, w0, w8 +; CHECK-NEXT: ret + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %val, %mask ; swapped order + %ret = trunc i32 %masked to i16 + ret i16 %ret +} + +; 64-bit + +define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_b0: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: bic x0, x0, x8 +; CHECK-NEXT: ret + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) { +; CHECK-LABEL: bzhi64_b1_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: bic x0, x0, x8 +; CHECK-NEXT: ret + %conv = zext i8 %numlowbits to i64 + %notmask = shl i64 -1, %conv + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_b2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: lsl x9, x9, x1 +; CHECK-NEXT: bic x0, x8, x9 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + 
ret i64 %masked +} + +define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) { +; CHECK-LABEL: bzhi64_b3_load_indexzext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: lsl x9, x9, x1 +; CHECK-NEXT: bic x0, x8, x9 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %conv = zext i8 %numlowbits to i64 + %notmask = shl i64 -1, %conv + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_b4_commutative: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: bic x0, x0, x8 +; CHECK-NEXT: ret + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %val, %mask ; swapped order + ret i64 %masked +} + +define i32 @bzhi64_b5_trunc32(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_b5_trunc32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: bic w0, w0, w8 +; CHECK-NEXT: ret + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %val, %mask ; swapped order + %ret = trunc i64 %masked to i32 + ret i32 %ret +} + +define i16 @bzhi64_b6_trunc16(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_b6_trunc16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: lsl x8, x8, x1 +; CHECK-NEXT: bic w0, w0, w8 +; CHECK-NEXT: ret + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %val, %mask ; swapped order + %ret = trunc i64 %masked to i16 + ret i16 %ret +} + +; ---------------------------------------------------------------------------- ; +; Pattern c. 32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_c0: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: ret + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c1_zext(i32 %val, i16 %numlowbits) { +; CHECK-LABEL: bzhi32_c1_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x20 +; CHECK-NEXT: sub w8, w8, w1 +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: ret + %numhighbits = sub i16 32, %numlowbits + %sh_prom = zext i16 %numhighbits to i32 + %mask = lshr i32 -1, %sh_prom + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_c2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: mov w10, #-1 +; CHECK-NEXT: lsr w9, w10, w9 +; CHECK-NEXT: and w0, w9, w8 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c3_load_zext(i32* %w, i16 %numlowbits) { +; CHECK-LABEL: bzhi32_c3_load_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x20 +; CHECK-NEXT: sub w9, w9, w1 +; CHECK-NEXT: mov w10, #-1 +; CHECK-NEXT: lsr w9, w10, w9 +; CHECK-NEXT: and w0, w9, w8 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %numhighbits = sub i16 32, %numlowbits + %sh_prom = zext i16 %numhighbits to i32 + %mask = lshr i32 -1, 
%sh_prom + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_c4_commutative: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: and w0, w0, w8 +; CHECK-NEXT: ret + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %val, %mask ; swapped order + ret i32 %masked +} + +define i16 @bzhi32_c5_trunc16(i32 %val, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_c5_trunc16: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: ret + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %val + %ret = trunc i32 %masked to i16 + ret i16 %ret +} + +; 64-bit + +define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_c0: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: and x0, x8, x0 +; CHECK-NEXT: ret + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c1_zext(i64 %val, i32 %numlowbits) { +; CHECK-LABEL: bzhi64_c1_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: and x0, x8, x0 +; CHECK-NEXT: ret + %numhighbits = sub i32 64, %numlowbits + %sh_prom = zext i32 %numhighbits to i64 + %mask = lshr i64 -1, %sh_prom + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_c2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: mov x10, #-1 +; CHECK-NEXT: lsr x9, x10, x9 +; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c3_load_zext(i64* %w, i32 %numlowbits) { +; CHECK-LABEL: bzhi64_c3_load_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: mov x10, #-1 +; CHECK-NEXT: lsr x9, x10, x9 +; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %numhighbits = sub i32 64, %numlowbits + %sh_prom = zext i32 %numhighbits to i64 + %mask = lshr i64 -1, %sh_prom + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_c4_commutative: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: and x0, x0, x8 +; CHECK-NEXT: ret + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %val, %mask ; swapped order + ret i64 %masked +} + +define i32 @bzhi64_c5_trunc32(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_c5_trunc32: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: ret + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %val + %ret = trunc i64 %masked to i32 + ret i32 %ret +} + +define i16 @bzhi64_c6_trunc16(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_c6_trunc16: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: mov x9, #-1 +; 
CHECK-NEXT: lsr x8, x9, x8 +; CHECK-NEXT: and w0, w8, w0 +; CHECK-NEXT: ret + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %val + %ret = trunc i64 %masked to i16 + ret i16 %ret +} + +; ---------------------------------------------------------------------------- ; +; Pattern d. 32-bit. +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_d0: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: lsl w9, w0, w8 +; CHECK-NEXT: lsr w0, w9, w8 +; CHECK-NEXT: ret + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %val, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + ret i32 %masked +} + +define i32 @bzhi32_d1_zext(i32 %val, i16 %numlowbits) { +; CHECK-LABEL: bzhi32_d1_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x20 +; CHECK-NEXT: sub w8, w8, w1 +; CHECK-NEXT: lsl w9, w0, w8 +; CHECK-NEXT: lsr w0, w9, w8 +; CHECK-NEXT: ret + %numhighbits = sub i16 32, %numlowbits + %sh_prom = zext i16 %numhighbits to i32 + %highbitscleared = shl i32 %val, %sh_prom + %masked = lshr i32 %highbitscleared, %sh_prom + ret i32 %masked +} + +define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_d2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %val, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + ret i32 %masked +} + +define i32 @bzhi32_d3_load_zext(i32* %w, i16 %numlowbits) { +; CHECK-LABEL: bzhi32_d3_load_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: orr w9, wzr, #0x20 +; CHECK-NEXT: sub w9, w9, w1 +; CHECK-NEXT: lsl w8, w8, w9 +; CHECK-NEXT: lsr w0, w8, w9 +; CHECK-NEXT: ret + %val = load i32, i32* %w + %numhighbits = sub i16 32, %numlowbits + %sh_prom = zext i16 %numhighbits to i32 + %highbitscleared = shl i32 %val, %sh_prom + %masked = lshr i32 %highbitscleared, %sh_prom + ret i32 %masked +} + +define i16 @bzhi32_d4_trunc16(i32 %val, i32 %numlowbits) { +; CHECK-LABEL: bzhi32_d4_trunc16: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: lsl w9, w0, w8 +; CHECK-NEXT: lsr w0, w9, w8 +; CHECK-NEXT: ret + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %val, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + %ret = trunc i32 %masked to i16 + ret i16 %ret +} + +; 64-bit. 
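+; The tests below exercise the 64-bit form of pattern d, i.e.
+; x << (64 - nbits) >> (64 - nbits). As an illustrative sketch only (not part
+; of the autogenerated assertions), with %numlowbits = 8 the IR below reduces to:
+;   %numhighbits     = sub i64 64, 8                  ; = 56
+;   %highbitscleared = shl i64 %val, 56               ; only the low 8 bits of %val survive
+;   %masked          = lshr i64 %highbitscleared, 56  ; shift them back, zero-filling the top
+; which yields %val & 0xff, the same result patterns a-c produce for the same bit count.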
+ +define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_d0: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: lsl x9, x0, x8 +; CHECK-NEXT: lsr x0, x9, x8 +; CHECK-NEXT: ret + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %val, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + ret i64 %masked +} + +define i64 @bzhi64_d1_zext(i64 %val, i32 %numlowbits) { +; CHECK-LABEL: bzhi64_d1_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w1 +; CHECK-NEXT: lsl x9, x0, x8 +; CHECK-NEXT: lsr x0, x9, x8 +; CHECK-NEXT: ret + %numhighbits = sub i32 64, %numlowbits + %sh_prom = zext i32 %numhighbits to i64 + %highbitscleared = shl i64 %val, %sh_prom + %masked = lshr i64 %highbitscleared, %sh_prom + ret i64 %masked +} + +define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_d2_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg x9, x1 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %val, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + ret i64 %masked +} + +define i64 @bzhi64_d3_load_zext(i64* %w, i32 %numlowbits) { +; CHECK-LABEL: bzhi64_d3_load_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: neg w9, w1 +; CHECK-NEXT: lsl x8, x8, x9 +; CHECK-NEXT: lsr x0, x8, x9 +; CHECK-NEXT: ret + %val = load i64, i64* %w + %numhighbits = sub i32 64, %numlowbits + %sh_prom = zext i32 %numhighbits to i64 + %highbitscleared = shl i64 %val, %sh_prom + %masked = lshr i64 %highbitscleared, %sh_prom + ret i64 %masked +} + +define i32 @bzhi64_d4_trunc32(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_d4_trunc32: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: lsl x9, x0, x8 +; CHECK-NEXT: lsr x0, x9, x8 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %val, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + %ret = trunc i64 %masked to i32 + ret i32 %ret +} + +define i16 @bzhi64_d5_trunc16(i64 %val, i64 %numlowbits) { +; CHECK-LABEL: bzhi64_d5_trunc16: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x1 +; CHECK-NEXT: lsl x9, x0, x8 +; CHECK-NEXT: lsr x0, x9, x8 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %val, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + %ret = trunc i64 %masked to i16 + ret i16 %ret +} + +; ---------------------------------------------------------------------------- ; +; Constant mask +; ---------------------------------------------------------------------------- ; + +define i64 @bzhi64_constant_mask(i64 %val) { +; CHECK-LABEL: bzhi64_constant_mask: +; CHECK: // %bb.0: +; CHECK-NEXT: and x0, x0, #0x3fffffffffffffff +; CHECK-NEXT: ret + %masked = and i64 %val, 4611686018427387903 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask_load(i64* %val) { +; CHECK-LABEL: bzhi64_constant_mask_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: and x0, x8, #0x3fffffffffffffff +; CHECK-NEXT: ret + %val1 = load i64, i64* %val + %masked = and i64 %val1, 4611686018427387903 + ret i64 %masked +} + +define i64 @bzhi64_small_constant_mask(i64 %val) { +; CHECK-LABEL: bzhi64_small_constant_mask: +; CHECK: // %bb.0: +; CHECK-NEXT: and x0, x0, #0x7fffffff +; CHECK-NEXT: ret + %masked = and i64 
%val, 2147483647 + ret i64 %masked +} Index: test/CodeGen/X86/bmi-bzhi.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/bmi-bzhi.ll @@ -0,0 +1,1703 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BMI,CHECK-BMI1 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BMI,CHECK-BMI2 + +; https://bugs.llvm.org/show_bug.cgi?id=36419 +; https://bugs.llvm.org/show_bug.cgi?id=37603 +; https://bugs.llvm.org/show_bug.cgi?id=37610 + +; Patterns: +; a) x & (1 << nbits) - 1 +; b) x & ~(-1 << nbits) +; c) x & (-1 >> (32 - y)) +; d) x << (32 - y) >> (32 - y) +; are equivalent. + +; ---------------------------------------------------------------------------- ; +; Pattern a. 32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_a0: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: decl %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_a0: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: decl %eax +; CHECK-BMI1-NEXT: andl %edi, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_a0: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, %edi, %eax +; CHECK-BMI2-NEXT: retq + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_a1_indexzext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: decl %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_a1_indexzext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: decl %eax +; CHECK-BMI1-NEXT: andl %edi, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_a1_indexzext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, %edi, %eax +; CHECK-BMI2-NEXT: retq + %conv = zext i8 %numlowbits to i32 + %onebit = shl i32 1, %conv + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a2_load(i32* %w, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_a2_load: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: decl %eax +; CHECK-NOBMI-NEXT: andl (%rdi), %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_a2_load: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: decl %eax +; CHECK-BMI1-NEXT: andl (%rdi), %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_a2_load: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; 
CHECK-BMI2-NEXT: retq + %val = load i32, i32* %w + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a3_load_indexzext(i32* %w, i8 zeroext %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_a3_load_indexzext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: decl %eax +; CHECK-NOBMI-NEXT: andl (%rdi), %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_a3_load_indexzext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: decl %eax +; CHECK-BMI1-NEXT: andl (%rdi), %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_a3_load_indexzext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; CHECK-BMI2-NEXT: retq + %val = load i32, i32* %w + %conv = zext i8 %numlowbits to i32 + %onebit = shl i32 1, %conv + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_a4_commutative: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: decl %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_a4_commutative: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: decl %eax +; CHECK-BMI1-NEXT: andl %edi, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_a4_commutative: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, %edi, %eax +; CHECK-BMI2-NEXT: retq + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %val, %mask ; swapped order + ret i32 %masked +} + +define i16 @bzhi32_a5_trunc16(i32 %val, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_a5_trunc16: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: decl %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_a5_trunc16: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: decl %eax +; CHECK-BMI1-NEXT: andl %edi, %eax +; CHECK-BMI1-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_a5_trunc16: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, %edi, %eax +; CHECK-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-BMI2-NEXT: retq + %onebit = shl i32 1, %numlowbits + %mask = add nsw i32 %onebit, -1 + %masked = and i32 %val, %mask ; swapped order + %ret = trunc i32 %masked to i16 + ret i16 %ret +} + +; 64-bit + +define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_a0: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: decq %rax +; CHECK-NOBMI-NEXT: andq %rdi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_a0: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: 
shlq %cl, %rax +; CHECK-BMI1-NEXT: decq %rax +; CHECK-BMI1-NEXT: andq %rdi, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_a0: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; CHECK-BMI2-NEXT: retq + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_a1_indexzext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: decq %rax +; CHECK-NOBMI-NEXT: andq %rdi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_a1_indexzext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: decq %rax +; CHECK-BMI1-NEXT: andq %rdi, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_a1_indexzext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; CHECK-BMI2-NEXT: retq + %conv = zext i8 %numlowbits to i64 + %onebit = shl i64 1, %conv + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_a2_load: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: decq %rax +; CHECK-NOBMI-NEXT: andq (%rdi), %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_a2_load: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: decq %rax +; CHECK-BMI1-NEXT: andq (%rdi), %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_a2_load: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; CHECK-BMI2-NEXT: retq + %val = load i64, i64* %w + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_a3_load_indexzext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: decq %rax +; CHECK-NOBMI-NEXT: andq (%rdi), %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_a3_load_indexzext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: decq %rax +; CHECK-BMI1-NEXT: andq (%rdi), %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_a3_load_indexzext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; CHECK-BMI2-NEXT: retq + %val = load i64, i64* %w + %conv = zext i8 %numlowbits to i64 + %onebit = shl i64 1, %conv + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_a4_commutative: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: decq %rax +; CHECK-NOBMI-NEXT: andq %rdi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: 
bzhi64_a4_commutative: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: decq %rax +; CHECK-BMI1-NEXT: andq %rdi, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_a4_commutative: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; CHECK-BMI2-NEXT: retq + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %val, %mask ; swapped order + ret i64 %masked +} + +define i32 @bzhi64_a5_trunc32(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_a5_trunc32: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: decl %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_a5_trunc32: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: decl %eax +; CHECK-BMI1-NEXT: andl %edi, %eax +; CHECK-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_a5_trunc32: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movl $1, %eax +; CHECK-BMI2-NEXT: shlxq %rsi, %rax, %rax +; CHECK-BMI2-NEXT: decl %eax +; CHECK-BMI2-NEXT: andl %edi, %eax +; CHECK-BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-BMI2-NEXT: retq + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %val, %mask ; swapped order + %ret = trunc i64 %masked to i32 + ret i32 %ret +} + +define i16 @bzhi64_a6_trunc16(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_a6_trunc16: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: decl %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_a6_trunc16: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: decl %eax +; CHECK-BMI1-NEXT: andl %edi, %eax +; CHECK-BMI1-NEXT: # kill: def $ax killed $ax killed $rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_a6_trunc16: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movl $1, %eax +; CHECK-BMI2-NEXT: shlxq %rsi, %rax, %rax +; CHECK-BMI2-NEXT: decl %eax +; CHECK-BMI2-NEXT: andl %edi, %eax +; CHECK-BMI2-NEXT: # kill: def $ax killed $ax killed $rax +; CHECK-BMI2-NEXT: retq + %onebit = shl i64 1, %numlowbits + %mask = add nsw i64 %onebit, -1 + %masked = and i64 %val, %mask ; swapped order + %ret = trunc i64 %masked to i16 + ret i16 %ret +} + +; ---------------------------------------------------------------------------- ; +; Pattern b. 
32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_b0: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $-1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_b0: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $-1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: andnl %edi, %eax, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_b0: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movl $-1, %eax +; CHECK-BMI2-NEXT: shlxl %esi, %eax, %eax +; CHECK-BMI2-NEXT: andnl %edi, %eax, %eax +; CHECK-BMI2-NEXT: retq + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_b1_indexzext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $-1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_b1_indexzext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $-1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: andnl %edi, %eax, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_b1_indexzext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movl $-1, %eax +; CHECK-BMI2-NEXT: shlxl %esi, %eax, %eax +; CHECK-BMI2-NEXT: andnl %edi, %eax, %eax +; CHECK-BMI2-NEXT: retq + %conv = zext i8 %numlowbits to i32 + %notmask = shl i32 -1, %conv + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_b2_load: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $-1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: andl (%rdi), %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_b2_load: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $-1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: andnl (%rdi), %eax, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_b2_load: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movl $-1, %eax +; CHECK-BMI2-NEXT: shlxl %esi, %eax, %eax +; CHECK-BMI2-NEXT: andnl (%rdi), %eax, %eax +; CHECK-BMI2-NEXT: retq + %val = load i32, i32* %w + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_b3_load_indexzext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $-1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: andl (%rdi), %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_b3_load_indexzext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $-1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: andnl (%rdi), %eax, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_b3_load_indexzext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movl $-1, %eax 
+; CHECK-BMI2-NEXT: shlxl %esi, %eax, %eax +; CHECK-BMI2-NEXT: andnl (%rdi), %eax, %eax +; CHECK-BMI2-NEXT: retq + %val = load i32, i32* %w + %conv = zext i8 %numlowbits to i32 + %notmask = shl i32 -1, %conv + %mask = xor i32 %notmask, -1 + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_b4_commutative: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $-1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_b4_commutative: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $-1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: andnl %edi, %eax, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_b4_commutative: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movl $-1, %eax +; CHECK-BMI2-NEXT: shlxl %esi, %eax, %eax +; CHECK-BMI2-NEXT: andnl %edi, %eax, %eax +; CHECK-BMI2-NEXT: retq + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %val, %mask ; swapped order + ret i32 %masked +} + +define i16 @bzhi32_b5_trunc16(i32 %val, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_b5_trunc16: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $-1, %eax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_b5_trunc16: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $-1, %eax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: andnl %edi, %eax, %eax +; CHECK-BMI1-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_b5_trunc16: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movl $-1, %eax +; CHECK-BMI2-NEXT: shlxl %esi, %eax, %eax +; CHECK-BMI2-NEXT: andnl %edi, %eax, %eax +; CHECK-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-BMI2-NEXT: retq + %notmask = shl i32 -1, %numlowbits + %mask = xor i32 %notmask, -1 + %masked = and i32 %val, %mask ; swapped order + %ret = trunc i32 %masked to i16 + ret i16 %ret +} + +; 64-bit + +define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_b0: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: notq %rax +; CHECK-NOBMI-NEXT: andq %rdi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_b0: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: andnq %rdi, %rax, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_b0: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movq $-1, %rax +; CHECK-BMI2-NEXT: shlxq %rsi, %rax, %rax +; CHECK-BMI2-NEXT: andnq %rdi, %rax, %rax +; CHECK-BMI2-NEXT: retq + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_b1_indexzext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: notq %rax +; CHECK-NOBMI-NEXT: andq 
%rdi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_b1_indexzext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: andnq %rdi, %rax, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_b1_indexzext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI2-NEXT: movq $-1, %rax +; CHECK-BMI2-NEXT: shlxq %rsi, %rax, %rax +; CHECK-BMI2-NEXT: andnq %rdi, %rax, %rax +; CHECK-BMI2-NEXT: retq + %conv = zext i8 %numlowbits to i64 + %notmask = shl i64 -1, %conv + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_b2_load: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: notq %rax +; CHECK-NOBMI-NEXT: andq (%rdi), %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_b2_load: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: andnq (%rdi), %rax, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_b2_load: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movq $-1, %rax +; CHECK-BMI2-NEXT: shlxq %rsi, %rax, %rax +; CHECK-BMI2-NEXT: andnq (%rdi), %rax, %rax +; CHECK-BMI2-NEXT: retq + %val = load i64, i64* %w + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_b3_load_indexzext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: notq %rax +; CHECK-NOBMI-NEXT: andq (%rdi), %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_b3_load_indexzext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: andnq (%rdi), %rax, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_b3_load_indexzext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI2-NEXT: movq $-1, %rax +; CHECK-BMI2-NEXT: shlxq %rsi, %rax, %rax +; CHECK-BMI2-NEXT: andnq (%rdi), %rax, %rax +; CHECK-BMI2-NEXT: retq + %val = load i64, i64* %w + %conv = zext i8 %numlowbits to i64 + %notmask = shl i64 -1, %conv + %mask = xor i64 %notmask, -1 + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_b4_commutative: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: notq %rax +; CHECK-NOBMI-NEXT: andq %rdi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_b4_commutative: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: andnq %rdi, %rax, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_b4_commutative: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movq $-1, %rax +; CHECK-BMI2-NEXT: shlxq %rsi, %rax, %rax +; CHECK-BMI2-NEXT: andnq %rdi, %rax, %rax +; CHECK-BMI2-NEXT: retq + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 
%notmask, -1 + %masked = and i64 %val, %mask ; swapped order + ret i64 %masked +} + +define i32 @bzhi64_b5_trunc32(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_b5_trunc32: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_b5_trunc32: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: andnl %edi, %eax, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_b5_trunc32: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movq $-1, %rax +; CHECK-BMI2-NEXT: shlxq %rsi, %rax, %rax +; CHECK-BMI2-NEXT: andnl %edi, %eax, %eax +; CHECK-BMI2-NEXT: retq + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %val, %mask ; swapped order + %ret = trunc i64 %masked to i32 + ret i32 %ret +} + +define i16 @bzhi64_b6_trunc16(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_b6_trunc16: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: movl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: notl %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_b6_trunc16: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: movl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: andnl %edi, %eax, %eax +; CHECK-BMI1-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_b6_trunc16: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movq $-1, %rax +; CHECK-BMI2-NEXT: shlxq %rsi, %rax, %rax +; CHECK-BMI2-NEXT: andnl %edi, %eax, %eax +; CHECK-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-BMI2-NEXT: retq + %notmask = shl i64 -1, %numlowbits + %mask = xor i64 %notmask, -1 + %masked = and i64 %val, %mask ; swapped order + %ret = trunc i64 %masked to i16 + ret i16 %ret +} + +; ---------------------------------------------------------------------------- ; +; Pattern c. 
32-bit +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_c0: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $32, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: movl $-1, %eax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrl %cl, %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_c0: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $32, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: movl $-1, %eax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrl %cl, %eax +; CHECK-BMI1-NEXT: andl %edi, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_c0: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, %edi, %eax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c1_zext(i32 %val, i16 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_c1_zext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $32, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: movl $-1, %eax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrl %cl, %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_c1_zext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $32, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: movl $-1, %eax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrl %cl, %eax +; CHECK-BMI1-NEXT: andl %edi, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_c1_zext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, %edi, %eax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i16 32, %numlowbits + %sh_prom = zext i16 %numhighbits to i32 + %mask = lshr i32 -1, %sh_prom + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_c2_load: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $32, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: movl $-1, %eax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrl %cl, %eax +; CHECK-NOBMI-NEXT: andl (%rdi), %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_c2_load: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $32, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: movl $-1, %eax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrl %cl, %eax +; CHECK-BMI1-NEXT: andl (%rdi), %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_c2_load: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; CHECK-BMI2-NEXT: retq + %val = load i32, i32* %w + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c3_load_zext(i32* %w, i16 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_c3_load_zext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $32, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: movl $-1, %eax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrl %cl, %eax +; CHECK-NOBMI-NEXT: andl (%rdi), %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_c3_load_zext: +; CHECK-BMI1: # %bb.0: +; 
CHECK-BMI1-NEXT: movl $32, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: movl $-1, %eax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrl %cl, %eax +; CHECK-BMI1-NEXT: andl (%rdi), %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_c3_load_zext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; CHECK-BMI2-NEXT: retq + %val = load i32, i32* %w + %numhighbits = sub i16 32, %numlowbits + %sh_prom = zext i16 %numhighbits to i32 + %mask = lshr i32 -1, %sh_prom + %masked = and i32 %mask, %val + ret i32 %masked +} + +define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_c4_commutative: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $32, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: movl $-1, %eax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrl %cl, %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_c4_commutative: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $32, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: movl $-1, %eax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrl %cl, %eax +; CHECK-BMI1-NEXT: andl %edi, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_c4_commutative: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, %edi, %eax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %val, %mask ; swapped order + ret i32 %masked +} + +define i16 @bzhi32_c5_trunc16(i32 %val, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_c5_trunc16: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $32, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: movl $-1, %eax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrl %cl, %eax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_c5_trunc16: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $32, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: movl $-1, %eax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrl %cl, %eax +; CHECK-BMI1-NEXT: andl %edi, %eax +; CHECK-BMI1-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_c5_trunc16: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, %edi, %eax +; CHECK-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i32 32, %numlowbits + %mask = lshr i32 -1, %numhighbits + %masked = and i32 %mask, %val + %ret = trunc i32 %masked to i16 + ret i16 %ret +} + +; 64-bit + +define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_c0: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $64, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrq %cl, %rax +; CHECK-NOBMI-NEXT: andq %rdi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_c0: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $64, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrq %cl, %rax +; CHECK-BMI1-NEXT: andq %rdi, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: 
bzhi64_c0: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c1_zext(i64 %val, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_c1_zext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $64, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrq %cl, %rax +; CHECK-NOBMI-NEXT: andq %rdi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_c1_zext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $64, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrq %cl, %rax +; CHECK-BMI1-NEXT: andq %rdi, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_c1_zext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i32 64, %numlowbits + %sh_prom = zext i32 %numhighbits to i64 + %mask = lshr i64 -1, %sh_prom + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_c2_load: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $64, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrq %cl, %rax +; CHECK-NOBMI-NEXT: andq (%rdi), %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_c2_load: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $64, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrq %cl, %rax +; CHECK-BMI1-NEXT: andq (%rdi), %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_c2_load: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; CHECK-BMI2-NEXT: retq + %val = load i64, i64* %w + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c3_load_zext(i64* %w, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_c3_load_zext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $64, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrq %cl, %rax +; CHECK-NOBMI-NEXT: andq (%rdi), %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_c3_load_zext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $64, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrq %cl, %rax +; CHECK-BMI1-NEXT: andq (%rdi), %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_c3_load_zext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; CHECK-BMI2-NEXT: retq + %val = load i64, i64* %w + %numhighbits = sub i32 64, %numlowbits + %sh_prom = zext i32 %numhighbits to i64 + %mask = lshr i64 -1, %sh_prom + %masked = and i64 %mask, %val + ret i64 %masked +} + +define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_c4_commutative: +; 
CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $64, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrq %cl, %rax +; CHECK-NOBMI-NEXT: andq %rdi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_c4_commutative: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $64, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrq %cl, %rax +; CHECK-BMI1-NEXT: andq %rdi, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_c4_commutative: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %val, %mask ; swapped order + ret i64 %masked +} + +define i32 @bzhi64_c5_trunc32(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_c5_trunc32: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $64, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrq %cl, %rax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_c5_trunc32: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $64, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrq %cl, %rax +; CHECK-BMI1-NEXT: andl %edi, %eax +; CHECK-BMI1-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_c5_trunc32: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movl $64, %eax +; CHECK-BMI2-NEXT: subl %esi, %eax +; CHECK-BMI2-NEXT: movq $-1, %rcx +; CHECK-BMI2-NEXT: shrxq %rax, %rcx, %rax +; CHECK-BMI2-NEXT: andl %edi, %eax +; CHECK-BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %val + %ret = trunc i64 %masked to i32 + ret i32 %ret +} + +define i16 @bzhi64_c6_trunc16(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_c6_trunc16: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $64, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: movq $-1, %rax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrq %cl, %rax +; CHECK-NOBMI-NEXT: andl %edi, %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_c6_trunc16: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $64, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: movq $-1, %rax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrq %cl, %rax +; CHECK-BMI1-NEXT: andl %edi, %eax +; CHECK-BMI1-NEXT: # kill: def $ax killed $ax killed $rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_c6_trunc16: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movl $64, %eax +; CHECK-BMI2-NEXT: subl %esi, %eax +; CHECK-BMI2-NEXT: movq $-1, %rcx +; CHECK-BMI2-NEXT: shrxq %rax, %rcx, %rax +; CHECK-BMI2-NEXT: andl %edi, %eax +; CHECK-BMI2-NEXT: # kill: def $ax killed $ax killed $rax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i64 64, %numlowbits + %mask = lshr i64 -1, %numhighbits + %masked = and i64 %mask, %val + %ret = 
trunc i64 %masked to i16 + ret i16 %ret +} + +; ---------------------------------------------------------------------------- ; +; Pattern d. 32-bit. +; ---------------------------------------------------------------------------- ; + +define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_d0: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $32, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %edi +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrl %cl, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_d0: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $32, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %edi +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrl %cl, %edi +; CHECK-BMI1-NEXT: movl %edi, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_d0: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, %edi, %eax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %val, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + ret i32 %masked +} + +define i32 @bzhi32_d1_zext(i32 %val, i16 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_d1_zext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $32, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %edi +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrl %cl, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_d1_zext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $32, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %edi +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrl %cl, %edi +; CHECK-BMI1-NEXT: movl %edi, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_d1_zext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, %edi, %eax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i16 32, %numlowbits + %sh_prom = zext i16 %numhighbits to i32 + %highbitscleared = shl i32 %val, %sh_prom + %masked = lshr i32 %highbitscleared, %sh_prom + ret i32 %masked +} + +define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_d2_load: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl (%rdi), %eax +; CHECK-NOBMI-NEXT: movl $32, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrl %cl, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_d2_load: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl (%rdi), %eax +; CHECK-BMI1-NEXT: movl $32, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrl %cl, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_d2_load: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; CHECK-BMI2-NEXT: retq + %val = load i32, i32* %w + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %val, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + ret i32 %masked +} + +define i32 @bzhi32_d3_load_zext(i32* %w, i16 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_d3_load_zext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl (%rdi), %eax +; CHECK-NOBMI-NEXT: movl $32, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; 
CHECK-NOBMI-NEXT: shll %cl, %eax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrl %cl, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_d3_load_zext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl (%rdi), %eax +; CHECK-BMI1-NEXT: movl $32, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %eax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrl %cl, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_d3_load_zext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, (%rdi), %eax +; CHECK-BMI2-NEXT: retq + %val = load i32, i32* %w + %numhighbits = sub i16 32, %numlowbits + %sh_prom = zext i16 %numhighbits to i32 + %highbitscleared = shl i32 %val, %sh_prom + %masked = lshr i32 %highbitscleared, %sh_prom + ret i32 %masked +} + +define i16 @bzhi32_d4_trunc16(i32 %val, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi32_d4_trunc16: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $32, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: shll %cl, %edi +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrl %cl, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi32_d4_trunc16: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $32, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: shll %cl, %edi +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrl %cl, %edi +; CHECK-BMI1-NEXT: movl %edi, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi32_d4_trunc16: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhil %esi, %edi, %eax +; CHECK-BMI2-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i32 32, %numlowbits + %highbitscleared = shl i32 %val, %numhighbits + %masked = lshr i32 %highbitscleared, %numhighbits + %ret = trunc i32 %masked to i16 + ret i16 %ret +} + +; 64-bit. 
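; A minimal standalone sketch (not part of the autogenerated checks) of the
; 64-bit variant of pattern d, assuming a plain x86_64 triple; the file name
; and function name here are illustrative only.
;
;   ; run with: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi2 pattern_d_64.ll -o -
;   define i64 @pattern_d_64_sketch(i64 %val, i64 %numlowbits) {
;     %numhighbits = sub i64 64, %numlowbits              ; number of high bits to clear
;     %highbitscleared = shl i64 %val, %numhighbits       ; shift the unwanted bits out the top
;     %masked = lshr i64 %highbitscleared, %numhighbits   ; logical shift back, zero-filling
;     ret i64 %masked
;   }
;
; With +bmi2 this is expected to collapse to a single bzhiq, matching the
; CHECK-BMI2 lines of bzhi64_d0 below; without BMI2 it stays as the shl/shr
; pair shown in the CHECK-NOBMI/CHECK-BMI1 lines.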
+ +define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_d0: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $64, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rdi +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrq %cl, %rdi +; CHECK-NOBMI-NEXT: movq %rdi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_d0: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $64, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rdi +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrq %cl, %rdi +; CHECK-BMI1-NEXT: movq %rdi, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_d0: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %val, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + ret i64 %masked +} + +define i64 @bzhi64_d1_zext(i64 %val, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_d1_zext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $64, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rdi +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrq %cl, %rdi +; CHECK-NOBMI-NEXT: movq %rdi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_d1_zext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $64, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rdi +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrq %cl, %rdi +; CHECK-BMI1-NEXT: movq %rdi, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_d1_zext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i32 64, %numlowbits + %sh_prom = zext i32 %numhighbits to i64 + %highbitscleared = shl i64 %val, %sh_prom + %masked = lshr i64 %highbitscleared, %sh_prom + ret i64 %masked +} + +define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_d2_load: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movq (%rdi), %rax +; CHECK-NOBMI-NEXT: movl $64, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrq %cl, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_d2_load: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movq (%rdi), %rax +; CHECK-BMI1-NEXT: movl $64, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrq %cl, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_d2_load: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; CHECK-BMI2-NEXT: retq + %val = load i64, i64* %w + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %val, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + ret i64 %masked +} + +define i64 @bzhi64_d3_load_zext(i64* %w, i32 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_d3_load_zext: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movq (%rdi), %rax +; CHECK-NOBMI-NEXT: movl $64, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rax +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrq %cl, %rax +; CHECK-NOBMI-NEXT: retq +; +; 
CHECK-BMI1-LABEL: bzhi64_d3_load_zext: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movq (%rdi), %rax +; CHECK-BMI1-NEXT: movl $64, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rax +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrq %cl, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_d3_load_zext: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax +; CHECK-BMI2-NEXT: retq + %val = load i64, i64* %w + %numhighbits = sub i32 64, %numlowbits + %sh_prom = zext i32 %numhighbits to i64 + %highbitscleared = shl i64 %val, %sh_prom + %masked = lshr i64 %highbitscleared, %sh_prom + ret i64 %masked +} + +define i32 @bzhi64_d4_trunc32(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_d4_trunc32: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $64, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rdi +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrq %cl, %rdi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_d4_trunc32: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $64, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rdi +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrq %cl, %rdi +; CHECK-BMI1-NEXT: movl %edi, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_d4_trunc32: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; CHECK-BMI2-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %val, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + %ret = trunc i64 %masked to i32 + ret i32 %ret +} + +define i16 @bzhi64_d5_trunc16(i64 %val, i64 %numlowbits) { +; CHECK-NOBMI-LABEL: bzhi64_d5_trunc16: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl $64, %ecx +; CHECK-NOBMI-NEXT: subl %esi, %ecx +; CHECK-NOBMI-NEXT: shlq %cl, %rdi +; CHECK-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NOBMI-NEXT: shrq %cl, %rdi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_d5_trunc16: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $64, %ecx +; CHECK-BMI1-NEXT: subl %esi, %ecx +; CHECK-BMI1-NEXT: shlq %cl, %rdi +; CHECK-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-BMI1-NEXT: shrq %cl, %rdi +; CHECK-BMI1-NEXT: movl %edi, %eax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_d5_trunc16: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: bzhiq %rsi, %rdi, %rax +; CHECK-BMI2-NEXT: # kill: def $ax killed $ax killed $rax +; CHECK-BMI2-NEXT: retq + %numhighbits = sub i64 64, %numlowbits + %highbitscleared = shl i64 %val, %numhighbits + %masked = lshr i64 %highbitscleared, %numhighbits + %ret = trunc i64 %masked to i16 + ret i16 %ret +} + +; ---------------------------------------------------------------------------- ; +; Constant mask +; ---------------------------------------------------------------------------- ; + +define i64 @bzhi64_constant_mask(i64 %val) { +; CHECK-NOBMI-LABEL: bzhi64_constant_mask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF +; CHECK-NOBMI-NEXT: andq %rdi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_constant_mask: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $15872, %eax # imm = 0x3E00 +; 
CHECK-BMI1-NEXT: bextrq %rax, %rdi, %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_constant_mask: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movb $62, %al +; CHECK-BMI2-NEXT: bzhiq %rax, %rdi, %rax +; CHECK-BMI2-NEXT: retq + %masked = and i64 %val, 4611686018427387903 + ret i64 %masked +} + +define i64 @bzhi64_constant_mask_load(i64* %val) { +; CHECK-NOBMI-LABEL: bzhi64_constant_mask_load: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF +; CHECK-NOBMI-NEXT: andq (%rdi), %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI1-LABEL: bzhi64_constant_mask_load: +; CHECK-BMI1: # %bb.0: +; CHECK-BMI1-NEXT: movl $15872, %eax # imm = 0x3E00 +; CHECK-BMI1-NEXT: bextrq %rax, (%rdi), %rax +; CHECK-BMI1-NEXT: retq +; +; CHECK-BMI2-LABEL: bzhi64_constant_mask_load: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: movb $62, %al +; CHECK-BMI2-NEXT: bzhiq %rax, (%rdi), %rax +; CHECK-BMI2-NEXT: retq + %val1 = load i64, i64* %val + %masked = and i64 %val1, 4611686018427387903 + ret i64 %masked +} + +define i64 @bzhi64_small_constant_mask(i64 %val) { +; CHECK-LABEL: bzhi64_small_constant_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: retq + %masked = and i64 %val, 2147483647 + ret i64 %masked +} Index: test/CodeGen/X86/bmi.ll =================================================================== --- test/CodeGen/X86/bmi.ll +++ test/CodeGen/X86/bmi.ll @@ -443,278 +443,6 @@ ret i64 %and } -define i32 @bzhi32b(i32 %x, i8 zeroext %index) { -; BMI1-LABEL: bzhi32b: -; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $1, %eax -; BMI1-NEXT: movl %esi, %ecx -; BMI1-NEXT: shll %cl, %eax -; BMI1-NEXT: decl %eax -; BMI1-NEXT: andl %edi, %eax -; BMI1-NEXT: retq -; -; BMI2-LABEL: bzhi32b: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: bzhil %esi, %edi, %eax -; BMI2-NEXT: retq -entry: - %conv = zext i8 %index to i32 - %shl = shl i32 1, %conv - %sub = add nsw i32 %shl, -1 - %and = and i32 %sub, %x - ret i32 %and -} - -define i32 @bzhi32b_load(i32* %w, i8 zeroext %index) { -; BMI1-LABEL: bzhi32b_load: -; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $1, %eax -; BMI1-NEXT: movl %esi, %ecx -; BMI1-NEXT: shll %cl, %eax -; BMI1-NEXT: decl %eax -; BMI1-NEXT: andl (%rdi), %eax -; BMI1-NEXT: retq -; -; BMI2-LABEL: bzhi32b_load: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: bzhil %esi, (%rdi), %eax -; BMI2-NEXT: retq -entry: - %x = load i32, i32* %w - %conv = zext i8 %index to i32 - %shl = shl i32 1, %conv - %sub = add nsw i32 %shl, -1 - %and = and i32 %sub, %x - ret i32 %and -} - -define i32 @bzhi32c(i32 %x, i8 zeroext %index) { -; BMI1-LABEL: bzhi32c: -; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $1, %eax -; BMI1-NEXT: movl %esi, %ecx -; BMI1-NEXT: shll %cl, %eax -; BMI1-NEXT: decl %eax -; BMI1-NEXT: andl %edi, %eax -; BMI1-NEXT: retq -; -; BMI2-LABEL: bzhi32c: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: bzhil %esi, %edi, %eax -; BMI2-NEXT: retq -entry: - %conv = zext i8 %index to i32 - %shl = shl i32 1, %conv - %sub = add nsw i32 %shl, -1 - %and = and i32 %x, %sub - ret i32 %and -} - -define i32 @bzhi32d(i32 %a, i32 %b) { -; BMI1-LABEL: bzhi32d: -; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $32, %ecx -; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: movl $-1, %eax -; BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; BMI1-NEXT: shrl %cl, %eax -; BMI1-NEXT: andl %edi, %eax -; BMI1-NEXT: retq -; -; BMI2-LABEL: bzhi32d: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: bzhil %esi, %edi, %eax -; BMI2-NEXT: retq -entry: - %sub = 
sub i32 32, %b - %shr = lshr i32 -1, %sub - %and = and i32 %shr, %a - ret i32 %and -} - -define i32 @bzhi32e(i32 %a, i32 %b) { -; BMI1-LABEL: bzhi32e: -; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $32, %ecx -; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: shll %cl, %edi -; BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; BMI1-NEXT: shrl %cl, %edi -; BMI1-NEXT: movl %edi, %eax -; BMI1-NEXT: retq -; -; BMI2-LABEL: bzhi32e: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: bzhil %esi, %edi, %eax -; BMI2-NEXT: retq -entry: - %sub = sub i32 32, %b - %shl = shl i32 %a, %sub - %shr = lshr i32 %shl, %sub - ret i32 %shr -} - -define i64 @bzhi64b(i64 %x, i8 zeroext %index) { -; BMI1-LABEL: bzhi64b: -; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $1, %eax -; BMI1-NEXT: movl %esi, %ecx -; BMI1-NEXT: shlq %cl, %rax -; BMI1-NEXT: decq %rax -; BMI1-NEXT: andq %rdi, %rax -; BMI1-NEXT: retq -; -; BMI2-LABEL: bzhi64b: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; BMI2-NEXT: retq -entry: - %conv = zext i8 %index to i64 - %shl = shl i64 1, %conv - %sub = add nsw i64 %shl, -1 - %and = and i64 %x, %sub - ret i64 %and -} - -define i64 @bzhi64c(i64 %a, i64 %b) { -; BMI1-LABEL: bzhi64c: -; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $64, %ecx -; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: movq $-1, %rax -; BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; BMI1-NEXT: shrq %cl, %rax -; BMI1-NEXT: andq %rdi, %rax -; BMI1-NEXT: retq -; -; BMI2-LABEL: bzhi64c: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; BMI2-NEXT: retq -entry: - %sub = sub i64 64, %b - %shr = lshr i64 -1, %sub - %and = and i64 %shr, %a - ret i64 %and -} - -define i64 @bzhi64d(i64 %a, i32 %b) { -; BMI1-LABEL: bzhi64d: -; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $64, %ecx -; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: movq $-1, %rax -; BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; BMI1-NEXT: shrq %cl, %rax -; BMI1-NEXT: andq %rdi, %rax -; BMI1-NEXT: retq -; -; BMI2-LABEL: bzhi64d: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; BMI2-NEXT: retq -entry: - %sub = sub i32 64, %b - %sh_prom = zext i32 %sub to i64 - %shr = lshr i64 -1, %sh_prom - %and = and i64 %shr, %a - ret i64 %and -} - -define i64 @bzhi64e(i64 %a, i64 %b) { -; BMI1-LABEL: bzhi64e: -; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $64, %ecx -; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: shlq %cl, %rdi -; BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; BMI1-NEXT: shrq %cl, %rdi -; BMI1-NEXT: movq %rdi, %rax -; BMI1-NEXT: retq -; -; BMI2-LABEL: bzhi64e: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; BMI2-NEXT: retq -entry: - %sub = sub i64 64, %b - %shl = shl i64 %a, %sub - %shr = lshr i64 %shl, %sub - ret i64 %shr -} - -define i64 @bzhi64f(i64 %a, i32 %b) { -; BMI1-LABEL: bzhi64f: -; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $64, %ecx -; BMI1-NEXT: subl %esi, %ecx -; BMI1-NEXT: shlq %cl, %rdi -; BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; BMI1-NEXT: shrq %cl, %rdi -; BMI1-NEXT: movq %rdi, %rax -; BMI1-NEXT: retq -; -; BMI2-LABEL: bzhi64f: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: # kill: def $esi killed $esi def $rsi -; BMI2-NEXT: bzhiq %rsi, %rdi, %rax -; BMI2-NEXT: retq -entry: - %sub = sub i32 64, %b - %sh_prom = zext i32 %sub to i64 - %shl = shl i64 %a, %sh_prom - %shr = lshr i64 %shl, %sh_prom - ret i64 %shr -} - -define i64 @bzhi64_constant_mask(i64 %x) { -; BMI1-LABEL: bzhi64_constant_mask: -; BMI1: # 
%bb.0: # %entry -; BMI1-NEXT: movl $15872, %eax # imm = 0x3E00 -; BMI1-NEXT: bextrq %rax, %rdi, %rax -; BMI1-NEXT: retq -; -; BMI2-LABEL: bzhi64_constant_mask: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: movb $62, %al -; BMI2-NEXT: bzhiq %rax, %rdi, %rax -; BMI2-NEXT: retq -entry: - %and = and i64 %x, 4611686018427387903 - ret i64 %and -} - -define i64 @bzhi64_constant_mask_load(i64* %x) { -; BMI1-LABEL: bzhi64_constant_mask_load: -; BMI1: # %bb.0: # %entry -; BMI1-NEXT: movl $15872, %eax # imm = 0x3E00 -; BMI1-NEXT: bextrq %rax, (%rdi), %rax -; BMI1-NEXT: retq -; -; BMI2-LABEL: bzhi64_constant_mask_load: -; BMI2: # %bb.0: # %entry -; BMI2-NEXT: movb $62, %al -; BMI2-NEXT: bzhiq %rax, (%rdi), %rax -; BMI2-NEXT: retq -entry: - %x1 = load i64, i64* %x - %and = and i64 %x1, 4611686018427387903 - ret i64 %and -} - -define i64 @bzhi64_small_constant_mask(i64 %x) { -; CHECK-LABEL: bzhi64_small_constant_mask: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: retq -entry: - %and = and i64 %x, 2147483647 - ret i64 %and -} - define i32 @blsi32(i32 %x) { ; CHECK-LABEL: blsi32: ; CHECK: # %bb.0: