HomePhabricator

[InstCombine] relax masking requirement for truncated funnel/rotate match

Authored by spatel on Apr 28 2021, 1:13 PM.

Description

[InstCombine] relax masking requirement for truncated funnel/rotate match

I was investigating a seemingly unrelated improvement in demanded
bits for shift-left, but that caused regressions on these tests
because we were able to look through/eliminate the mask.

https://alive2.llvm.org/ce/z/Ztdr22

define i8 @src(i32 %x, i32 %y, i32 %shift) {
%and = and i32 %shift, 3
%conv = and i32 %x, 255
%shr = lshr i32 %conv, %and
%sub = sub i32 8, %and
%shl = shl i32 %y, %sub
%or = or i32 %shr, %shl
%conv2 = trunc i32 %or to i8
ret i8 %conv2
}

define i8 @tgt(i32 %x, i32 %y, i32 %shift) {
%x8 = trunc i32 %x to i8
%y8 = trunc i32 %y to i8
%shift8 = trunc i32 %shift to i8
%and = and i8 %shift8, 3
%conv2 = call i8 @llvm.fshr.i8(i8 %y8, i8 %x8, i8 %and)
ret i8 %conv2
}

declare i8 @llvm.fshr.i8(i8,i8,i8)

Details

Committed
spatelApr 28 2021, 1:49 PM
Parents
rG9fb946f1a8dd: [InstCombine] add tests for rotate/funnel; NFC
Branches
Unknown
Tags
Unknown

Event Timeline

Carrot added a subscriber: Carrot.May 17 2021, 6:47 PM

This patch generates wrong code for the function

void foo(char *p, int l, int t) {
  *p = static_cast<char>(static_cast<char>(t << (8u - l)) |
                         static_cast<char>(0xffu >> l));
}

_Z3fooPcii:                             # @_Z3fooPcii
        .cfi_startproc
# %bb.0:
        movl    %esi, %ecx
        andb    $7, %cl                                        // it doesn't work for %cl == 8
        shll    $8, %edx
        orl     $255, %edx
                                        # kill: def $cl killed $cl killed $ecx
        shrl    %cl, %edx
        movb    %dl, (%rdi)
        retq

When I pass in l=8, the generated code computes wrong result.

This patch generates wrong code for the function

void foo(char *p, int l, int t) {
  *p = static_cast<char>(static_cast<char>(t << (8u - l)) |
                         static_cast<char>(0xffu >> l));
}

_Z3fooPcii:                             # @_Z3fooPcii
        .cfi_startproc
# %bb.0:
        movl    %esi, %ecx
        andb    $7, %cl                                        // it doesn't work for %cl == 8
        shll    $8, %edx
        orl     $255, %edx
                                        # kill: def $cl killed $cl killed $ecx
        shrl    %cl, %edx
        movb    %dl, (%rdi)
        retq

When I pass in l=8, the generated code computes wrong result.

The bug is in instcombine when checking if the shift amount is sufficiently restricted. Taking a look now.

This patch generates wrong code for the function

Thanks for the test case! This patch should fix it:
6d949a9c8fa4

I'm not sure if we can improve the generated code. If you see a way, please file a bug.

Thank you, Sanjay.
It really fixed our problem in a larger application.