diff --git a/llvm/test/CodeGen/X86/combine-sra-load.ll b/llvm/test/CodeGen/X86/combine-sra-load.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-sra-load.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK
+
+; FIXME: fold (sra (load i32), 16) -> (sextload i16)
+define i32 @sra_half(i32* %p) {
+; CHECK-LABEL: sra_half:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    sarl $16, %eax
+; CHECK-NEXT:    retq
+  %load = load i32, i32* %p
+  %shift = ashr i32 %load, 16
+  ret i32 %shift
+}
+
+; Vector version not folded.
+define <4 x i32> @sra_half_vec(<4 x i32>* %p) {
+; CHECK-LABEL: sra_half_vec:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movdqa (%rdi), %xmm0
+; CHECK-NEXT:    psrad $16, %xmm0
+; CHECK-NEXT:    retq
+  %load = load <4 x i32>, <4 x i32>* %p
+  %shift = ashr <4 x i32> %load, <i32 16, i32 16, i32 16, i32 16>
+  ret <4 x i32> %shift
+}
+
+; FIXME: fold (sra (load i64), 48) -> (sextload i16)
+define i64 @sra_large_shift(i64* %r) {
+; CHECK-LABEL: sra_large_shift:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    sarq $48, %rax
+; CHECK-NEXT:    retq
+  %t0 = load i64, i64* %r
+  %conv = ashr i64 %t0, 48
+  ret i64 %conv
+}
+
+; Negative test, no fold expected (would need an i24 sextload).
+define i32 @sra_small_shift(i32* %p) {
+; CHECK-LABEL: sra_small_shift:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    sarl $8, %eax
+; CHECK-NEXT:    retq
+  %load = load i32, i32* %p
+  %shift = ashr i32 %load, 8
+  ret i32 %shift
+}
+
+; Folded to a zextload of the high byte: the zext makes the sra act as a srl.
+define i32 @sra_of_zextload(i16* %p) {
+; CHECK-LABEL: sra_of_zextload:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movzbl 1(%rdi), %eax
+; CHECK-NEXT:    retq
+  %load = load i16, i16* %p
+  %zext = zext i16 %load to i32
+  %shift = ashr i32 %zext, 8
+  ret i32 %shift
+}
+
+; FIXME: fold (sra (sextload i16 to i32), 8) -> (sextload i8)
+define i32 @sra_of_sextload(i16* %p) {
+; CHECK-LABEL: sra_of_sextload:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movswl (%rdi), %eax
+; CHECK-NEXT:    sarl $8, %eax
+; CHECK-NEXT:    retq
+  %load = load i16, i16* %p
+  %sext = sext i16 %load to i32
+  %shift = ashr i32 %sext, 8
+  ret i32 %shift
+}
+
+; Negative test. All bits loaded from memory are shifted out, so there is no
+; narrower sextload to form and the shift is kept.
+define i32 @sra_of_sextload_no_fold(i16* %p) {
+; CHECK-LABEL: sra_of_sextload_no_fold:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movswl (%rdi), %eax
+; CHECK-NEXT:    sarl $16, %eax
+; CHECK-NEXT:    retq
+  %load = load i16, i16* %p
+  %sext = sext i16 %load to i32
+  %shift = ashr i32 %sext, 16
+  ret i32 %shift
+}
+
+; FIXME: Fold even if SRA has multiple uses.
+define i32 @sra_to_sextload_multiple_sra_uses(i32* %p) {
+; CHECK-LABEL: sra_to_sextload_multiple_sra_uses:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl (%rdi), %ecx
+; CHECK-NEXT:    sarl $16, %ecx
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    xorl $6, %eax
+; CHECK-NEXT:    orl %ecx, %eax
+; CHECK-NEXT:    retq
+  %load = load i32, i32* %p
+  %shift = ashr i32 %load, 16
+  %use1 = xor i32 %shift, 6
+  %use2 = or i32 %shift, %use1
+  ret i32 %use2
+}
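
For reference (not part of the patch): a sketch of the IR that the requested
(sra (load i32), 16) -> (sextload i16) fold from @sra_half corresponds to on a
little-endian target such as x86-64. The function name and explicit pointer
arithmetic below are illustrative only; bits 16..31 of the i32 live at byte
offset 2, so the shift-by-16 becomes a sign-extending i16 load of that half.

define i32 @sra_half_folded(i32* %p) {
  %p16 = bitcast i32* %p to i16*
  %hi = getelementptr i16, i16* %p16, i64 1   ; upper half of the i32: byte offset 2
  %load = load i16, i16* %hi                  ; narrower load of bits 16..31
  %sext = sext i16 %load to i32               ; same value as (ashr i32 %load32, 16)
  ret i32 %sext
}

This form should lower to a single sign-extending load (movswl 2(%rdi), %eax),
which is the codegen the FIXME comments are asking the DAG combine to produce.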