Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -1627,7 +1627,13 @@
 def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
 def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
 
-// Helper imms that check if a mask doesn't change significant shift bits.
+// Helper imms to check if a mask doesn't change significant shift/rotate bits.
+def immShift8 : ImmLeaf<i8, [{
+  return countTrailingOnes<uint64_t>(Imm) >= 3;
+}]>;
+def immShift16 : ImmLeaf<i8, [{
+  return countTrailingOnes<uint64_t>(Imm) >= 4;
+}]>;
 def immShift32 : ImmLeaf<i8, [{
   return countTrailingOnes<uint64_t>(Imm) >= 5;
 }]>;
@@ -1661,8 +1667,38 @@
 defm : MaskedShiftAmountPats<shl, "SHL">;
 defm : MaskedShiftAmountPats<srl, "SHR">;
 defm : MaskedShiftAmountPats<sra, "SAR">;
-defm : MaskedShiftAmountPats<rotl, "ROL">;
-defm : MaskedShiftAmountPats<rotr, "ROR">;
+
+// ROL/ROR instructions allow a stronger mask optimization than shift for 8- and
+// 16-bit. We can remove a mask of any (bitwidth - 1) on the rotation amount
+// because over-rotating produces the same result. This is noted in the Intel
+// docs with: "tempCOUNT <- (COUNT & COUNTMASK) MOD SIZE". Masking the rotation
+// amount could affect EFLAGS results, but that does not matter because we are
+// not tracking flags for these nodes.
+multiclass MaskedRotateAmountPats<SDNode frag, string name> {
+  // (rot x (and y, BitWidth - 1)) ==> (rot x, y)
+  def : Pat<(frag GR8:$src1, (and CL, immShift8)),
+            (!cast<Instruction>(name # "8rCL") GR8:$src1)>;
+  def : Pat<(frag GR16:$src1, (and CL, immShift16)),
+            (!cast<Instruction>(name # "16rCL") GR16:$src1)>;
+  def : Pat<(frag GR32:$src1, (and CL, immShift32)),
+            (!cast<Instruction>(name # "32rCL") GR32:$src1)>;
+  def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift8)), addr:$dst),
+            (!cast<Instruction>(name # "8mCL") addr:$dst)>;
+  def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift16)), addr:$dst),
+            (!cast<Instruction>(name # "16mCL") addr:$dst)>;
+  def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
+            (!cast<Instruction>(name # "32mCL") addr:$dst)>;
+
+  // (rot x (and y, 63)) ==> (rot x, y)
+  def : Pat<(frag GR64:$src1, (and CL, immShift64)),
+            (!cast<Instruction>(name # "64rCL") GR64:$src1)>;
+  def : Pat<(store (frag (loadi64 addr:$dst), (and CL, immShift64)), addr:$dst),
+            (!cast<Instruction>(name # "64mCL") addr:$dst)>;
+}
+
+
+defm : MaskedRotateAmountPats<rotl, "ROL">;
+defm : MaskedRotateAmountPats<rotr, "ROR">;
 
 // Double shift amount is implicitly masked.
 multiclass MaskedDoubleShiftAmountPats<SDNode frag, string name> {
Index: test/CodeGen/X86/rotate4.ll
===================================================================
--- test/CodeGen/X86/rotate4.ll
+++ test/CodeGen/X86/rotate4.ll
@@ -144,7 +144,6 @@
 define i8 @rotate_left_8(i8 %x, i32 %amount) {
 ; CHECK-LABEL: rotate_left_8:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $7, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rolb %cl, %dil
 ; CHECK-NEXT:    movl %edi, %eax
@@ -162,7 +161,6 @@
 define i8 @rotate_right_8(i8 %x, i32 %amount) {
 ; CHECK-LABEL: rotate_right_8:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $7, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rorb %cl, %dil
 ; CHECK-NEXT:    movl %edi, %eax
@@ -180,7 +178,6 @@
 define i16 @rotate_left_16(i16 %x, i32 %amount) {
 ; CHECK-LABEL: rotate_left_16:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $15, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rolw %cl, %di
 ; CHECK-NEXT:    movl %edi, %eax
@@ -198,7 +195,6 @@
 define i16 @rotate_right_16(i16 %x, i32 %amount) {
 ; CHECK-LABEL: rotate_right_16:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $15, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rorw %cl, %di
 ; CHECK-NEXT:    movl %edi, %eax
@@ -216,7 +212,6 @@
 define void @rotate_left_m8(i8* %p, i32 %amount) {
 ; CHECK-LABEL: rotate_left_m8:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $7, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rolb %cl, (%rdi)
 ; CHECK-NEXT:    retq
@@ -235,7 +230,6 @@
 define void @rotate_right_m8(i8* %p, i32 %amount) {
 ; CHECK-LABEL: rotate_right_m8:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $7, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rorb %cl, (%rdi)
 ; CHECK-NEXT:    retq
@@ -254,7 +248,6 @@
 define void @rotate_left_m16(i16* %p, i32 %amount) {
 ; CHECK-LABEL: rotate_left_m16:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $15, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rolw %cl, (%rdi)
 ; CHECK-NEXT:    retq
@@ -273,7 +266,6 @@
 define void @rotate_right_m16(i16* %p, i32 %amount) {
 ; CHECK-LABEL: rotate_right_m16:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $15, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rorw %cl, (%rdi)
 ; CHECK-NEXT:    retq