Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -622,7 +622,6 @@
   case TargetOpcode::G_OR:
   case TargetOpcode::G_XOR:
   case TargetOpcode::G_SUB:
-  case TargetOpcode::G_SHL:
     // Perform operation at larger width (any extension is fine here, high bits
     // don't affect the result) and then truncate the result back to the
     // original type.
@@ -632,15 +631,32 @@
     MIRBuilder.recordInsertion(&MI);
     return Legalized;
 
+  case TargetOpcode::G_SHL:
+    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+    // The "number of bits to shift" operand must preserve its value as an
+    // unsigned integer:
+    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+    widenScalarDst(MI, WideTy);
+    MIRBuilder.recordInsertion(&MI);
+    return Legalized;
+
   case TargetOpcode::G_SDIV:
   case TargetOpcode::G_SREM:
-  case TargetOpcode::G_ASHR:
     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
     widenScalarDst(MI, WideTy);
     MIRBuilder.recordInsertion(&MI);
     return Legalized;
 
+  case TargetOpcode::G_ASHR:
+    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+    // The "number of bits to shift" operand must preserve its value as an
+    // unsigned integer:
+    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+    widenScalarDst(MI, WideTy);
+    MIRBuilder.recordInsertion(&MI);
+    return Legalized;
+
   case TargetOpcode::G_UDIV:
   case TargetOpcode::G_UREM:
   case TargetOpcode::G_LSHR:
Index: test/CodeGen/AArch64/GlobalISel/legalize-shift.mir
===================================================================
--- test/CodeGen/AArch64/GlobalISel/legalize-shift.mir
+++ test/CodeGen/AArch64/GlobalISel/legalize-shift.mir
@@ -30,26 +30,27 @@
     ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
     ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]]
     ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]]
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[TRUNC1]], [[C1]]
-    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]]
-    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[ASHR1]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ASHR2]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[C1]]
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[AND]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ASHR1]](s32)
     ; CHECK: $w0 = COPY [[COPY2]](s32)
     ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C2]]
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC2]], [[C2]]
     ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C3]]
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[AND1]]
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC3]], [[C3]]
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[AND2]]
     ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
     ; CHECK: $w0 = COPY [[COPY3]](s32)
     ; CHECK: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY1]]0, [[COPY1]]1
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]]2(s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[TRUNC5]], [[C4]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[TRUNC4]], [[AND3]]
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32)
     ; CHECK: $w0 = COPY [[COPY4]](s32)
     %0(s64) = COPY $x0
     %1(s64) = COPY $x1
Index: test/CodeGen/X86/GlobalISel/ashr-scalar.ll
===================================================================
--- test/CodeGen/X86/GlobalISel/ashr-scalar.ll
+++ test/CodeGen/X86/GlobalISel/ashr-scalar.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
 
 define i64 @test_ashr_i64(i64 %arg1, i64 %arg2) {
@@ -153,8 +152,7 @@
 ; X64: # %bb.0:
 ; X64-NEXT: shlb $7, %dil
 ; X64-NEXT: sarb $7, %dil
-; X64-NEXT: shlb $7, %sil
-; X64-NEXT: sarb $7, %sil
+; X64-NEXT: andb $1, %sil
 ; X64-NEXT: movl %esi, %ecx
 ; X64-NEXT: sarb %cl, %dil
 ; X64-NEXT: movl %edi, %eax
@@ -171,8 +169,7 @@
 ; X64-NEXT: movb $-1, %cl
 ; X64-NEXT: shlb $7, %dil
 ; X64-NEXT: sarb $7, %dil
-; X64-NEXT: shlb $7, %cl
-; X64-NEXT: sarb $7, %cl
+; X64-NEXT: andb $1, %cl
 ; X64-NEXT: sarb %cl, %dil
 ; X64-NEXT: movl %edi, %eax
 ; X64-NEXT: retq
Index: test/CodeGen/X86/GlobalISel/shl-scalar.ll
===================================================================
--- test/CodeGen/X86/GlobalISel/shl-scalar.ll
+++ test/CodeGen/X86/GlobalISel/shl-scalar.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=X64
 
 define i64 @test_shl_i64(i64 %arg1, i64 %arg2) {
@@ -151,6 +150,7 @@
 define i1 @test_shl_i1(i32 %arg1, i32 %arg2) {
 ; X64-LABEL: test_shl_i1:
 ; X64: # %bb.0:
+; X64-NEXT: andb $1, %sil
 ; X64-NEXT: movl %esi, %ecx
 ; X64-NEXT: shlb %cl, %dil
 ; X64-NEXT: movl %edi, %eax
@@ -165,6 +165,7 @@
 ; X64-LABEL: test_shl_i1_imm1:
 ; X64: # %bb.0:
 ; X64-NEXT: movb $-1, %cl
+; X64-NEXT: andb $1, %cl
 ; X64-NEXT: shlb %cl, %dil
 ; X64-NEXT: movl %edi, %eax
 ; X64-NEXT: retq
@@ -172,3 +173,69 @@
   %res = shl i1 %a, 1
   ret i1 %res
 }
+
+define i16 @test_shl_i4(i16 %v, i16 %a, i16 %b) {
+; Let's say the arguments are the following unsigned
+; integers in two's complement representation:
+;
+; %v: 77 (0000 0000 0100 1101)
+; %a: 74 (0000 0000 0100 1010)
+; %b: 72 (0000 0000 0100 1000)
+  %v.t = trunc i16 %v to i4 ; %v.t: 13 (1101)
+  %a.t = trunc i16 %a to i4 ; %a.t: 10 (1010)
+  %b.t = trunc i16 %b to i4 ; %b.t: 8 (1000)
+  %n.t = add i4 %a.t, %b.t ; %n.t: 2 (0010)
+  %r.t = shl i4 %v.t, %n.t ; %r.t: 4 (0100)
+  %r = zext i4 %r.t to i16
+; %r: 4 (0000 0000 0000 0100)
+  ret i16 %r
+
+; X64-LABEL: test_shl_i4
+;
+; %di: 77 (0000 0000 0100 1101)
+; %si: 74 (0000 0000 0100 1010)
+; %dx: 72 (0000 0000 0100 1000)
+;
+; X64: # %bb.0:
+;
+; X64-NEXT: addb %sil, %dl
+; %dx: 146 (0000 0000 1001 0010)
+;
+; X64-NEXT: andb $15, %dl
+; %dx: 2 (0000 0000 0000 0010)
+;
+; X64-NEXT: movl %edx, %ecx
+; %cx: 2 (0000 0000 0000 0010)
+;
+; X64-NEXT: shlb %cl, %dil
+; %di: 52 (0000 0000 0011 0100)
+;
+; X64-NEXT: andw $15, %di
+; %di: 4 (0000 0000 0000 0100)
+;
+; X64-NEXT: movl %edi, %eax
+; %ax: 4 (0000 0000 0000 0100)
+;
+; X64-NEXT: retq
+;
+; Let's pretend that legalizing G_SHL by widening its second
+; source operand is done via G_ANYEXT rather than G_ZEXT and
+; see what happens:
+;
+; addb %sil, %dl
+; %dx: 146 (0000 0000 1001 0010)
+;
+; movl %edx, %ecx
+; %cx: 146 (0000 0000 1001 0010)
+;
+; shlb %cl, %dil
+; %di: 0 (0000 0000 0000 0000)
+;
+; andw $15, %di
+; %di: 0 (0000 0000 0000 0000)
+;
+; movl %edi, %eax
+; %ax: 0 (0000 0000 0000 0000)
+;
+; retq
+}
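
The zext-vs-anyext difference that the test_shl_i4 comments walk through can also be checked outside of LLVM. Below is a minimal standalone C++ sketch, not part of the patch; the helper name shlI4Widened, the choice of an 8-bit widened type, and the modulo-32 treatment of the shift amount (mirroring x86's shlb) are assumptions made purely for illustration, reusing the input values 77, 74, and 72 from the test comments.

#include <stdint.h>
#include <stdio.h>

// Shift an i4 value by an i4 amount after both have been widened to 8 bits
// (illustrative model only, not LLVM API). If ZextAmount is true the amount
// is masked to its low 4 bits, as G_ZEXT guarantees; otherwise the stale
// high bits are kept, which is one thing G_ANYEXT is allowed to produce.
static unsigned shlI4Widened(uint8_t V, uint8_t WideAmt, bool ZextAmount) {
  uint8_t Amt = ZextAmount ? (WideAmt & 0x0F) : WideAmt;
  // Model an 8-bit register shifted by Amt mod 32, like shlb %cl on x86.
  uint8_t Shifted = (uint8_t)((unsigned)V << (Amt & 31));
  return Shifted & 0x0F; // zext the i4 result back out, as the test does
}

int main() {
  uint8_t V = 77;                          // %v: the low 4 bits hold 13
  uint8_t WideAmt = (uint8_t)(74 + 72);    // addb: 146; as an i4 value it is 2
  printf("zext amount:   %u\n", shlI4Widened(V, WideAmt, true));   // prints 4
  printf("anyext amount: %u\n", shlI4Widened(V, WideAmt, false));  // prints 0
  return 0;
}

With the amount masked to 4 bits the sketch prints 4, matching the expected result in the test; with the garbage high bits left in place it prints 0, matching the broken-legalization sequence spelled out in the comments above.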