Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -335,10 +335,14 @@ if (ST.has16BitInsts()) { Shifts.legalFor({{S16, S32}, {S16, S16}}); Shifts.clampScalar(0, S16, S64); - } else + Shifts.widenScalarToNextPow2(0, 16); + } else { Shifts.clampScalar(0, S32, S64); + Shifts.widenScalarToNextPow2(0, 32); + } Shifts.clampScalar(1, S32, S32); + // FIXME: When RegBankSelect inserts copies, it will only create new // registers with scalar types. This means we can end up with // G_LOAD/G_STORE/G_GEP instruction with scalar types for their pointer Index: test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -272,6 +272,36 @@ bb.0: liveins: $vgpr0, $vgpr1 + ; SI-LABEL: name: test_shl_i8_i8 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[AND]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: $vgpr0 = COPY [[COPY4]](s32) + ; VI-LABEL: name: test_shl_i8_i8 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s32) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: test_shl_i8_i8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -280,3 +310,83 @@ %5:_(s32) = G_ANYEXT %4 $vgpr0 = COPY %5 ... + +--- +name: test_shl_i7_i7 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; SI-LABEL: name: test_shl_i7_i7 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[AND]](s32) + ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: $vgpr0 = COPY [[COPY4]](s32) + ; VI-LABEL: name: test_shl_i7_i7 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s32) + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: test_shl_i7_i7 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s7) = G_TRUNC %0 + %3:_(s7) = G_TRUNC %1 + %4:_(s7) = G_SHL %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: test_shl_i24_i32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; SI-LABEL: name: test_shl_i24_i32 + ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY1]](s32) + ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; SI: $vgpr0 = COPY [[COPY3]](s32) + ; VI-LABEL: name: test_shl_i24_i32 + ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY1]](s32) + ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; VI: $vgpr0 = COPY [[COPY3]](s32) + ; GFX9-LABEL: name: test_shl_i24_i32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY1]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) + ; GFX9: $vgpr0 = COPY [[COPY3]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s24) = G_TRUNC %0 + %3:_(s24) = G_SHL %2, %1 + %4:_(s32) = G_ANYEXT %3 + $vgpr0 = COPY %4 +...