Index: lib/Target/NVPTX/NVPTXInstrInfo.cpp
===================================================================
--- lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -37,8 +37,18 @@
   const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg);
   const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
 
-  if (DestRC->getSize() != SrcRC->getSize())
-    report_fatal_error("Copy one register into another with a different width");
+  if (DestRC->getSize() != SrcRC->getSize()) {
+    // The register sizes differ. The only mismatch handled here is a copy from
+    // a 16-bit integer register into a 32-bit one, emitted as CVT_u32_u16.
+    if (DestRC == &NVPTX::Int32RegsRegClass &&
+        SrcRC == &NVPTX::Int16RegsRegClass) {
+      BuildMI(MBB, I, DL, get(NVPTX::CVT_u32_u16), DestReg)
+          .addReg(SrcReg, getKillRegState(KillSrc))
+          .addImm(0);
+      return;
+    }
+    report_fatal_error("Invalid register copy - only 2-byte to 4-byte integer or same bitwidth copies are allowed");
+  }
 
   unsigned Op;
   if (DestRC == &NVPTX::Int1RegsRegClass) {
Index: test/CodeGen/NVPTX/reg-copy-int.ll
===================================================================
--- /dev/null
+++ test/CodeGen/NVPTX/reg-copy-int.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -O3 -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+; CHECK-LABEL: ex(
+define void @ex(i8 addrspace(1)* noalias readonly dereferenceable(1) %data, i8 addrspace(1)* noalias dereferenceable(1) %res, i8 %op) {
+entry:
+  %opext = zext i8 %op to i32
+  ; CHECK: ld.global.nc.u8 {{.*}}[[r1:%.+]], [%r{{.+}}];
+  ; CHECK: cvt.u32.u16 {{.*}}%r{{.+}}, [[r1]];
+  %val = load i8, i8 addrspace(1)* %data, align 1
+  %valext = zext i8 %val to i32
+  %resval = add nuw nsw i32 %valext, %opext
+  %restrunc = trunc i32 %resval to i8
+  store i8 %restrunc, i8 addrspace(1)* %res
+  ret void
+}
+
+!nvvm.annotations = !{!0}
+!0 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8)* @ex, !"kernel", i32 1}