Index: llvm/trunk/lib/Target/Mips/MipsFastISel.cpp
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsFastISel.cpp
+++ llvm/trunk/lib/Target/Mips/MipsFastISel.cpp
@@ -1313,6 +1313,90 @@
   switch (II->getIntrinsicID()) {
   default:
     return false;
+  case Intrinsic::bswap: {
+    // Lower llvm.bswap for i16 and i32 results. On MIPS32r2 this uses WSBH
+    // (byte swap within each halfword); otherwise a shift/mask sequence is
+    // emitted. Returns false for any other type, or if a register cannot be
+    // obtained.
+    Type *RetTy = II->getCalledFunction()->getReturnType();
+
+    MVT VT;
+    if (!isTypeSupported(RetTy, VT))
+      return false;
+
+    unsigned SrcReg = getRegForValue(II->getOperand(0));
+    if (SrcReg == 0)
+      return false;
+    unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
+    if (DestReg == 0)
+      return false;
+
+    if (VT == MVT::i16) {
+      if (Subtarget->hasMips32r2()) {
+        // A single WSBH swaps the two bytes of the low halfword.
+        emitInst(Mips::WSBH, DestReg).addReg(SrcReg);
+        updateValueMap(II, DestReg);
+        return true;
+      }
+
+      // Pre-R2: Dest = ((Src << 8) | (Src >> 8)) & 0xFFFF.
+      // NOTE(review): (Src >> 8) carries bits 16-23 of Src into bits 8-15, so
+      // this relies on the upper half of SrcReg being zero - confirm.
+      unsigned TempReg[3];
+      for (int i = 0; i < 3; i++) {
+        TempReg[i] = createResultReg(&Mips::GPR32RegClass);
+        if (TempReg[i] == 0)
+          return false;
+      }
+      emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8);
+      emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(8);
+      emitInst(Mips::OR, TempReg[2]).addReg(TempReg[0]).addReg(TempReg[1]);
+      emitInst(Mips::ANDi, DestReg).addReg(TempReg[2]).addImm(0xFFFF);
+      updateValueMap(II, DestReg);
+      return true;
+    }
+
+    if (VT == MVT::i32) {
+      if (Subtarget->hasMips32r2()) {
+        // WSBH swaps the bytes within each halfword; rotating by 16 then swaps
+        // the halfwords, which completes the 32-bit byte reversal.
+        unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
+        if (TempReg == 0)
+          return false;
+        emitInst(Mips::WSBH, TempReg).addReg(SrcReg);
+        emitInst(Mips::ROTR, DestReg).addReg(TempReg).addImm(16);
+        updateValueMap(II, DestReg);
+        return true;
+      }
+
+      // Pre-R2: move each byte to its mirrored position and OR them together.
+      unsigned TempReg[8];
+      for (int i = 0; i < 8; i++) {
+        TempReg[i] = createResultReg(&Mips::GPR32RegClass);
+        if (TempReg[i] == 0)
+          return false;
+      }
+
+      // Byte 2 -> byte 1 and byte 3 -> byte 0.
+      emitInst(Mips::SRL, TempReg[0]).addReg(SrcReg).addImm(8);
+      emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(24);
+      emitInst(Mips::ANDi, TempReg[2]).addReg(TempReg[0]).addImm(0xFF00);
+      emitInst(Mips::OR, TempReg[3]).addReg(TempReg[1]).addReg(TempReg[2]);
+
+      // Byte 1 -> byte 2.
+      emitInst(Mips::ANDi, TempReg[4]).addReg(SrcReg).addImm(0xFF00);
+      emitInst(Mips::SLL, TempReg[5]).addReg(TempReg[4]).addImm(8);
+
+      // Byte 0 -> byte 3, then combine all four bytes.
+      emitInst(Mips::SLL, TempReg[6]).addReg(SrcReg).addImm(24);
+      emitInst(Mips::OR, TempReg[7]).addReg(TempReg[3]).addReg(TempReg[5]);
+      emitInst(Mips::OR, DestReg).addReg(TempReg[6]).addReg(TempReg[7]);
+      updateValueMap(II, DestReg);
+      return true;
+    }
+
+    return false;
+  }
   case Intrinsic::memcpy:
   case Intrinsic::memmove: {
     const auto *MTI = cast<MemTransferInst>(II);
Index: llvm/trunk/test/CodeGen/Mips/Fast-ISel/bswap1.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/Fast-ISel/bswap1.ll
+++ llvm/trunk/test/CodeGen/Mips/Fast-ISel/bswap1.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 -relocation-model=pic \
+; RUN:     -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \
+; RUN:     -check-prefix=ALL -check-prefix=32R1
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 -relocation-model=pic \
+; RUN:     -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \
+; RUN:     -check-prefix=ALL -check-prefix=32R2
+
+; Check Mips fast-isel lowering of llvm.bswap.i16 and llvm.bswap.i32: MIPS32
+; uses a shift/mask sequence, MIPS32r2 uses wsbh (plus rotr for i32).
+
+@a = global i16 -21829, align 2
+@b = global i32 -1430532899, align 4
+@a1 = common global i16 0, align 2
+@b1 = common global i32 0, align 4
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+
+define void @b16() {
+  ; ALL-LABEL:  b16:
+
+  ; ALL:        lw    $[[A_ADDR:[0-9]+]], %got(a)($[[GOT_ADDR:[0-9]+]])
+  ; ALL:        lhu   $[[A_VAL:[0-9]+]], 0($[[A_ADDR]])
+
+  ; 32R1:       sll   $[[TMP1:[0-9]+]], $[[A_VAL]], 8
+  ; 32R1:       srl   $[[TMP2:[0-9]+]], $[[A_VAL]], 8
+  ; 32R1:       or    $[[TMP3:[0-9]+]], $[[TMP1]], $[[TMP2]]
+  ; 32R1:       andi  $[[TMP4:[0-9]+]], $[[TMP3]], 65535
+
+  ; 32R2:       wsbh  $[[RESULT:[0-9]+]], $[[A_VAL]]
+
+  %1 = load i16, i16* @a, align 2
+  %2 = call i16 @llvm.bswap.i16(i16 %1)
+  store i16 %2, i16* @a1, align 2
+  ret void
+}
+
+define void @b32() {
+  ; ALL-LABEL:  b32:
+
+  ; ALL:        lw    $[[B_ADDR:[0-9]+]], %got(b)($[[GOT_ADDR:[0-9]+]])
+  ; ALL:        lw    $[[B_VAL:[0-9]+]], 0($[[B_ADDR]])
+
+  ; 32R1:       srl   $[[TMP1:[0-9]+]], $[[B_VAL]], 8
+  ; 32R1:       srl   $[[TMP2:[0-9]+]], $[[B_VAL]], 24
+  ; 32R1:       andi  $[[TMP3:[0-9]+]], $[[TMP1]], 65280
+  ; 32R1:       or    $[[TMP4:[0-9]+]], $[[TMP2]], $[[TMP3]]
+  ; 32R1:       andi  $[[TMP5:[0-9]+]], $[[B_VAL]], 65280
+  ; 32R1:       sll   $[[TMP6:[0-9]+]], $[[TMP5]], 8
+  ; 32R1:       sll   $[[TMP7:[0-9]+]], $[[B_VAL]], 24
+  ; 32R1:       or    $[[TMP8:[0-9]+]], $[[TMP4]], $[[TMP6]]
+  ; 32R1:       or    $[[RESULT:[0-9]+]], $[[TMP7]], $[[TMP8]]
+
+  ; 32R2:       wsbh  $[[TMP:[0-9]+]], $[[B_VAL]]
+  ; 32R2:       rotr  $[[RESULT:[0-9]+]], $[[TMP]], 16
+
+  %1 = load i32, i32* @b, align 4
+  %2 = call i32 @llvm.bswap.i32(i32 %1)
+  store i32 %2, i32* @b1, align 4
+  ret void
+}