Index: lib/Target/R600/SIISelLowering.h =================================================================== --- lib/Target/R600/SIISelLowering.h +++ lib/Target/R600/SIISelLowering.h @@ -61,6 +61,9 @@ public: SITargetLowering(TargetMachine &tm); + bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/, + EVT /*VT*/) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; Index: lib/Target/R600/SIISelLowering.cpp =================================================================== --- lib/Target/R600/SIISelLowering.cpp +++ lib/Target/R600/SIISelLowering.cpp @@ -117,6 +117,8 @@ setOperationAction(ISD::SETCC, MVT::v2i1, Expand); setOperationAction(ISD::SETCC, MVT::v4i1, Expand); + setOperationAction(ISD::BSWAP, MVT::i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom); @@ -259,6 +261,13 @@ // TargetLowering queries //===----------------------------------------------------------------------===// +bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &, + EVT) const { + // SI has some legal vector types, but no legal vector operations. Say no + // shuffles are legal in order to prefer scalarizing some vector operations. + return false; +} + // FIXME: This really needs an address space argument. The immediate offset // size is different for different sets of memory instruction sets. 
Index: lib/Target/R600/SIInstructions.td =================================================================== --- lib/Target/R600/SIInstructions.td +++ lib/Target/R600/SIInstructions.td @@ -3090,6 +3090,13 @@ (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1) >; +def : Pat < + (i32 (bswap i32:$a)), + (V_BFI_B32 (S_MOV_B32 0x00ff00ff), + (V_ALIGNBIT_B32 $a, $a, 24), + (V_ALIGNBIT_B32 $a, $a, 8)) +>; + //============================================================================// // Miscellaneous Optimization Patterns //============================================================================// Index: test/CodeGen/R600/bswap.ll =================================================================== --- test/CodeGen/R600/bswap.ll +++ test/CodeGen/R600/bswap.ll @@ -1,12 +1,21 @@ -; RUN: llc -march=r600 -mcpu=SI < %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare i32 @llvm.bswap.i32(i32) nounwind readnone declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone +declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>) nounwind readnone declare i64 @llvm.bswap.i64(i64) nounwind readnone declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone +; FUNC-LABEL: @test_bswap_i32 +; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]] +; SI-DAG: V_ALIGNBIT_B32 [[TMP0:v[0-9]+]], [[VAL]], [[VAL]], 8 +; SI-DAG: V_ALIGNBIT_B32 [[TMP1:v[0-9]+]], [[VAL]], [[VAL]], 24 +; SI-DAG: S_MOV_B32 [[K:s[0-9]+]], 0xff00ff +; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[K]], [[TMP1]], [[TMP0]] +; SI: BUFFER_STORE_DWORD [[RESULT]] +; SI: S_ENDPGM define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { %val = load i32 addrspace(1)* %in, align 4 %bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone @@ -14,6 +23,14 @@ ret void } +; FUNC-LABEL: @test_bswap_v2i32 +; SI-DAG: 
V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI: S_ENDPGM define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind { %val = load <2 x i32> addrspace(1)* %in, align 8 %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone @@ -21,6 +38,20 @@ ret void } +; FUNC-LABEL: @test_bswap_v4i32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI: S_ENDPGM define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind { %val = load <4 x i32> addrspace(1)* %in, align 16 %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone @@ -28,6 +59,39 @@ ret void } +; FUNC-LABEL: @test_bswap_v8i32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_ALIGNBIT_B32 +; SI-DAG: V_BFI_B32 +; SI: S_ENDPGM +define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind { + %val = load <8 x i32> addrspace(1)* %in, align 32 + %bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone + store <8 x i32> %bswap, <8 x i32> addrspace(1)* %out, align 32 + ret void +} + define void @test_bswap_i64(i64 addrspace(1)* %out, 
i64 addrspace(1)* %in) nounwind { %val = load i64 addrspace(1)* %in, align 8 %bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone