Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
@@ -2686,6 +2686,22 @@
     MI->getOperand(3).setImm(Mask ^ Imm);
     return TargetInstrInfo::commuteInstruction(MI, NewMI);
   }
+  case X86::PCLMULQDQrr:
+  case X86::VPCLMULQDQrr: {
+    // The immediate picks which 64-bit half of each source is multiplied:
+    //   bit 0 set -> SRC1[127:64], clear -> SRC1[63:0]
+    //   bit 4 set -> SRC2[127:64], clear -> SRC2[63:0]
+    const unsigned OldImm = MI->getOperand(3).getImm();
+    const unsigned NewImm = ((OldImm & 0x01) << 4) | ((OldImm & 0x10) >> 4);
+    if (NewMI) {
+      // Work on a clone so the immediate rewrite below lands on the copy.
+      MachineFunction &MF = *MI->getParent()->getParent();
+      MI = MF.CloneMachineInstr(MI);
+      NewMI = false;
+    }
+    MI->getOperand(3).setImm(NewImm);
+    return TargetInstrInfo::commuteInstruction(MI, NewMI);
+  }
   case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
   case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
   case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td
@@ -7941,6 +7941,7 @@
 //===----------------------------------------------------------------------===//
 
 // AVX carry-less Multiplication instructions
+let isCommutable = 1 in
 def VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
            (ins VR128:$src1, VR128:$src2, u8imm:$src3),
            "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -7957,6 +7958,7 @@
 
 // Carry-less Multiplication instructions
 let Constraints = "$src1 = $dst" in {
+let isCommutable = 1 in
 def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
            (ins VR128:$src1, VR128:$src2, u8imm:$src3),
            "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
Index: llvm/trunk/test/CodeGen/X86/commute-clmul.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/commute-clmul.ll
+++ llvm/trunk/test/CodeGen/X86/commute-clmul.ll
@@ -0,0 +1,64 @@
+; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+sse2,+pclmul < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+avx2,+pclmul < %s | FileCheck %s --check-prefix=AVX
+
+; Every function passes the loaded vector as the first pclmulqdq argument.
+; The expected assembly uses (%rdi) as the memory operand, with bit 0 and
+; bit 4 of the immediate exchanged relative to the value in the IR call.
+
+declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <2 x i64> @commute_lq_lq(<2 x i64>* %a0, <2 x i64> %a1) #0 {
+  ;SSE-LABEL: commute_lq_lq
+  ;SSE:       pclmulqdq $0, (%rdi), %xmm0
+  ;SSE-NEXT:  retq
+
+  ;AVX-LABEL: commute_lq_lq
+  ;AVX:       vpclmulqdq $0, (%rdi), %xmm0, %xmm0
+  ;AVX-NEXT:  retq
+
+  %mem = load <2 x i64>* %a0
+  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %mem, <2 x i64> %a1, i8 0)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @commute_lq_hq(<2 x i64>* %a0, <2 x i64> %a1) #0 {
+  ;SSE-LABEL: commute_lq_hq
+  ;SSE:       pclmulqdq $1, (%rdi), %xmm0
+  ;SSE-NEXT:  retq
+
+  ;AVX-LABEL: commute_lq_hq
+  ;AVX:       vpclmulqdq $1, (%rdi), %xmm0, %xmm0
+  ;AVX-NEXT:  retq
+
+  %mem = load <2 x i64>* %a0
+  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %mem, <2 x i64> %a1, i8 16)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @commute_hq_lq(<2 x i64>* %a0, <2 x i64> %a1) #0 {
+  ;SSE-LABEL: commute_hq_lq
+  ;SSE:       pclmulqdq $16, (%rdi), %xmm0
+  ;SSE-NEXT:  retq
+
+  ;AVX-LABEL: commute_hq_lq
+  ;AVX:       vpclmulqdq $16, (%rdi), %xmm0, %xmm0
+  ;AVX-NEXT:  retq
+
+  %mem = load <2 x i64>* %a0
+  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %mem, <2 x i64> %a1, i8 1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @commute_hq_hq(<2 x i64>* %a0, <2 x i64> %a1) #0 {
+  ;SSE-LABEL: commute_hq_hq
+  ;SSE:       pclmulqdq $17, (%rdi), %xmm0
+  ;SSE-NEXT:  retq
+
+  ;AVX-LABEL: commute_hq_hq
+  ;AVX:       vpclmulqdq $17, (%rdi), %xmm0, %xmm0
+  ;AVX-NEXT:  retq
+
+  %mem = load <2 x i64>* %a0
+  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %mem, <2 x i64> %a1, i8 17)
+  ret <2 x i64> %res
+}