diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -13926,25 +13926,15 @@
   }
   case X86::BI__shiftleft128:
   case X86::BI__shiftright128: {
-    // FIXME: Once fshl/fshr no longer add an unneeded and and cmov, do this:
-    // llvm::Function *F = CGM.getIntrinsic(
-    //    BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
-    //    Int64Ty);
-    // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
-    // return Builder.CreateCall(F, Ops);
-    llvm::Type *Int128Ty = Builder.getInt128Ty();
-    Value *HighPart128 =
-        Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64);
-    Value *LowPart128 = Builder.CreateZExt(Ops[0], Int128Ty);
-    Value *Val = Builder.CreateOr(HighPart128, LowPart128);
-    Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty),
-                                   llvm::ConstantInt::get(Int128Ty, 0x3f));
-    Value *Res;
-    if (BuiltinID == X86::BI__shiftleft128)
-      Res = Builder.CreateLShr(Builder.CreateShl(Val, Amt), 64);
-    else
-      Res = Builder.CreateLShr(Val, Amt);
-    return Builder.CreateTrunc(Res, Int64Ty);
+    llvm::Function *F = CGM.getIntrinsic(
+        BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
+        Int64Ty);
+    // Flip low/high ops and zero-extend amount to matching type.
+    // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
+    // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
+    std::swap(Ops[0], Ops[1]);
+    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
+    return Builder.CreateCall(F, Ops);
   }
   case X86::BI_ReadWriteBarrier:
   case X86::BI_ReadBarrier:
diff --git a/clang/test/CodeGen/X86/ms-x86-intrinsics.c b/clang/test/CodeGen/X86/ms-x86-intrinsics.c
--- a/clang/test/CodeGen/X86/ms-x86-intrinsics.c
+++ b/clang/test/CodeGen/X86/ms-x86-intrinsics.c
@@ -144,14 +144,8 @@
   return __shiftleft128(l, h, d);
 }
 // CHECK-X64-LABEL: define dso_local i64 @test__shiftleft128(i64 %l, i64 %h, i8 %d)
-// CHECK-X64: = zext i64 %{{.*}} to i128
-// CHECK-X64: = shl nuw i128 %{{.*}}, 64
-// CHECK-X64: = zext i64 %{{.*}} to i128
-// CHECK-X64: = or i128 %
-// CHECK-X64: = and i8 %{{.*}}, 63
-// CHECK-X64: = shl i128 %
-// CHECK-X64: = lshr i128 %
-// CHECK-X64: = trunc i128 %
+// CHECK-X64: = zext i8 %{{.*}} to i64
+// CHECK-X64: = tail call i64 @llvm.fshl.i64(i64 %h, i64 %l, i64 %{{.*}})
 // CHECK-X64: ret i64 %
 
 unsigned __int64 test__shiftright128(unsigned __int64 l, unsigned __int64 h,
@@ -159,13 +153,8 @@
   return __shiftright128(l, h, d);
 }
 // CHECK-X64-LABEL: define dso_local i64 @test__shiftright128(i64 %l, i64 %h, i8 %d)
-// CHECK-X64: = zext i64 %{{.*}} to i128
-// CHECK-X64: = shl nuw i128 %{{.*}}, 64
-// CHECK-X64: = zext i64 %{{.*}} to i128
-// CHECK-X64: = or i128 %
-// CHECK-X64: = and i8 %{{.*}}, 63
-// CHECK-X64: = lshr i128 %
-// CHECK-X64: = trunc i128 %
+// CHECK-X64: = zext i8 %{{.*}} to i64
+// CHECK-X64: = tail call i64 @llvm.fshr.i64(i64 %h, i64 %l, i64 %{{.*}})
 // CHECK-X64: ret i64 %
 
 #endif // defined(__x86_64__)
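
A note on the operand order for reviewers: llvm.fshl/llvm.fshr treat their first operand as the most-significant half of the double-wide value, while the MS intrinsics take (low, high), hence the std::swap before the call, plus the zext of the i8 amount to i64. The standalone harness below cross-checks the removed i128 lowering against the funnel-shift semantics for every shift amount. It is an illustrative sketch only, not part of the patch: ref_shiftleft128, ref_shiftright128, fshl64, and fshr64 are hypothetical helper names, and unsigned __int128 assumes a 64-bit clang/gcc target.

#include <assert.h>
#include <stdint.h>

typedef unsigned __int128 u128; /* clang/gcc extension on 64-bit targets */

/* Old lowering: widen to i128, mask the amount, shift, take one half. */
static uint64_t ref_shiftleft128(uint64_t lo, uint64_t hi, uint8_t n) {
  u128 v = ((u128)hi << 64) | lo;
  return (uint64_t)((v << (n & 63)) >> 64); /* high half of the result */
}
static uint64_t ref_shiftright128(uint64_t lo, uint64_t hi, uint8_t n) {
  u128 v = ((u128)hi << 64) | lo;
  return (uint64_t)(v >> (n & 63)); /* low half of the result */
}

/* LangRef semantics of llvm.fshl.i64 / llvm.fshr.i64: concatenate
   a (high) with b (low), shift by n mod 64, keep one half. */
static uint64_t fshl64(uint64_t a, uint64_t b, uint64_t n) {
  n &= 63;
  return n ? (a << n) | (b >> (64 - n)) : a;
}
static uint64_t fshr64(uint64_t a, uint64_t b, uint64_t n) {
  n &= 63;
  return n ? (b >> n) | (a << (64 - n)) : b;
}

int main(void) {
  const uint64_t lo = 0x0123456789abcdefULL, hi = 0xfedcba9876543210ULL;
  for (unsigned n = 0; n < 256; ++n) { /* full i8 range; wraps mod 64 */
    assert(ref_shiftleft128(lo, hi, (uint8_t)n) == fshl64(hi, lo, n));
    assert(ref_shiftright128(lo, hi, (uint8_t)n) == fshr64(hi, lo, n));
  }
  return 0;
}

Both sides reduce the amount mod 64, so the explicit `& 63` in the old lowering is subsumed by the modulo semantics LangRef specifies for the funnel-shift intrinsics, which is what lets the new code pass the amount through with only a zext.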