diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2120,6 +2120,22 @@
   llvm::Type *Ty = Src->getType();
   ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
 
+  // For WebAssembly we want to generate the target-specific rotate builtin,
+  // rather than generating fshl/fshr intrinsics.
+  if (getTarget().getTriple().isWasm() &&
+      (Ty->isIntegerTy(32) || Ty->isIntegerTy(64))) {
+    unsigned IID = 0;
+    if (Ty->isIntegerTy(32)) {
+      IID = IsRotateRight ? Intrinsic::wasm_rotr_i32
+                          : Intrinsic::wasm_rotl_i32;
+    } else if (Ty->isIntegerTy(64)) {
+      IID = IsRotateRight ? Intrinsic::wasm_rotr_i64
+                          : Intrinsic::wasm_rotl_i64;
+    }
+    llvm::Function *F = CGM.getIntrinsic(IID);
+    return RValue::get(Builder.CreateCall(F, {Src, ShiftAmt}));
+  }
+
   // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
   unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
   Function *F = CGM.getIntrinsic(IID, Ty);
diff --git a/clang/test/CodeGen/WebAssembly/wasm-rotate.c b/clang/test/CodeGen/WebAssembly/wasm-rotate.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/WebAssembly/wasm-rotate.c
@@ -0,0 +1,53 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -o - -emit-llvm %s | FileCheck --check-prefix=WEBASSEMBLY32 %s
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown -o - -emit-llvm %s | FileCheck --check-prefix=WEBASSEMBLY64 %s
+
+// WEBASSEMBLY32-LABEL: define i32 @test32
+// WEBASSEMBLY32-SAME: (i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// WEBASSEMBLY32-NEXT: entry:
+// WEBASSEMBLY32-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
+// WEBASSEMBLY32-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
+// WEBASSEMBLY32-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// WEBASSEMBLY32-NEXT: [[AND:%.*]] = and i32 [[TMP0]], -16711936
+// WEBASSEMBLY32-NEXT: [[TMP1:%.*]] = call i32 @llvm.wasm.rotl.i32(i32 [[AND]], i32 8)
+// WEBASSEMBLY32-NEXT: ret i32 [[TMP1]]
+//
+// WEBASSEMBLY64-LABEL: define i32 @test32
+// WEBASSEMBLY64-SAME: (i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// WEBASSEMBLY64-NEXT: entry:
+// WEBASSEMBLY64-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
+// WEBASSEMBLY64-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
+// WEBASSEMBLY64-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// WEBASSEMBLY64-NEXT: [[AND:%.*]] = and i32 [[TMP0]], -16711936
+// WEBASSEMBLY64-NEXT: [[TMP1:%.*]] = call i32 @llvm.wasm.rotl.i32(i32 [[AND]], i32 8)
+// WEBASSEMBLY64-NEXT: ret i32 [[TMP1]]
+//
+unsigned int test32(unsigned int x) {
+  return __builtin_rotateleft32((x & 0xFF00FF00), 8);
+}
+
+// WEBASSEMBLY32-LABEL: define i32 @test64
+// WEBASSEMBLY32-SAME: (i32 noundef [[X:%.*]]) #[[ATTR0]] {
+// WEBASSEMBLY32-NEXT: entry:
+// WEBASSEMBLY32-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
+// WEBASSEMBLY32-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
+// WEBASSEMBLY32-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// WEBASSEMBLY32-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// WEBASSEMBLY32-NEXT: [[AND:%.*]] = and i64 [[CONV]], -71777214294589696
+// WEBASSEMBLY32-NEXT: [[TMP1:%.*]] = call i64 @llvm.wasm.rotl.i64(i64 [[AND]], i64 8)
+// WEBASSEMBLY32-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
+// WEBASSEMBLY32-NEXT: ret i32 [[CONV1]]
+//
+// WEBASSEMBLY64-LABEL: define i64 @test64
+// WEBASSEMBLY64-SAME: (i64 noundef [[X:%.*]]) #[[ATTR0]] {
+// WEBASSEMBLY64-NEXT: entry:
+// WEBASSEMBLY64-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8
+// WEBASSEMBLY64-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8
+// WEBASSEMBLY64-NEXT: [[TMP0:%.*]] = load i64, ptr [[X_ADDR]], align 8
+// WEBASSEMBLY64-NEXT: [[AND:%.*]] = and i64 [[TMP0]], -71777214294589696
+// WEBASSEMBLY64-NEXT: [[TMP1:%.*]] = call i64 @llvm.wasm.rotl.i64(i64 [[AND]], i64 8)
+// WEBASSEMBLY64-NEXT: ret i64 [[TMP1]]
+//
+unsigned long test64(unsigned long x) {
+  return __builtin_rotateleft64((x & 0xFF00FF00FF00FF00L), 8);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -341,4 +341,22 @@
                             [], [IntrReadMem]>;
 
+//===----------------------------------------------------------------------===//
+// Rotate Intrinsics
+// These are lowered from the target independent intrinsics to avoid
+// funnel shift optimizations
+//===----------------------------------------------------------------------===//
+
+def int_wasm_rotl_i32 :
+  DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_wasm_rotr_i32 :
+  DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_wasm_rotl_i64 :
+  DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+
+def int_wasm_rotr_i64 :
+  DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+
 } // TargetPrefix = "wasm"
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -107,6 +107,16 @@
 def : Pat<(rotl I64:$lhs, (and I64:$rhs, 63)), (ROTL_I64 I64:$lhs, I64:$rhs)>;
 def : Pat<(rotr I64:$lhs, (and I64:$rhs, 63)), (ROTR_I64 I64:$lhs, I64:$rhs)>;
 
+// Lower the rotate intrinsic to a rotate instruction
+def : Pat<(int_wasm_rotl_i32 I32:$lhs, I32:$rhs),
+          (ROTL_I32 I32:$lhs, I32:$rhs)>;
+def : Pat<(int_wasm_rotr_i32 I32:$lhs, I32:$rhs),
+          (ROTR_I32 I32:$lhs, I32:$rhs)>;
+def : Pat<(int_wasm_rotl_i64 I64:$lhs, I64:$rhs),
+          (ROTL_I64 I64:$lhs, I64:$rhs)>;
+def : Pat<(int_wasm_rotr_i64 I64:$lhs, I64:$rhs),
+          (ROTR_I64 I64:$lhs, I64:$rhs)>;
+
 defm SELECT_I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs, I32:$cond),
                     (outs), (ins),
                     [(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))],
diff --git a/llvm/test/CodeGen/WebAssembly/rotate-i3264.ll b/llvm/test/CodeGen/WebAssembly/rotate-i3264.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/rotate-i3264.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: sed 's/iX/i32/g' %s | llc --mtriple=wasm32-unknown-unknown | FileCheck --check-prefix=I32 %s
+; RUN: sed 's/iX/i64/g' %s | llc --mtriple=wasm64-unknown-unknown | FileCheck --check-prefix=I64 %s
+
+declare iX @llvm.wasm.rotl.iX(iX, iX) nounwind
+declare iX @llvm.wasm.rotr.iX(iX, iX) nounwind
+
+define iX @testLeft(iX noundef %0, iX noundef %1) {
+; I32-LABEL: testLeft:
+; I32: .functype testLeft (i32, i32) -> (i32)
+; I32-NEXT: # %bb.0:
+; I32-NEXT: local.get 0
+; I32-NEXT: local.get 1
+; I32-NEXT: i32.rotl
+; I32-NEXT: # fallthrough-return
+;
+; I64-LABEL: testLeft:
+; I64: .functype testLeft (i64, i64) -> (i64)
+; I64-NEXT: # %bb.0:
+; I64-NEXT: local.get 0
+; I64-NEXT: local.get 1
+; I64-NEXT: i64.rotl
+; I64-NEXT: # fallthrough-return
+  %3 = call iX @llvm.wasm.rotl.iX(iX %0, iX %1)
+  ret iX %3
+}
+
+define iX @testRight(iX noundef %0, iX noundef %1) {
+; I32-LABEL: testRight:
+; I32: .functype testRight (i32, i32) -> (i32)
+; I32-NEXT: # %bb.0:
+; I32-NEXT: local.get 0
+; I32-NEXT: local.get 1
+; I32-NEXT: i32.rotr
+; I32-NEXT: # fallthrough-return
+;
+; I64-LABEL: testRight:
+; I64: .functype testRight (i64, i64) -> (i64)
+; I64-NEXT: # %bb.0:
+; I64-NEXT: local.get 0
+; I64-NEXT: local.get 1
+; I64-NEXT: i64.rotr
+; I64-NEXT: # fallthrough-return
+  %3 = call iX @llvm.wasm.rotr.iX(iX %0, iX %1)
+  ret iX %3
+}