Implement _mm_slli_si128 and _mm_srli_si128 with __builtin_shufflevector
instead of __builtin_ia32_pslldqi128 / __builtin_ia32_psrldqi128.

Each result byte is selected from the source vector or from an all-zero
vector, using a lane index computed from the low four bits of imm. When any
of bits 4-7 of imm is set, every lane selects a byte of the zero vector.

The new test/CodeGen/sse.c checks that a shift by 5 emits the expected
shufflevector and that shifts by 0 and by 16 emit no shufflevector at all.

Index: lib/Headers/emmintrin.h
===================================================================
--- lib/Headers/emmintrin.h
+++ lib/Headers/emmintrin.h
@@ -825,11 +825,31 @@
   return __a ^ __b;
 }
 
-#define _mm_slli_si128(a, count) __extension__ ({ \
-  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
-  __m128i __a = (a); \
-  _Pragma("clang diagnostic pop"); \
-  (__m128i)__builtin_ia32_pslldqi128(__a, (count)*8); })
+#define _mm_slli_si128(a, imm) \
+  __extension__({ \
+    _Pragma("clang diagnostic push") \
+    _Pragma("clang diagnostic ignored \"-Wshadow\""); \
+    __m128i __a = (a); \
+    _Pragma("clang diagnostic pop"); \
+    (__m128i) \
+    __builtin_shufflevector((__v16qi)_mm_setzero_si128(), (__v16qi)__a, \
+                            ((imm)&0xF0) ? 0 : 16 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 17 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 18 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 19 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 20 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 21 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 22 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 23 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 24 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 25 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 26 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 27 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 28 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 29 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 30 - ((imm)&0xF), \
+                            ((imm)&0xF0) ? 0 : 31 - ((imm)&0xF)); \
+  })
 
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_slli_epi16(__m128i __a, int __count)
@@ -891,12 +911,30 @@
   return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
 }
 
-
-#define _mm_srli_si128(a, count) __extension__ ({ \
-  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
-  __m128i __a = (a); \
-  _Pragma("clang diagnostic pop"); \
-  (__m128i)__builtin_ia32_psrldqi128(__a, (count)*8); })
+#define _mm_srli_si128(a, imm) \
+  __extension__({ \
+    _Pragma("clang diagnostic push") \
+    _Pragma("clang diagnostic ignored \"-Wshadow\""); \
+    __v16qi __a = (__v16qi)(a); \
+    _Pragma("clang diagnostic pop"); \
+    (__m128i) __builtin_shufflevector(__a, (__v16qi)_mm_setzero_si128(), \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 0, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 1, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 2, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 3, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 4, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 5, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 6, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 7, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 8, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 9, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 10, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 11, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 12, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 13, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 14, \
+                                      ((imm)&0xF0) ? 16 : ((imm)&0xF) + 15); \
+  })
 
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_srli_epi16(__m128i __a, int __count)
Index: test/CodeGen/sse.c
===================================================================
--- /dev/null
+++ test/CodeGen/sse.c
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -O3 -ffreestanding -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -emit-llvm %s -o - | FileCheck %s
+
+#include <emmintrin.h>
+
+// Byte-shifts look reversed due to xmm register layout
+__m128 test_mm_slli_si128(__m128 a) {
+  // CHECK-LABEL: @test_mm_slli_si128
+  // CHECK: shufflevector <16 x i8> <{{.*}}, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> {{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
+  return _mm_slli_si128(a, 5);
+}
+
+__m128 test_mm_slli_si128_0(__m128 a) {
+  // CHECK-LABEL: @test_mm_slli_si128_0
+  // CHECK-NOT: shufflevector
+  return _mm_slli_si128(a, 0);
+}
+
+__m128 test_mm_slli_si128_16(__m128 a) {
+  // CHECK-LABEL: @test_mm_slli_si128_16
+  // CHECK-NOT: shufflevector
+  return _mm_slli_si128(a, 16);
+}
+
+__m128 test_mm_srli_si128(__m128 a) {
+  // CHECK-LABEL: @test_mm_srli_si128
+  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, {{.*}}>, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
+  return _mm_srli_si128(a, 5);
+}
+
+__m128 test_mm_srli_si128_0(__m128 a) {
+  // CHECK-LABEL: @test_mm_srli_si128_0
+  // CHECK-NOT: shufflevector
+  return _mm_srli_si128(a, 0);
+}
+
+__m128 test_mm_srli_si128_16(__m128 a) {
+  // CHECK-LABEL: @test_mm_srli_si128_16
+  // CHECK-NOT: shufflevector
+  return _mm_srli_si128(a, 16);
+}