Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -14905,7 +14905,7 @@ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]}); - for (int i = 0; i < 6; ++i) { + for (int i = 0; i < 3; ++i) { Value *Extract = Builder.CreateExtractValue(Call, i + 1); Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16); Ptr = Builder.CreateBitCast( @@ -14921,7 +14921,7 @@ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]}); - for (int i = 0; i < 7; ++i) { + for (int i = 0; i < 4; ++i) { Value *Extract = Builder.CreateExtractValue(Call, i + 1); Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16); Ptr = Builder.CreateBitCast( Index: clang/test/CodeGen/X86/keylocker.c =================================================================== --- clang/test/CodeGen/X86/keylocker.c +++ clang/test/CodeGen/X86/keylocker.c @@ -98,20 +98,8 @@ // CHECK64-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[TMP5]], i32 32 // CHECK64-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to <2 x i64>* // CHECK64-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* [[TMP14]], align 1 -// CHECK64-NEXT: [[TMP15:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 4 -// CHECK64-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[TMP5]], i32 48 -// CHECK64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to <2 x i64>* -// CHECK64-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[TMP17]], align 1 -// CHECK64-NEXT: [[TMP18:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 5 -// CHECK64-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[TMP5]], i32 64 -// CHECK64-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to <2 x i64>* -// CHECK64-NEXT: store <2 x i64> [[TMP18]], <2 x i64>* [[TMP20]], align 1 -// CHECK64-NEXT: [[TMP21:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 6 -// CHECK64-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[TMP5]], i32 80 -// CHECK64-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP22]] to <2 x i64>* -// CHECK64-NEXT: store <2 x i64> [[TMP21]], <2 x i64>* [[TMP23]], align 1 -// CHECK64-NEXT: [[TMP24:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 0 -// CHECK64-NEXT: ret i32 [[TMP24]] +// CHECK64-NEXT: [[TMP15:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 0 +// CHECK64-NEXT: ret i32 [[TMP15]] // // CHECK32-LABEL: @test_encodekey128_u32( // CHECK32-NEXT: entry: @@ -145,20 +133,8 @@ // CHECK32-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[TMP5]], i32 32 // CHECK32-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to <2 x i64>* // CHECK32-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* [[TMP14]], align 1 -// CHECK32-NEXT: [[TMP15:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 4 -// CHECK32-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[TMP5]], i32 48 -// CHECK32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to <2 x i64>* -// CHECK32-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[TMP17]], align 1 -// CHECK32-NEXT: [[TMP18:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 5 -// CHECK32-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[TMP5]], i32 64 -// CHECK32-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to <2 x i64>* -// CHECK32-NEXT: store <2 x i64> [[TMP18]], <2 x i64>* [[TMP20]], align 1 -// CHECK32-NEXT: [[TMP21:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 6 -// CHECK32-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[TMP5]], i32 80 -// CHECK32-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP22]] to <2 x i64>* -// CHECK32-NEXT: store <2 x i64> [[TMP21]], <2 x i64>* [[TMP23]], align 1 -// CHECK32-NEXT: [[TMP24:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 0 -// CHECK32-NEXT: ret i32 [[TMP24]] +// CHECK32-NEXT: [[TMP15:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP6]], 0 +// CHECK32-NEXT: ret i32 [[TMP15]] // unsigned int test_encodekey128_u32(unsigned int htype, __m128i key, void *h) { return _mm_encodekey128_u32(htype, key, h); @@ -206,20 +182,8 @@ // CHECK64-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP7]], i32 48 // CHECK64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to <2 x i64>* // CHECK64-NEXT: store <2 x i64> [[TMP17]], <2 x i64>* [[TMP19]], align 1 -// CHECK64-NEXT: [[TMP20:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 5 -// CHECK64-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[TMP7]], i32 64 -// CHECK64-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to <2 x i64>* -// CHECK64-NEXT: store <2 x i64> [[TMP20]], <2 x i64>* [[TMP22]], align 1 -// CHECK64-NEXT: [[TMP23:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 6 -// CHECK64-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[TMP7]], i32 80 -// CHECK64-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to <2 x i64>* -// CHECK64-NEXT: store <2 x i64> [[TMP23]], <2 x i64>* [[TMP25]], align 1 -// CHECK64-NEXT: [[TMP26:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 7 -// CHECK64-NEXT: [[TMP27:%.*]] = getelementptr i8, i8* [[TMP7]], i32 96 -// CHECK64-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP27]] to <2 x i64>* -// CHECK64-NEXT: store <2 x i64> [[TMP26]], <2 x i64>* [[TMP28]], align 1 -// CHECK64-NEXT: [[TMP29:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 0 -// CHECK64-NEXT: ret i32 [[TMP29]] +// CHECK64-NEXT: [[TMP20:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 0 +// CHECK64-NEXT: ret i32 [[TMP20]] // // CHECK32-LABEL: @test_encodekey256_u32( // CHECK32-NEXT: entry: @@ -263,20 +227,8 @@ // CHECK32-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP7]], i32 48 // CHECK32-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to <2 x i64>* // CHECK32-NEXT: store <2 x i64> [[TMP17]], <2 x i64>* [[TMP19]], align 1 -// CHECK32-NEXT: [[TMP20:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 5 -// CHECK32-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[TMP7]], i32 64 -// CHECK32-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to <2 x i64>* -// CHECK32-NEXT: store <2 x i64> [[TMP20]], <2 x i64>* [[TMP22]], align 1 -// CHECK32-NEXT: [[TMP23:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 6 -// CHECK32-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[TMP7]], i32 80 -// CHECK32-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to <2 x i64>* -// CHECK32-NEXT: store <2 x i64> [[TMP23]], <2 x i64>* [[TMP25]], align 1 -// CHECK32-NEXT: [[TMP26:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 7 -// CHECK32-NEXT: [[TMP27:%.*]] = getelementptr i8, i8* [[TMP7]], i32 96 -// CHECK32-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP27]] to <2 x i64>* -// CHECK32-NEXT: store <2 x i64> [[TMP26]], <2 x i64>* [[TMP28]], align 1 -// CHECK32-NEXT: [[TMP29:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 0 -// CHECK32-NEXT: ret i32 [[TMP29]] +// CHECK32-NEXT: [[TMP20:%.*]] = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 0 +// CHECK32-NEXT: ret i32 [[TMP20]] // unsigned int test_encodekey256_u32(unsigned int htype, __m128i key_lo, __m128i key_hi, void *h) { return _mm_encodekey256_u32(htype, key_lo, key_hi, h); Index: llvm/test/CodeGen/X86/keylocker-intrinsics.ll =================================================================== --- llvm/test/CodeGen/X86/keylocker-intrinsics.ll +++ llvm/test/CodeGen/X86/keylocker-intrinsics.ll @@ -36,40 +36,24 @@ define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5) nounwind { ; X64-LABEL: test_encodekey128_u32: ; X64: # %bb.0: # %entry -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 ; X64-NEXT: encodekey128 %edi, %eax ; X64-NEXT: movaps %xmm0, (%rsi) ; X64-NEXT: movaps %xmm1, (%rdx) ; X64-NEXT: movaps %xmm2, (%rcx) -; X64-NEXT: movaps %xmm4, (%r8) -; X64-NEXT: movaps %xmm5, (%r9) -; X64-NEXT: movaps %xmm6, (%r10) ; X64-NEXT: retq ; ; X32-LABEL: test_encodekey128_u32: ; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: encodekey128 %eax, %eax -; X32-NEXT: vmovaps %xmm0, (%ebp) -; X32-NEXT: vmovaps %xmm1, (%ebx) -; X32-NEXT: vmovaps %xmm2, (%edi) -; X32-NEXT: vmovaps %xmm4, (%esi) -; X32-NEXT: vmovaps %xmm5, (%edx) -; X32-NEXT: vmovaps %xmm6, (%ecx) +; X32-NEXT: vmovaps %xmm0, (%esi) +; X32-NEXT: vmovaps %xmm1, (%edx) +; X32-NEXT: vmovaps %xmm2, (%ecx) ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi -; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp ; X32-NEXT: retl entry: %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key) @@ -79,53 +63,36 @@ store <2 x i64> %2, <2 x i64>* %h1, align 16 %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3 store <2 x i64> %3, <2 x i64>* %h2, align 16 - %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4 - store <2 x i64> %4, <2 x i64>* %h3, align 16 - %5 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5 - store <2 x i64> %5, <2 x i64>* %h4, align 16 - %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6 - store <2 x i64> %6, <2 x i64>* %h5, align 16 - %7 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 - ret i32 %7 + %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i32 %4 } define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5, <2 x i64>* nocapture readnone %h6) nounwind { ; X64-LABEL: test_encodekey256_u32: ; X64: # %bb.0: # %entry -; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 ; X64-NEXT: encodekey256 %edi, %eax ; X64-NEXT: movaps %xmm0, (%rsi) ; X64-NEXT: movaps %xmm1, (%rdx) ; X64-NEXT: movaps %xmm2, (%rcx) ; X64-NEXT: movaps %xmm3, (%r8) -; X64-NEXT: movaps %xmm4, (%r9) -; X64-NEXT: movaps %xmm5, (%r10) ; X64-NEXT: retq ; ; X32-LABEL: test_encodekey256_u32: ; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: encodekey256 %eax, %eax -; X32-NEXT: vmovaps %xmm0, (%ebp) -; X32-NEXT: vmovaps %xmm1, (%ebx) -; X32-NEXT: vmovaps %xmm2, (%edi) -; X32-NEXT: vmovaps %xmm3, (%esi) -; X32-NEXT: vmovaps %xmm4, (%edx) -; X32-NEXT: vmovaps %xmm5, (%ecx) +; X32-NEXT: vmovaps %xmm0, (%edi) +; X32-NEXT: vmovaps %xmm1, (%esi) +; X32-NEXT: vmovaps %xmm2, (%edx) +; X32-NEXT: vmovaps %xmm3, (%ecx) ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi -; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp ; X32-NEXT: retl entry: %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi) @@ -137,12 +104,8 @@ store <2 x i64> %3, <2 x i64>* %h2, align 16 %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4 store <2 x i64> %4, <2 x i64>* %h3, align 16 - %5 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5 - store <2 x i64> %5, <2 x i64>* %h4, align 16 - %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6 - store <2 x i64> %6, <2 x i64>* %h5, align 16 - %7 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 - ret i32 %7 + %5 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0 + ret i32 %5 } define i8 @test_mm_aesenc128kl_u8(<2 x i64> %data, i8* %h, <2 x i64>* %out) {