Skip to content

Commit 33a1776

Browse files
author
Sean Fertile
committed Jan 9, 2018
[PowerPC] Can not assume an intrinsic argument is a simple type.
The CTRLoop pass performs checks on the argument of certain libcalls/intrinsics, and assumes the arguments must be of a simple type. This isn't always the case, though. For example, if we unroll and vectorize a loop we may end up with vectors larger than the largest legal type, along with intrinsics that operate on those wider types. This happened in the ffmpeg build, where we unrolled a loop and ended up with a sqrt intrinsic that operated on V16f64, triggering an assertion. Differential Revision: https://reviews.llvm.org/D41758. llvm-svn: 322055
1 parent 9c7ba8e commit 33a1776

File tree

2 files changed

+67
-6
lines changed

2 files changed

+67
-6
lines changed
 

Diff for: ‎llvm/lib/Target/PowerPC/PPCCTRLoops.cpp

+7-6
Original file line numberDiff line numberDiff line change
@@ -403,15 +403,16 @@ bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
403403
}
404404

405405
if (Opcode) {
406-
MVT VTy = TLI->getSimpleValueType(
407-
*DL, CI->getArgOperand(0)->getType(), true);
408-
if (VTy == MVT::Other)
406+
EVT EVTy =
407+
TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true);
408+
409+
if (EVTy == MVT::Other)
409410
return true;
410411

411-
if (TLI->isOperationLegalOrCustom(Opcode, VTy))
412+
if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
412413
continue;
413-
else if (VTy.isVector() &&
414-
TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
414+
else if (EVTy.isVector() &&
415+
TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
415416
continue;
416417

417418
return true;

Diff for: ‎llvm/test/CodeGen/PowerPC/non-simple-args-intrin.ll

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; RUN: llc -verify-machineinstrs <%s | FileCheck %s
2+
target datalayout = "e-m:e-i64:64-n32:64"
3+
target triple = "powerpc64le-unknown-linux-gnu"
4+
5+
; Ensure that the CTRLoop pass can compile intrinsics with
6+
; non-simple arguments, e.g., @llvm.sqrt.v16f64.
7+
8+
; Function Attrs: nounwind
9+
define void @filter_prewitt() {
10+
; CHECK-LABEL: filter_prewitt:
11+
entry:
12+
br label %vector.body
13+
14+
vector.body: ; preds = %vector.body, %entry
15+
%wide.load = load <16 x i8>, <16 x i8>* undef, align 1, !tbaa !1, !alias.scope !4
16+
%0 = zext <16 x i8> %wide.load to <16 x i32>
17+
%wide.load279 = load <16 x i8>, <16 x i8>* undef, align 1, !tbaa !1, !alias.scope !4
18+
%1 = zext <16 x i8> %wide.load279 to <16 x i32>
19+
%2 = add nuw nsw <16 x i32> %1, %0
20+
%3 = add nuw nsw <16 x i32> %2, zeroinitializer
21+
%4 = sub nsw <16 x i32> zeroinitializer, %3
22+
%5 = add nsw <16 x i32> %4, zeroinitializer
23+
%6 = add nsw <16 x i32> %5, zeroinitializer
24+
%7 = sub nsw <16 x i32> zeroinitializer, %0
25+
%8 = sub nsw <16 x i32> %7, zeroinitializer
26+
%9 = add nsw <16 x i32> %8, zeroinitializer
27+
%10 = sub nsw <16 x i32> %9, zeroinitializer
28+
%11 = add nsw <16 x i32> %10, zeroinitializer
29+
%12 = mul nsw <16 x i32> %6, %6
30+
%13 = mul nsw <16 x i32> %11, %11
31+
%14 = add nuw nsw <16 x i32> %13, %12
32+
%15 = sitofp <16 x i32> %14 to <16 x double>
33+
%16 = call nsz <16 x double> @llvm.sqrt.v16f64(<16 x double> %15)
34+
%17 = fmul nsz <16 x double> %16, undef
35+
%18 = fadd nsz <16 x double> %17, undef
36+
%19 = fptosi <16 x double> %18 to <16 x i32>
37+
%20 = sub nsw <16 x i32> zeroinitializer, %19
38+
%21 = ashr <16 x i32> %20, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
39+
%22 = select <16 x i1> undef, <16 x i32> %21, <16 x i32> %19
40+
%23 = trunc <16 x i32> %22 to <16 x i8>
41+
store <16 x i8> %23, <16 x i8>* undef, align 1, !tbaa !1, !alias.scope !7, !noalias !9
42+
br label %vector.body
43+
}
44+
45+
; Function Attrs: nounwind readnone speculatable
46+
declare <16 x double> @llvm.sqrt.v16f64(<16 x double>) #1
47+
48+
attributes #1 = { nounwind readnone speculatable }
49+
50+
!1 = !{!2, !2, i64 0}
51+
!2 = !{!"omnipotent char", !3, i64 0}
52+
!3 = !{!"Simple C/C++ TBAA"}
53+
!4 = !{!5}
54+
!5 = distinct !{!5, !6}
55+
!6 = distinct !{!6, !"LVerDomain"}
56+
!7 = !{!8}
57+
!8 = distinct !{!8, !6}
58+
!9 = !{!10, !11, !5}
59+
!10 = distinct !{!10, !6}
60+
!11 = distinct !{!11, !6}

0 commit comments

Comments
 (0)
Please sign in to comment.