Skip to content

Commit 048cc97

Browse files
committed Oct 14, 2015
[PowerPC] Fix invalid lxvdsx optimization (PR25157)
PR25157 identifies a bug where a load plus a vector shuffle is incorrectly converted into an LXVDSX instruction. That optimization is only valid if the load is of a doubleword, and in the noted case, it was not. This corrects that problem. Joint patch with Eric Schweitz, who provided the bugpoint-reduced test case. llvm-svn: 250324
1 parent 147bb89 commit 048cc97

File tree

2 files changed

+60
-0
lines changed

2 files changed

+60
-0
lines changed
 

‎llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -2799,6 +2799,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
27992799
SDValue Base, Offset;
28002800

28012801
if (LD->isUnindexed() &&
2802+
(LD->getMemoryVT() == MVT::f64 ||
2803+
LD->getMemoryVT() == MVT::i64) &&
28022804
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
28032805
SDValue Chain = LD->getChain();
28042806
SDValue Ops[] = { Base, Offset, Chain };

‎llvm/test/CodeGen/PowerPC/pr25157.ll

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
2+
3+
; Verify correct generation of an lxsspx rather than an invalid optimization
4+
; to lxvdsx. Bugpoint-reduced test from Eric Schweitz.
5+
6+
%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625 = type <{ [28 x i8] }>
7+
%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626 = type <{ [64 x i8] }>
8+
9+
@.BSS38 = external global %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, align 32
10+
@_main1_2_ = external global %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, section ".comm", align 16
11+
12+
define void @aercalc_() {
13+
L.entry:
14+
br i1 undef, label %L.LB38_2426, label %L.LB38_2911
15+
16+
L.LB38_2911:
17+
br i1 undef, label %L.LB38_2140, label %L.LB38_2640
18+
19+
L.LB38_2640:
20+
unreachable
21+
22+
L.LB38_2426:
23+
br i1 undef, label %L.LB38_2438, label %L.LB38_2920
24+
25+
L.LB38_2920:
26+
br i1 undef, label %L.LB38_2438, label %L.LB38_2921
27+
28+
L.LB38_2921:
29+
br label %L.LB38_2140
30+
31+
L.LB38_2140:
32+
ret void
33+
34+
L.LB38_2438:
35+
br i1 undef, label %L.LB38_2451, label %L.LB38_2935
36+
37+
L.LB38_2935:
38+
br i1 undef, label %L.LB38_2451, label %L.LB38_2936
39+
40+
L.LB38_2936:
41+
unreachable
42+
43+
L.LB38_2451:
44+
br i1 undef, label %L.LB38_2452, label %L.LB38_2937
45+
46+
L.LB38_2937:
47+
unreachable
48+
49+
L.LB38_2452:
50+
%0 = load float, float* bitcast (i8* getelementptr inbounds (%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625* @.BSS38, i64 0, i32 0, i64 16) to float*), align 16
51+
%1 = fpext float %0 to double
52+
%2 = insertelement <2 x double> undef, double %1, i32 1
53+
store <2 x double> %2, <2 x double>* bitcast (i8* getelementptr inbounds (%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626* @_main1_2_, i64 0, i32 0, i64 32) to <2 x double>*), align 16
54+
unreachable
55+
}
56+
57+
; CHECK-LABEL: @aercalc_
58+
; CHECK: lxsspx

0 commit comments

Comments
 (0)
Please sign in to comment.