Index: llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -725,10 +725,10 @@ Res = Builder.CreateSub(Num, Rem); } - // Truncate to number of bits this divide really is. + // Extend in register from the number of bits this divide really is. if (IsSigned) { - Res = Builder.CreateTrunc(Res, Builder.getIntNTy(DivBits)); - Res = Builder.CreateSExt(Res, Ty); + Res = Builder.CreateShl(Res, 32 - DivBits); + Res = Builder.CreateAShr(Res, 32 - DivBits); } else { ConstantInt *TruncMask = Builder.getInt32((UINT64_C(1) << DivBits) - 1); Res = Builder.CreateAnd(Res, TruncMask); Index: llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -294,8 +294,8 @@ ; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 -; CHECK-NEXT: [[TMP20:%.*]] = sext i16 [[TMP19]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = shl i32 [[TMP18]], 16 +; CHECK-NEXT: [[TMP20:%.*]] = ashr i32 [[TMP19]], 16 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 ; CHECK-NEXT: store i16 [[TMP21]], i16 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void @@ -327,8 +327,8 @@ ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], [[TMP2]] ; CHECK-NEXT: [[TMP20:%.*]] = sub i32 [[TMP1]], [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 -; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32 +; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 16 +; CHECK-NEXT: [[TMP22:%.*]] = ashr i32 [[TMP21]], 16 ; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 ; CHECK-NEXT: store i16 [[TMP23]], i16 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void @@ -414,8 +414,8 @@ ; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i8 -; CHECK-NEXT: [[TMP20:%.*]] = sext i8 [[TMP19]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = shl i32 [[TMP18]], 24 +; CHECK-NEXT: [[TMP20:%.*]] = ashr i32 [[TMP19]], 24 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i8 ; CHECK-NEXT: store i8 [[TMP21]], i8 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void @@ -447,8 +447,8 @@ ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], [[TMP2]] ; CHECK-NEXT: [[TMP20:%.*]] = sub i32 [[TMP1]], [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i8 -; CHECK-NEXT: [[TMP22:%.*]] = sext i8 [[TMP21]] to i32 +; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 24 +; CHECK-NEXT: [[TMP22:%.*]] = ashr i32 [[TMP21]], 24 ; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i8 ; CHECK-NEXT: store i8 [[TMP23]], i8 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void @@ -1464,8 +1464,8 @@ ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 -; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32 +; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 16 +; CHECK-NEXT: [[TMP22:%.*]] = ashr i32 [[TMP21]], 16 ; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i16> undef, i16 [[TMP23]], i64 0 ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i16> [[X]], i64 1 @@ -1488,8 +1488,8 @@ ; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]] ; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP31]], i32 0 ; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP39]], [[TMP43]] -; CHECK-NEXT: [[TMP45:%.*]] = trunc i32 [[TMP44]] to i16 -; CHECK-NEXT: [[TMP46:%.*]] = sext i16 [[TMP45]] to i32 +; CHECK-NEXT: [[TMP45:%.*]] = shl i32 [[TMP44]], 16 +; CHECK-NEXT: [[TMP46:%.*]] = ashr i32 [[TMP45]], 16 ; CHECK-NEXT: [[TMP47:%.*]] = trunc i32 [[TMP46]] to i16 ; CHECK-NEXT: [[TMP48:%.*]] = insertelement <4 x i16> [[TMP24]], i16 [[TMP47]], i64 1 ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i16> [[X]], i64 2 @@ -1512,8 +1512,8 @@ ; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]] ; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP66]], i32 [[TMP55]], i32 0 ; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[TMP63]], [[TMP67]] -; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP68]] to i16 -; CHECK-NEXT: [[TMP70:%.*]] = sext i16 [[TMP69]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = shl i32 [[TMP68]], 16 +; CHECK-NEXT: [[TMP70:%.*]] = ashr i32 [[TMP69]], 16 ; CHECK-NEXT: [[TMP71:%.*]] = trunc i32 [[TMP70]] to i16 ; CHECK-NEXT: [[TMP72:%.*]] = insertelement <4 x i16> [[TMP48]], i16 [[TMP71]], i64 2 ; CHECK-NEXT: [[TMP73:%.*]] = extractelement <4 x i16> [[X]], i64 3 @@ -1536,8 +1536,8 @@ ; CHECK-NEXT: [[TMP90:%.*]] = fcmp fast oge float [[TMP88]], [[TMP89]] ; CHECK-NEXT: [[TMP91:%.*]] = select i1 [[TMP90]], i32 [[TMP79]], i32 0 ; CHECK-NEXT: [[TMP92:%.*]] = add i32 [[TMP87]], [[TMP91]] -; CHECK-NEXT: [[TMP93:%.*]] = trunc i32 [[TMP92]] to i16 -; CHECK-NEXT: [[TMP94:%.*]] = sext i16 [[TMP93]] to i32 +; CHECK-NEXT: [[TMP93:%.*]] = shl i32 [[TMP92]], 16 +; CHECK-NEXT: [[TMP94:%.*]] = ashr i32 [[TMP93]], 16 ; CHECK-NEXT: [[TMP95:%.*]] = trunc i32 [[TMP94]] to i16 ; CHECK-NEXT: [[TMP96:%.*]] = insertelement <4 x i16> [[TMP72]], i16 [[TMP95]], i64 3 ; CHECK-NEXT: store <4 x i16> [[TMP96]], <4 x i16> addrspace(1)* [[OUT:%.*]] @@ -1572,8 +1572,8 @@ ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], [[TMP4]] ; CHECK-NEXT: [[TMP22:%.*]] = sub i32 [[TMP3]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 -; CHECK-NEXT: [[TMP24:%.*]] = sext i16 [[TMP23]] to i32 +; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 16 +; CHECK-NEXT: [[TMP24:%.*]] = ashr i32 [[TMP23]], 16 ; CHECK-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> undef, i16 [[TMP25]], i64 0 ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i16> [[X]], i64 1 @@ -1598,8 +1598,8 @@ ; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP41]], [[TMP45]] ; CHECK-NEXT: [[TMP47:%.*]] = mul i32 [[TMP46]], [[TMP30]] ; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP29]], [[TMP47]] -; CHECK-NEXT: [[TMP49:%.*]] = trunc i32 [[TMP48]] to i16 -; CHECK-NEXT: [[TMP50:%.*]] = sext i16 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP49:%.*]] = shl i32 [[TMP48]], 16 +; CHECK-NEXT: [[TMP50:%.*]] = ashr i32 [[TMP49]], 16 ; CHECK-NEXT: [[TMP51:%.*]] = trunc i32 [[TMP50]] to i16 ; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP51]], i64 1 ; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i16> [[X]], i64 2 @@ -1624,8 +1624,8 @@ ; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[TMP67]], [[TMP71]] ; CHECK-NEXT: [[TMP73:%.*]] = mul i32 [[TMP72]], [[TMP56]] ; CHECK-NEXT: [[TMP74:%.*]] = sub i32 [[TMP55]], [[TMP73]] -; CHECK-NEXT: [[TMP75:%.*]] = trunc i32 [[TMP74]] to i16 -; CHECK-NEXT: [[TMP76:%.*]] = sext i16 [[TMP75]] to i32 +; CHECK-NEXT: [[TMP75:%.*]] = shl i32 [[TMP74]], 16 +; CHECK-NEXT: [[TMP76:%.*]] = ashr i32 [[TMP75]], 16 ; CHECK-NEXT: [[TMP77:%.*]] = trunc i32 [[TMP76]] to i16 ; CHECK-NEXT: [[TMP78:%.*]] = insertelement <4 x i16> [[TMP52]], i16 [[TMP77]], i64 2 ; CHECK-NEXT: [[TMP79:%.*]] = extractelement <4 x i16> [[X]], i64 3 @@ -1650,8 +1650,8 @@ ; CHECK-NEXT: [[TMP98:%.*]] = add i32 [[TMP93]], [[TMP97]] ; CHECK-NEXT: [[TMP99:%.*]] = mul i32 [[TMP98]], [[TMP82]] ; CHECK-NEXT: [[TMP100:%.*]] = sub i32 [[TMP81]], [[TMP99]] -; CHECK-NEXT: [[TMP101:%.*]] = trunc i32 [[TMP100]] to i16 -; CHECK-NEXT: [[TMP102:%.*]] = sext i16 [[TMP101]] to i32 +; CHECK-NEXT: [[TMP101:%.*]] = shl i32 [[TMP100]], 16 +; CHECK-NEXT: [[TMP102:%.*]] = ashr i32 [[TMP101]], 16 ; CHECK-NEXT: [[TMP103:%.*]] = trunc i32 [[TMP102]] to i16 ; CHECK-NEXT: [[TMP104:%.*]] = insertelement <4 x i16> [[TMP78]], i16 [[TMP103]], i64 3 ; CHECK-NEXT: store <4 x i16> [[TMP104]], <4 x i16> addrspace(1)* [[OUT:%.*]] @@ -1738,8 +1738,8 @@ ; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i3 -; CHECK-NEXT: [[TMP20:%.*]] = sext i3 [[TMP19]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = shl i32 [[TMP18]], 29 +; CHECK-NEXT: [[TMP20:%.*]] = ashr i32 [[TMP19]], 29 ; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i3 ; CHECK-NEXT: store i3 [[TMP21]], i3 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void @@ -1771,8 +1771,8 @@ ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP13]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], [[TMP2]] ; CHECK-NEXT: [[TMP20:%.*]] = sub i32 [[TMP1]], [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i3 -; CHECK-NEXT: [[TMP22:%.*]] = sext i3 [[TMP21]] to i32 +; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 29 +; CHECK-NEXT: [[TMP22:%.*]] = ashr i32 [[TMP21]], 29 ; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i3 ; CHECK-NEXT: store i3 [[TMP23]], i3 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void @@ -1950,8 +1950,8 @@ ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 -; CHECK-NEXT: [[TMP22:%.*]] = sext i16 [[TMP21]] to i32 +; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 16 +; CHECK-NEXT: [[TMP22:%.*]] = ashr i32 [[TMP21]], 16 ; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <3 x i16> undef, i16 [[TMP23]], i64 0 ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x i16> [[X]], i64 1 @@ -1974,8 +1974,8 @@ ; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]] ; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP31]], i32 0 ; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP39]], [[TMP43]] -; CHECK-NEXT: [[TMP45:%.*]] = trunc i32 [[TMP44]] to i16 -; CHECK-NEXT: [[TMP46:%.*]] = sext i16 [[TMP45]] to i32 +; CHECK-NEXT: [[TMP45:%.*]] = shl i32 [[TMP44]], 16 +; CHECK-NEXT: [[TMP46:%.*]] = ashr i32 [[TMP45]], 16 ; CHECK-NEXT: [[TMP47:%.*]] = trunc i32 [[TMP46]] to i16 ; CHECK-NEXT: [[TMP48:%.*]] = insertelement <3 x i16> [[TMP24]], i16 [[TMP47]], i64 1 ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <3 x i16> [[X]], i64 2 @@ -1998,8 +1998,8 @@ ; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]] ; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP66]], i32 [[TMP55]], i32 0 ; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[TMP63]], [[TMP67]] -; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP68]] to i16 -; CHECK-NEXT: [[TMP70:%.*]] = sext i16 [[TMP69]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = shl i32 [[TMP68]], 16 +; CHECK-NEXT: [[TMP70:%.*]] = ashr i32 [[TMP69]], 16 ; CHECK-NEXT: [[TMP71:%.*]] = trunc i32 [[TMP70]] to i16 ; CHECK-NEXT: [[TMP72:%.*]] = insertelement <3 x i16> [[TMP48]], i16 [[TMP71]], i64 2 ; CHECK-NEXT: store <3 x i16> [[TMP72]], <3 x i16> addrspace(1)* [[OUT:%.*]] @@ -2034,8 +2034,8 @@ ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], [[TMP4]] ; CHECK-NEXT: [[TMP22:%.*]] = sub i32 [[TMP3]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 -; CHECK-NEXT: [[TMP24:%.*]] = sext i16 [[TMP23]] to i32 +; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 16 +; CHECK-NEXT: [[TMP24:%.*]] = ashr i32 [[TMP23]], 16 ; CHECK-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <3 x i16> undef, i16 [[TMP25]], i64 0 ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <3 x i16> [[X]], i64 1 @@ -2060,8 +2060,8 @@ ; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP41]], [[TMP45]] ; CHECK-NEXT: [[TMP47:%.*]] = mul i32 [[TMP46]], [[TMP30]] ; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP29]], [[TMP47]] -; CHECK-NEXT: [[TMP49:%.*]] = trunc i32 [[TMP48]] to i16 -; CHECK-NEXT: [[TMP50:%.*]] = sext i16 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP49:%.*]] = shl i32 [[TMP48]], 16 +; CHECK-NEXT: [[TMP50:%.*]] = ashr i32 [[TMP49]], 16 ; CHECK-NEXT: [[TMP51:%.*]] = trunc i32 [[TMP50]] to i16 ; CHECK-NEXT: [[TMP52:%.*]] = insertelement <3 x i16> [[TMP26]], i16 [[TMP51]], i64 1 ; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x i16> [[X]], i64 2 @@ -2086,8 +2086,8 @@ ; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[TMP67]], [[TMP71]] ; CHECK-NEXT: [[TMP73:%.*]] = mul i32 [[TMP72]], [[TMP56]] ; CHECK-NEXT: [[TMP74:%.*]] = sub i32 [[TMP55]], [[TMP73]] -; CHECK-NEXT: [[TMP75:%.*]] = trunc i32 [[TMP74]] to i16 -; CHECK-NEXT: [[TMP76:%.*]] = sext i16 [[TMP75]] to i32 +; CHECK-NEXT: [[TMP75:%.*]] = shl i32 [[TMP74]], 16 +; CHECK-NEXT: [[TMP76:%.*]] = ashr i32 [[TMP75]], 16 ; CHECK-NEXT: [[TMP77:%.*]] = trunc i32 [[TMP76]] to i16 ; CHECK-NEXT: [[TMP78:%.*]] = insertelement <3 x i16> [[TMP52]], i16 [[TMP77]], i64 2 ; CHECK-NEXT: store <3 x i16> [[TMP78]], <3 x i16> addrspace(1)* [[OUT:%.*]] @@ -2266,8 +2266,8 @@ ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i15 -; CHECK-NEXT: [[TMP22:%.*]] = sext i15 [[TMP21]] to i32 +; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 17 +; CHECK-NEXT: [[TMP22:%.*]] = ashr i32 [[TMP21]], 17 ; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i15 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <3 x i15> undef, i15 [[TMP23]], i64 0 ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x i15> [[X]], i64 1 @@ -2290,8 +2290,8 @@ ; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]] ; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP31]], i32 0 ; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP39]], [[TMP43]] -; CHECK-NEXT: [[TMP45:%.*]] = trunc i32 [[TMP44]] to i15 -; CHECK-NEXT: [[TMP46:%.*]] = sext i15 [[TMP45]] to i32 +; CHECK-NEXT: [[TMP45:%.*]] = shl i32 [[TMP44]], 17 +; CHECK-NEXT: [[TMP46:%.*]] = ashr i32 [[TMP45]], 17 ; CHECK-NEXT: [[TMP47:%.*]] = trunc i32 [[TMP46]] to i15 ; CHECK-NEXT: [[TMP48:%.*]] = insertelement <3 x i15> [[TMP24]], i15 [[TMP47]], i64 1 ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <3 x i15> [[X]], i64 2 @@ -2314,8 +2314,8 @@ ; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]] ; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP66]], i32 [[TMP55]], i32 0 ; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[TMP63]], [[TMP67]] -; CHECK-NEXT: [[TMP69:%.*]] = trunc i32 [[TMP68]] to i15 -; CHECK-NEXT: [[TMP70:%.*]] = sext i15 [[TMP69]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = shl i32 [[TMP68]], 17 +; CHECK-NEXT: [[TMP70:%.*]] = ashr i32 [[TMP69]], 17 ; CHECK-NEXT: [[TMP71:%.*]] = trunc i32 [[TMP70]] to i15 ; CHECK-NEXT: [[TMP72:%.*]] = insertelement <3 x i15> [[TMP48]], i15 [[TMP71]], i64 2 ; CHECK-NEXT: store <3 x i15> [[TMP72]], <3 x i15> addrspace(1)* [[OUT:%.*]] @@ -2350,8 +2350,8 @@ ; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP15]], [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[TMP20]], [[TMP4]] ; CHECK-NEXT: [[TMP22:%.*]] = sub i32 [[TMP3]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i15 -; CHECK-NEXT: [[TMP24:%.*]] = sext i15 [[TMP23]] to i32 +; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 17 +; CHECK-NEXT: [[TMP24:%.*]] = ashr i32 [[TMP23]], 17 ; CHECK-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i15 ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <3 x i15> undef, i15 [[TMP25]], i64 0 ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <3 x i15> [[X]], i64 1 @@ -2376,8 +2376,8 @@ ; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP41]], [[TMP45]] ; CHECK-NEXT: [[TMP47:%.*]] = mul i32 [[TMP46]], [[TMP30]] ; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP29]], [[TMP47]] -; CHECK-NEXT: [[TMP49:%.*]] = trunc i32 [[TMP48]] to i15 -; CHECK-NEXT: [[TMP50:%.*]] = sext i15 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP49:%.*]] = shl i32 [[TMP48]], 17 +; CHECK-NEXT: [[TMP50:%.*]] = ashr i32 [[TMP49]], 17 ; CHECK-NEXT: [[TMP51:%.*]] = trunc i32 [[TMP50]] to i15 ; CHECK-NEXT: [[TMP52:%.*]] = insertelement <3 x i15> [[TMP26]], i15 [[TMP51]], i64 1 ; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x i15> [[X]], i64 2 @@ -2402,8 +2402,8 @@ ; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[TMP67]], [[TMP71]] ; CHECK-NEXT: [[TMP73:%.*]] = mul i32 [[TMP72]], [[TMP56]] ; CHECK-NEXT: [[TMP74:%.*]] = sub i32 [[TMP55]], [[TMP73]] -; CHECK-NEXT: [[TMP75:%.*]] = trunc i32 [[TMP74]] to i15 -; CHECK-NEXT: [[TMP76:%.*]] = sext i15 [[TMP75]] to i32 +; CHECK-NEXT: [[TMP75:%.*]] = shl i32 [[TMP74]], 17 +; CHECK-NEXT: [[TMP76:%.*]] = ashr i32 [[TMP75]], 17 ; CHECK-NEXT: [[TMP77:%.*]] = trunc i32 [[TMP76]] to i15 ; CHECK-NEXT: [[TMP78:%.*]] = insertelement <3 x i15> [[TMP52]], i15 [[TMP77]], i64 2 ; CHECK-NEXT: store <3 x i15> [[TMP78]], <3 x i15> addrspace(1)* [[OUT:%.*]]