Index: llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -987,7 +987,6 @@ ConstantInt *Zero = Builder.getInt32(0); ConstantInt *One = Builder.getInt32(1); - ConstantInt *MinusOne = Builder.getInt32(~0); Value *Sign = nullptr; if (IsSigned) { @@ -1048,18 +1047,14 @@ // Remainder = Num - Num_S_Remainder Value *Remainder = Builder.CreateSub(Num, Num_S_Remainder); - // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) - Value *Rem_GE_Den_CC = Builder.CreateICmpUGE(Remainder, Den); - Value *Remainder_GE_Den = Builder.CreateSelect(Rem_GE_Den_CC, MinusOne, Zero); + // Remainder_GE_Den = Remainder >= Den; + Value *Remainder_GE_Den = Builder.CreateICmpUGE(Remainder, Den); - // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0) - Value *Num_GE_Num_S_Rem_CC = Builder.CreateICmpUGE(Num, Num_S_Remainder); - Value *Remainder_GE_Zero = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, - MinusOne, Zero); + // Remainder_GE_Zero = Num >= Num_S_Remainder + Value *Remainder_GE_Zero = Builder.CreateICmpUGE(Num, Num_S_Remainder); // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero Value *Tmp1 = Builder.CreateAnd(Remainder_GE_Den, Remainder_GE_Zero); - Value *Tmp1_0_CC = Builder.CreateICmpEQ(Tmp1, Zero); Value *Res; if (IsDiv) { @@ -1069,11 +1064,11 @@ // Quotient_S_One = Quotient - 1 Value *Quotient_S_One = Builder.CreateSub(Quotient, One); - // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) - Value *Div = Builder.CreateSelect(Tmp1_0_CC, Quotient, Quotient_A_One); + // Div = (Tmp1 ? Quotient_A_One : Quotient) + Value *Div = Builder.CreateSelect(Tmp1, Quotient_A_One, Quotient); - // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) - Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Div, Quotient_S_One); + // Div = (Remainder_GE_Zero ? Div : Quotient_S_One) + Res = Builder.CreateSelect(Remainder_GE_Zero, Div, Quotient_S_One); } else { // Remainder_S_Den = Remainder - Den Value *Remainder_S_Den = Builder.CreateSub(Remainder, Den); @@ -1081,11 +1076,11 @@ // Remainder_A_Den = Remainder + Den Value *Remainder_A_Den = Builder.CreateAdd(Remainder, Den); - // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) - Value *Rem = Builder.CreateSelect(Tmp1_0_CC, Remainder, Remainder_S_Den); + // Rem = (Tmp1 ? Remainder_S_Den : Remainder) + Value *Rem = Builder.CreateSelect(Tmp1, Remainder_S_Den, Remainder); - // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) - Res = Builder.CreateSelect(Num_GE_Num_S_Rem_CC, Rem, Remainder_A_Den); + // Rem = (Remainder_GE_Zero ? Rem : Remainder_A_Den) + Res = Builder.CreateSelect(Remainder_GE_Zero, Rem, Remainder_A_Den); } if (IsSigned) { Index: llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll @@ -121,18 +121,15 @@ ; IR-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], [[TMP4]] ; IR-NEXT: [[TMP33:%.*]] = sub i32 1000000, [[TMP32]] ; IR-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP33]], [[TMP4]] -; IR-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 -1, i32 0 -; IR-NEXT: [[TMP36:%.*]] = icmp uge i32 1000000, [[TMP32]] -; IR-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 -1, i32 0 -; IR-NEXT: [[TMP38:%.*]] = and i32 [[TMP35]], [[TMP37]] -; IR-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP38]], 0 -; IR-NEXT: [[TMP40:%.*]] = add i32 [[TMP31]], 1 -; IR-NEXT: [[TMP41:%.*]] = sub i32 [[TMP31]], 1 -; IR-NEXT: [[TMP42:%.*]] = select i1 [[TMP39]], i32 [[TMP31]], i32 [[TMP40]] -; IR-NEXT: [[TMP43:%.*]] = select i1 [[TMP36]], i32 [[TMP42]], i32 [[TMP41]] -; IR-NEXT: [[TMP44:%.*]] = xor i32 [[TMP43]], [[TMP2]] -; IR-NEXT: [[TMP45:%.*]] = sub i32 [[TMP44]], [[TMP2]] -; IR-NEXT: ret i32 [[TMP45]] +; IR-NEXT: [[TMP35:%.*]] = icmp uge i32 1000000, [[TMP32]] +; IR-NEXT: [[TMP36:%.*]] = and i1 [[TMP34]], [[TMP35]] +; IR-NEXT: [[TMP37:%.*]] = add i32 [[TMP31]], 1 +; IR-NEXT: [[TMP38:%.*]] = sub i32 [[TMP31]], 1 +; IR-NEXT: [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP37]], i32 [[TMP31]] +; IR-NEXT: [[TMP40:%.*]] = select i1 [[TMP35]], i32 [[TMP39]], i32 [[TMP38]] +; IR-NEXT: [[TMP41:%.*]] = xor i32 [[TMP40]], [[TMP2]] +; IR-NEXT: [[TMP42:%.*]] = sub i32 [[TMP41]], [[TMP2]] +; IR-NEXT: ret i32 [[TMP42]] ; ; GCN-LABEL: select_sdiv_lhs_opaque_const0_i32: ; GCN: ; %bb.0: @@ -219,18 +216,15 @@ ; IR-NEXT: [[TMP32:%.*]] = mul i32 [[TMP31]], [[TMP4]] ; IR-NEXT: [[TMP33:%.*]] = sub i32 1000000, [[TMP32]] ; IR-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP33]], [[TMP4]] -; IR-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 -1, i32 0 -; IR-NEXT: [[TMP36:%.*]] = icmp uge i32 1000000, [[TMP32]] -; IR-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 -1, i32 0 -; IR-NEXT: [[TMP38:%.*]] = and i32 [[TMP35]], [[TMP37]] -; IR-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP38]], 0 -; IR-NEXT: [[TMP40:%.*]] = add i32 [[TMP31]], 1 -; IR-NEXT: [[TMP41:%.*]] = sub i32 [[TMP31]], 1 -; IR-NEXT: [[TMP42:%.*]] = select i1 [[TMP39]], i32 [[TMP31]], i32 [[TMP40]] -; IR-NEXT: [[TMP43:%.*]] = select i1 [[TMP36]], i32 [[TMP42]], i32 [[TMP41]] -; IR-NEXT: [[TMP44:%.*]] = xor i32 [[TMP43]], [[TMP2]] -; IR-NEXT: [[TMP45:%.*]] = sub i32 [[TMP44]], [[TMP2]] -; IR-NEXT: ret i32 [[TMP45]] +; IR-NEXT: [[TMP35:%.*]] = icmp uge i32 1000000, [[TMP32]] +; IR-NEXT: [[TMP36:%.*]] = and i1 [[TMP34]], [[TMP35]] +; IR-NEXT: [[TMP37:%.*]] = add i32 [[TMP31]], 1 +; IR-NEXT: [[TMP38:%.*]] = sub i32 [[TMP31]], 1 +; IR-NEXT: [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP37]], i32 [[TMP31]] +; IR-NEXT: [[TMP40:%.*]] = select i1 [[TMP35]], i32 [[TMP39]], i32 [[TMP38]] +; IR-NEXT: [[TMP41:%.*]] = xor i32 [[TMP40]], [[TMP2]] +; IR-NEXT: [[TMP42:%.*]] = sub i32 [[TMP41]], [[TMP2]] +; IR-NEXT: ret i32 [[TMP42]] ; ; GCN-LABEL: select_sdiv_lhs_opaque_const1_i32: ; GCN: ; %bb.0: Index: llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -36,16 +36,13 @@ ; CHECK-NEXT: [[TMP29:%.*]] = mul i32 [[TMP28]], [[Y]] ; CHECK-NEXT: [[TMP30:%.*]] = sub i32 [[X]], [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = icmp uge i32 [[TMP30]], [[Y]] -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[X]], [[TMP29]] -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP32]], [[TMP34]] -; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], 0 -; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP28]], 1 -; CHECK-NEXT: [[TMP38:%.*]] = sub i32 [[TMP28]], 1 -; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP28]], i32 [[TMP37]] -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP33]], i32 [[TMP39]], i32 [[TMP38]] -; CHECK-NEXT: store i32 [[TMP40]], i32 addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP32:%.*]] = icmp uge i32 [[X]], [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = add i32 [[TMP28]], 1 +; CHECK-NEXT: [[TMP35:%.*]] = sub i32 [[TMP28]], 1 +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP33]], i32 [[TMP34]], i32 [[TMP28]] +; CHECK-NEXT: [[TMP37:%.*]] = select i1 [[TMP32]], i32 [[TMP36]], i32 [[TMP35]] +; CHECK-NEXT: store i32 [[TMP37]], i32 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: udiv_i32: @@ -118,16 +115,13 @@ ; CHECK-NEXT: [[TMP29:%.*]] = mul i32 [[TMP28]], [[Y]] ; CHECK-NEXT: [[TMP30:%.*]] = sub i32 [[X]], [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = icmp uge i32 [[TMP30]], [[Y]] -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[X]], [[TMP29]] -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP32]], [[TMP34]] -; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], 0 -; CHECK-NEXT: [[TMP37:%.*]] = sub i32 [[TMP30]], [[Y]] -; CHECK-NEXT: [[TMP38:%.*]] = add i32 [[TMP30]], [[Y]] -; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP36]], i32 [[TMP30]], i32 [[TMP37]] -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP33]], i32 [[TMP39]], i32 [[TMP38]] -; CHECK-NEXT: store i32 [[TMP40]], i32 addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP32:%.*]] = icmp uge i32 [[X]], [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = sub i32 [[TMP30]], [[Y]] +; CHECK-NEXT: [[TMP35:%.*]] = add i32 [[TMP30]], [[Y]] +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP33]], i32 [[TMP34]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP37:%.*]] = select i1 [[TMP32]], i32 [[TMP36]], i32 [[TMP35]] +; CHECK-NEXT: store i32 [[TMP37]], i32 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: urem_i32: @@ -207,18 +201,15 @@ ; CHECK-NEXT: [[TMP36:%.*]] = mul i32 [[TMP35]], [[TMP7]] ; CHECK-NEXT: [[TMP37:%.*]] = sub i32 [[TMP6]], [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = icmp uge i32 [[TMP37]], [[TMP7]] -; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i32 [[TMP6]], [[TMP36]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP39]], [[TMP41]] -; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[TMP42]], 0 -; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP35]], 1 -; CHECK-NEXT: [[TMP45:%.*]] = sub i32 [[TMP35]], 1 -; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP43]], i32 [[TMP35]], i32 [[TMP44]] -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP40]], i32 [[TMP46]], i32 [[TMP45]] -; CHECK-NEXT: [[TMP48:%.*]] = xor i32 [[TMP47]], [[TMP3]] -; CHECK-NEXT: [[TMP49:%.*]] = sub i32 [[TMP48]], [[TMP3]] -; CHECK-NEXT: store i32 [[TMP49]], i32 addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP39:%.*]] = icmp uge i32 [[TMP6]], [[TMP36]] +; CHECK-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP35]], 1 +; CHECK-NEXT: [[TMP42:%.*]] = sub i32 [[TMP35]], 1 +; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP40]], i32 [[TMP41]], i32 [[TMP35]] +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP39]], i32 [[TMP43]], i32 [[TMP42]] +; CHECK-NEXT: [[TMP45:%.*]] = xor i32 [[TMP44]], [[TMP3]] +; CHECK-NEXT: [[TMP46:%.*]] = sub i32 [[TMP45]], [[TMP3]] +; CHECK-NEXT: store i32 [[TMP46]], i32 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: sdiv_i32: @@ -306,18 +297,15 @@ ; CHECK-NEXT: [[TMP35:%.*]] = mul i32 [[TMP34]], [[TMP6]] ; CHECK-NEXT: [[TMP36:%.*]] = sub i32 [[TMP5]], [[TMP35]] ; CHECK-NEXT: [[TMP37:%.*]] = icmp uge i32 [[TMP36]], [[TMP6]] -; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP39:%.*]] = icmp uge i32 [[TMP5]], [[TMP35]] -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP38]], [[TMP40]] -; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[TMP41]], 0 -; CHECK-NEXT: [[TMP43:%.*]] = sub i32 [[TMP36]], [[TMP6]] -; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP36]], [[TMP6]] -; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP42]], i32 [[TMP36]], i32 [[TMP43]] -; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP39]], i32 [[TMP45]], i32 [[TMP44]] -; CHECK-NEXT: [[TMP47:%.*]] = xor i32 [[TMP46]], [[TMP1]] -; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP47]], [[TMP1]] -; CHECK-NEXT: store i32 [[TMP48]], i32 addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp uge i32 [[TMP5]], [[TMP35]] +; CHECK-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = sub i32 [[TMP36]], [[TMP6]] +; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP36]], [[TMP6]] +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP39]], i32 [[TMP40]], i32 [[TMP36]] +; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP38]], i32 [[TMP42]], i32 [[TMP41]] +; CHECK-NEXT: [[TMP44:%.*]] = xor i32 [[TMP43]], [[TMP1]] +; CHECK-NEXT: [[TMP45:%.*]] = sub i32 [[TMP44]], [[TMP1]] +; CHECK-NEXT: store i32 [[TMP45]], i32 addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: srem_i32: @@ -827,146 +815,134 @@ ; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], [[TMP2]] ; CHECK-NEXT: [[TMP32:%.*]] = sub i32 [[TMP1]], [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP35:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP34]], [[TMP36]] -; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP37]], 0 -; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP30]], 1 -; CHECK-NEXT: [[TMP40:%.*]] = sub i32 [[TMP30]], 1 -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP38]], i32 [[TMP30]], i32 [[TMP39]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], i32 [[TMP41]], i32 [[TMP40]] -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> undef, i32 [[TMP42]], i64 0 -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i32> [[Y]], i64 1 -; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP45]] to float -; CHECK-NEXT: [[TMP47:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP46]]) -; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP47]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP49:%.*]] = fptoui float [[TMP48]] to i32 -; CHECK-NEXT: [[TMP50:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP51:%.*]] = zext i32 [[TMP45]] to i64 -; CHECK-NEXT: [[TMP52:%.*]] = mul i64 [[TMP50]], [[TMP51]] -; CHECK-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i32 -; CHECK-NEXT: [[TMP54:%.*]] = lshr i64 [[TMP52]], 32 -; CHECK-NEXT: [[TMP55:%.*]] = trunc i64 [[TMP54]] to i32 -; CHECK-NEXT: [[TMP56:%.*]] = sub i32 0, [[TMP53]] -; CHECK-NEXT: [[TMP57:%.*]] = icmp eq i32 [[TMP55]], 0 -; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 [[TMP53]] -; CHECK-NEXT: [[TMP59:%.*]] = zext i32 [[TMP58]] to i64 -; CHECK-NEXT: [[TMP60:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP61:%.*]] = mul i64 [[TMP59]], [[TMP60]] -; CHECK-NEXT: [[TMP62:%.*]] = trunc i64 [[TMP61]] to i32 -; CHECK-NEXT: [[TMP63:%.*]] = lshr i64 [[TMP61]], 32 -; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 -; CHECK-NEXT: [[TMP65:%.*]] = add i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = sub i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP57]], i32 [[TMP65]], i32 [[TMP66]] -; CHECK-NEXT: [[TMP68:%.*]] = zext i32 [[TMP67]] to i64 -; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP44]] to i64 -; CHECK-NEXT: [[TMP70:%.*]] = mul i64 [[TMP68]], [[TMP69]] -; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP72:%.*]] = lshr i64 [[TMP70]], 32 -; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 -; CHECK-NEXT: [[TMP74:%.*]] = mul i32 [[TMP73]], [[TMP45]] -; CHECK-NEXT: [[TMP75:%.*]] = sub i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = icmp uge i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP78:%.*]] = icmp uge i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP80:%.*]] = and i32 [[TMP77]], [[TMP79]] -; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i32 [[TMP80]], 0 -; CHECK-NEXT: [[TMP82:%.*]] = add i32 [[TMP73]], 1 -; CHECK-NEXT: [[TMP83:%.*]] = sub i32 [[TMP73]], 1 -; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP81]], i32 [[TMP73]], i32 [[TMP82]] -; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP78]], i32 [[TMP84]], i32 [[TMP83]] -; CHECK-NEXT: [[TMP86:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP85]], i64 1 -; CHECK-NEXT: [[TMP87:%.*]] = extractelement <4 x i32> [[X]], i64 2 -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <4 x i32> [[Y]], i64 2 -; CHECK-NEXT: [[TMP89:%.*]] = uitofp i32 [[TMP88]] to float -; CHECK-NEXT: [[TMP90:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP89]]) -; CHECK-NEXT: [[TMP91:%.*]] = fmul fast float [[TMP90]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP92:%.*]] = fptoui float [[TMP91]] to i32 -; CHECK-NEXT: [[TMP93:%.*]] = zext i32 [[TMP92]] to i64 -; CHECK-NEXT: [[TMP94:%.*]] = zext i32 [[TMP88]] to i64 -; CHECK-NEXT: [[TMP95:%.*]] = mul i64 [[TMP93]], [[TMP94]] -; CHECK-NEXT: [[TMP96:%.*]] = trunc i64 [[TMP95]] to i32 -; CHECK-NEXT: [[TMP97:%.*]] = lshr i64 [[TMP95]], 32 -; CHECK-NEXT: [[TMP98:%.*]] = trunc i64 [[TMP97]] to i32 -; CHECK-NEXT: [[TMP99:%.*]] = sub i32 0, [[TMP96]] -; CHECK-NEXT: [[TMP100:%.*]] = icmp eq i32 [[TMP98]], 0 -; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[TMP99]], i32 [[TMP96]] -; CHECK-NEXT: [[TMP102:%.*]] = zext i32 [[TMP101]] to i64 -; CHECK-NEXT: [[TMP103:%.*]] = zext i32 [[TMP92]] to i64 -; CHECK-NEXT: [[TMP104:%.*]] = mul i64 [[TMP102]], [[TMP103]] -; CHECK-NEXT: [[TMP105:%.*]] = trunc i64 [[TMP104]] to i32 -; CHECK-NEXT: [[TMP106:%.*]] = lshr i64 [[TMP104]], 32 -; CHECK-NEXT: [[TMP107:%.*]] = trunc i64 [[TMP106]] to i32 -; CHECK-NEXT: [[TMP108:%.*]] = add i32 [[TMP92]], [[TMP107]] -; CHECK-NEXT: [[TMP109:%.*]] = sub i32 [[TMP92]], [[TMP107]] -; CHECK-NEXT: [[TMP110:%.*]] = select i1 [[TMP100]], i32 [[TMP108]], i32 [[TMP109]] -; CHECK-NEXT: [[TMP111:%.*]] = zext i32 [[TMP110]] to i64 -; CHECK-NEXT: [[TMP112:%.*]] = zext i32 [[TMP87]] to i64 -; CHECK-NEXT: [[TMP113:%.*]] = mul i64 [[TMP111]], [[TMP112]] -; CHECK-NEXT: [[TMP114:%.*]] = trunc i64 [[TMP113]] to i32 -; CHECK-NEXT: [[TMP115:%.*]] = lshr i64 [[TMP113]], 32 -; CHECK-NEXT: [[TMP116:%.*]] = trunc i64 [[TMP115]] to i32 -; CHECK-NEXT: [[TMP117:%.*]] = mul i32 [[TMP116]], [[TMP88]] -; CHECK-NEXT: [[TMP118:%.*]] = sub i32 [[TMP87]], [[TMP117]] -; CHECK-NEXT: [[TMP119:%.*]] = icmp uge i32 [[TMP118]], [[TMP88]] -; CHECK-NEXT: [[TMP120:%.*]] = select i1 [[TMP119]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP121:%.*]] = icmp uge i32 [[TMP87]], [[TMP117]] -; CHECK-NEXT: [[TMP122:%.*]] = select i1 [[TMP121]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP123:%.*]] = and i32 [[TMP120]], [[TMP122]] -; CHECK-NEXT: [[TMP124:%.*]] = icmp eq i32 [[TMP123]], 0 -; CHECK-NEXT: [[TMP125:%.*]] = add i32 [[TMP116]], 1 -; CHECK-NEXT: [[TMP126:%.*]] = sub i32 [[TMP116]], 1 -; CHECK-NEXT: [[TMP127:%.*]] = select i1 [[TMP124]], i32 [[TMP116]], i32 [[TMP125]] -; CHECK-NEXT: [[TMP128:%.*]] = select i1 [[TMP121]], i32 [[TMP127]], i32 [[TMP126]] -; CHECK-NEXT: [[TMP129:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP128]], i64 2 -; CHECK-NEXT: [[TMP130:%.*]] = extractelement <4 x i32> [[X]], i64 3 -; CHECK-NEXT: [[TMP131:%.*]] = extractelement <4 x i32> [[Y]], i64 3 -; CHECK-NEXT: [[TMP132:%.*]] = uitofp i32 [[TMP131]] to float -; CHECK-NEXT: [[TMP133:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP132]]) -; CHECK-NEXT: [[TMP134:%.*]] = fmul fast float [[TMP133]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP135:%.*]] = fptoui float [[TMP134]] to i32 +; CHECK-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP30]], 1 +; CHECK-NEXT: [[TMP37:%.*]] = sub i32 [[TMP30]], 1 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP35]], i32 [[TMP36]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP34]], i32 [[TMP38]], i32 [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> undef, i32 [[TMP39]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i32> [[Y]], i64 1 +; CHECK-NEXT: [[TMP43:%.*]] = uitofp i32 [[TMP42]] to float +; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = fmul fast float [[TMP44]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP46:%.*]] = fptoui float [[TMP45]] to i32 +; CHECK-NEXT: [[TMP47:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = zext i32 [[TMP42]] to i64 +; CHECK-NEXT: [[TMP49:%.*]] = mul i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = trunc i64 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP51:%.*]] = lshr i64 [[TMP49]], 32 +; CHECK-NEXT: [[TMP52:%.*]] = trunc i64 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP53:%.*]] = sub i32 0, [[TMP50]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp eq i32 [[TMP52]], 0 +; CHECK-NEXT: [[TMP55:%.*]] = select i1 [[TMP54]], i32 [[TMP53]], i32 [[TMP50]] +; CHECK-NEXT: [[TMP56:%.*]] = zext i32 [[TMP55]] to i64 +; CHECK-NEXT: [[TMP57:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = trunc i64 [[TMP58]] to i32 +; CHECK-NEXT: [[TMP60:%.*]] = lshr i64 [[TMP58]], 32 +; CHECK-NEXT: [[TMP61:%.*]] = trunc i64 [[TMP60]] to i32 +; CHECK-NEXT: [[TMP62:%.*]] = add i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = sub i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP54]], i32 [[TMP62]], i32 [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP64]] to i64 +; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[TMP41]] to i64 +; CHECK-NEXT: [[TMP67:%.*]] = mul i64 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = lshr i64 [[TMP67]], 32 +; CHECK-NEXT: [[TMP70:%.*]] = trunc i64 [[TMP69]] to i32 +; CHECK-NEXT: [[TMP71:%.*]] = mul i32 [[TMP70]], [[TMP42]] +; CHECK-NEXT: [[TMP72:%.*]] = sub i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp uge i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP74:%.*]] = icmp uge i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP75:%.*]] = and i1 [[TMP73]], [[TMP74]] +; CHECK-NEXT: [[TMP76:%.*]] = add i32 [[TMP70]], 1 +; CHECK-NEXT: [[TMP77:%.*]] = sub i32 [[TMP70]], 1 +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP75]], i32 [[TMP76]], i32 [[TMP70]] +; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP74]], i32 [[TMP78]], i32 [[TMP77]] +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP79]], i64 1 +; CHECK-NEXT: [[TMP81:%.*]] = extractelement <4 x i32> [[X]], i64 2 +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <4 x i32> [[Y]], i64 2 +; CHECK-NEXT: [[TMP83:%.*]] = uitofp i32 [[TMP82]] to float +; CHECK-NEXT: [[TMP84:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP83]]) +; CHECK-NEXT: [[TMP85:%.*]] = fmul fast float [[TMP84]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP86:%.*]] = fptoui float [[TMP85]] to i32 +; CHECK-NEXT: [[TMP87:%.*]] = zext i32 [[TMP86]] to i64 +; CHECK-NEXT: [[TMP88:%.*]] = zext i32 [[TMP82]] to i64 +; CHECK-NEXT: [[TMP89:%.*]] = mul i64 [[TMP87]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = trunc i64 [[TMP89]] to i32 +; CHECK-NEXT: [[TMP91:%.*]] = lshr i64 [[TMP89]], 32 +; CHECK-NEXT: [[TMP92:%.*]] = trunc i64 [[TMP91]] to i32 +; CHECK-NEXT: [[TMP93:%.*]] = sub i32 0, [[TMP90]] +; CHECK-NEXT: [[TMP94:%.*]] = icmp eq i32 [[TMP92]], 0 +; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 [[TMP93]], i32 [[TMP90]] +; CHECK-NEXT: [[TMP96:%.*]] = zext i32 [[TMP95]] to i64 +; CHECK-NEXT: [[TMP97:%.*]] = zext i32 [[TMP86]] to i64 +; CHECK-NEXT: [[TMP98:%.*]] = mul i64 [[TMP96]], [[TMP97]] +; CHECK-NEXT: [[TMP99:%.*]] = trunc i64 [[TMP98]] to i32 +; CHECK-NEXT: [[TMP100:%.*]] = lshr i64 [[TMP98]], 32 +; CHECK-NEXT: [[TMP101:%.*]] = trunc i64 [[TMP100]] to i32 +; CHECK-NEXT: [[TMP102:%.*]] = add i32 [[TMP86]], [[TMP101]] +; CHECK-NEXT: [[TMP103:%.*]] = sub i32 [[TMP86]], [[TMP101]] +; CHECK-NEXT: [[TMP104:%.*]] = select i1 [[TMP94]], i32 [[TMP102]], i32 [[TMP103]] +; CHECK-NEXT: [[TMP105:%.*]] = zext i32 [[TMP104]] to i64 +; CHECK-NEXT: [[TMP106:%.*]] = zext i32 [[TMP81]] to i64 +; CHECK-NEXT: [[TMP107:%.*]] = mul i64 [[TMP105]], [[TMP106]] +; CHECK-NEXT: [[TMP108:%.*]] = trunc i64 [[TMP107]] to i32 +; CHECK-NEXT: [[TMP109:%.*]] = lshr i64 [[TMP107]], 32 +; CHECK-NEXT: [[TMP110:%.*]] = trunc i64 [[TMP109]] to i32 +; CHECK-NEXT: [[TMP111:%.*]] = mul i32 [[TMP110]], [[TMP82]] +; CHECK-NEXT: [[TMP112:%.*]] = sub i32 [[TMP81]], [[TMP111]] +; CHECK-NEXT: [[TMP113:%.*]] = icmp uge i32 [[TMP112]], [[TMP82]] +; CHECK-NEXT: [[TMP114:%.*]] = icmp uge i32 [[TMP81]], [[TMP111]] +; CHECK-NEXT: [[TMP115:%.*]] = and i1 [[TMP113]], [[TMP114]] +; CHECK-NEXT: [[TMP116:%.*]] = add i32 [[TMP110]], 1 +; CHECK-NEXT: [[TMP117:%.*]] = sub i32 [[TMP110]], 1 +; CHECK-NEXT: [[TMP118:%.*]] = select i1 [[TMP115]], i32 [[TMP116]], i32 [[TMP110]] +; CHECK-NEXT: [[TMP119:%.*]] = select i1 [[TMP114]], i32 [[TMP118]], i32 [[TMP117]] +; CHECK-NEXT: [[TMP120:%.*]] = insertelement <4 x i32> [[TMP80]], i32 [[TMP119]], i64 2 +; CHECK-NEXT: [[TMP121:%.*]] = extractelement <4 x i32> [[X]], i64 3 +; CHECK-NEXT: [[TMP122:%.*]] = extractelement <4 x i32> [[Y]], i64 3 +; CHECK-NEXT: [[TMP123:%.*]] = uitofp i32 [[TMP122]] to float +; CHECK-NEXT: [[TMP124:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP123]]) +; CHECK-NEXT: [[TMP125:%.*]] = fmul fast float [[TMP124]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP126:%.*]] = fptoui float [[TMP125]] to i32 +; CHECK-NEXT: [[TMP127:%.*]] = zext i32 [[TMP126]] to i64 +; CHECK-NEXT: [[TMP128:%.*]] = zext i32 [[TMP122]] to i64 +; CHECK-NEXT: [[TMP129:%.*]] = mul i64 [[TMP127]], [[TMP128]] +; CHECK-NEXT: [[TMP130:%.*]] = trunc i64 [[TMP129]] to i32 +; CHECK-NEXT: [[TMP131:%.*]] = lshr i64 [[TMP129]], 32 +; CHECK-NEXT: [[TMP132:%.*]] = trunc i64 [[TMP131]] to i32 +; CHECK-NEXT: [[TMP133:%.*]] = sub i32 0, [[TMP130]] +; CHECK-NEXT: [[TMP134:%.*]] = icmp eq i32 [[TMP132]], 0 +; CHECK-NEXT: [[TMP135:%.*]] = select i1 [[TMP134]], i32 [[TMP133]], i32 [[TMP130]] ; CHECK-NEXT: [[TMP136:%.*]] = zext i32 [[TMP135]] to i64 -; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP131]] to i64 +; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP126]] to i64 ; CHECK-NEXT: [[TMP138:%.*]] = mul i64 [[TMP136]], [[TMP137]] ; CHECK-NEXT: [[TMP139:%.*]] = trunc i64 [[TMP138]] to i32 ; CHECK-NEXT: [[TMP140:%.*]] = lshr i64 [[TMP138]], 32 ; CHECK-NEXT: [[TMP141:%.*]] = trunc i64 [[TMP140]] to i32 -; CHECK-NEXT: [[TMP142:%.*]] = sub i32 0, [[TMP139]] -; CHECK-NEXT: [[TMP143:%.*]] = icmp eq i32 [[TMP141]], 0 -; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP143]], i32 [[TMP142]], i32 [[TMP139]] +; CHECK-NEXT: [[TMP142:%.*]] = add i32 [[TMP126]], [[TMP141]] +; CHECK-NEXT: [[TMP143:%.*]] = sub i32 [[TMP126]], [[TMP141]] +; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP134]], i32 [[TMP142]], i32 [[TMP143]] ; CHECK-NEXT: [[TMP145:%.*]] = zext i32 [[TMP144]] to i64 -; CHECK-NEXT: [[TMP146:%.*]] = zext i32 [[TMP135]] to i64 +; CHECK-NEXT: [[TMP146:%.*]] = zext i32 [[TMP121]] to i64 ; CHECK-NEXT: [[TMP147:%.*]] = mul i64 [[TMP145]], [[TMP146]] ; CHECK-NEXT: [[TMP148:%.*]] = trunc i64 [[TMP147]] to i32 ; CHECK-NEXT: [[TMP149:%.*]] = lshr i64 [[TMP147]], 32 ; CHECK-NEXT: [[TMP150:%.*]] = trunc i64 [[TMP149]] to i32 -; CHECK-NEXT: [[TMP151:%.*]] = add i32 [[TMP135]], [[TMP150]] -; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP135]], [[TMP150]] -; CHECK-NEXT: [[TMP153:%.*]] = select i1 [[TMP143]], i32 [[TMP151]], i32 [[TMP152]] -; CHECK-NEXT: [[TMP154:%.*]] = zext i32 [[TMP153]] to i64 -; CHECK-NEXT: [[TMP155:%.*]] = zext i32 [[TMP130]] to i64 -; CHECK-NEXT: [[TMP156:%.*]] = mul i64 [[TMP154]], [[TMP155]] -; CHECK-NEXT: [[TMP157:%.*]] = trunc i64 [[TMP156]] to i32 -; CHECK-NEXT: [[TMP158:%.*]] = lshr i64 [[TMP156]], 32 -; CHECK-NEXT: [[TMP159:%.*]] = trunc i64 [[TMP158]] to i32 -; CHECK-NEXT: [[TMP160:%.*]] = mul i32 [[TMP159]], [[TMP131]] -; CHECK-NEXT: [[TMP161:%.*]] = sub i32 [[TMP130]], [[TMP160]] -; CHECK-NEXT: [[TMP162:%.*]] = icmp uge i32 [[TMP161]], [[TMP131]] -; CHECK-NEXT: [[TMP163:%.*]] = select i1 [[TMP162]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP164:%.*]] = icmp uge i32 [[TMP130]], [[TMP160]] -; CHECK-NEXT: [[TMP165:%.*]] = select i1 [[TMP164]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP166:%.*]] = and i32 [[TMP163]], [[TMP165]] -; CHECK-NEXT: [[TMP167:%.*]] = icmp eq i32 [[TMP166]], 0 -; CHECK-NEXT: [[TMP168:%.*]] = add i32 [[TMP159]], 1 -; CHECK-NEXT: [[TMP169:%.*]] = sub i32 [[TMP159]], 1 -; CHECK-NEXT: [[TMP170:%.*]] = select i1 [[TMP167]], i32 [[TMP159]], i32 [[TMP168]] -; CHECK-NEXT: [[TMP171:%.*]] = select i1 [[TMP164]], i32 [[TMP170]], i32 [[TMP169]] -; CHECK-NEXT: [[TMP172:%.*]] = insertelement <4 x i32> [[TMP129]], i32 [[TMP171]], i64 3 -; CHECK-NEXT: store <4 x i32> [[TMP172]], <4 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP151:%.*]] = mul i32 [[TMP150]], [[TMP122]] +; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP121]], [[TMP151]] +; CHECK-NEXT: [[TMP153:%.*]] = icmp uge i32 [[TMP152]], [[TMP122]] +; CHECK-NEXT: [[TMP154:%.*]] = icmp uge i32 [[TMP121]], [[TMP151]] +; CHECK-NEXT: [[TMP155:%.*]] = and i1 [[TMP153]], [[TMP154]] +; CHECK-NEXT: [[TMP156:%.*]] = add i32 [[TMP150]], 1 +; CHECK-NEXT: [[TMP157:%.*]] = sub i32 [[TMP150]], 1 +; CHECK-NEXT: [[TMP158:%.*]] = select i1 [[TMP155]], i32 [[TMP156]], i32 [[TMP150]] +; CHECK-NEXT: [[TMP159:%.*]] = select i1 [[TMP154]], i32 [[TMP158]], i32 [[TMP157]] +; CHECK-NEXT: [[TMP160:%.*]] = insertelement <4 x i32> [[TMP120]], i32 [[TMP159]], i64 3 +; CHECK-NEXT: store <4 x i32> [[TMP160]], <4 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: udiv_v4i32: @@ -1111,146 +1087,134 @@ ; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], [[TMP2]] ; CHECK-NEXT: [[TMP32:%.*]] = sub i32 [[TMP1]], [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP35:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP34]], [[TMP36]] -; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP37]], 0 -; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP38]], i32 [[TMP32]], i32 [[TMP39]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], i32 [[TMP41]], i32 [[TMP40]] -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> undef, i32 [[TMP42]], i64 0 -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i32> [[Y]], i64 1 -; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP45]] to float -; CHECK-NEXT: [[TMP47:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP46]]) -; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP47]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP49:%.*]] = fptoui float [[TMP48]] to i32 -; CHECK-NEXT: [[TMP50:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP51:%.*]] = zext i32 [[TMP45]] to i64 -; CHECK-NEXT: [[TMP52:%.*]] = mul i64 [[TMP50]], [[TMP51]] -; CHECK-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i32 -; CHECK-NEXT: [[TMP54:%.*]] = lshr i64 [[TMP52]], 32 -; CHECK-NEXT: [[TMP55:%.*]] = trunc i64 [[TMP54]] to i32 -; CHECK-NEXT: [[TMP56:%.*]] = sub i32 0, [[TMP53]] -; CHECK-NEXT: [[TMP57:%.*]] = icmp eq i32 [[TMP55]], 0 -; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 [[TMP53]] -; CHECK-NEXT: [[TMP59:%.*]] = zext i32 [[TMP58]] to i64 -; CHECK-NEXT: [[TMP60:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP61:%.*]] = mul i64 [[TMP59]], [[TMP60]] -; CHECK-NEXT: [[TMP62:%.*]] = trunc i64 [[TMP61]] to i32 -; CHECK-NEXT: [[TMP63:%.*]] = lshr i64 [[TMP61]], 32 -; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 -; CHECK-NEXT: [[TMP65:%.*]] = add i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = sub i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP57]], i32 [[TMP65]], i32 [[TMP66]] -; CHECK-NEXT: [[TMP68:%.*]] = zext i32 [[TMP67]] to i64 -; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP44]] to i64 -; CHECK-NEXT: [[TMP70:%.*]] = mul i64 [[TMP68]], [[TMP69]] -; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP72:%.*]] = lshr i64 [[TMP70]], 32 -; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 -; CHECK-NEXT: [[TMP74:%.*]] = mul i32 [[TMP73]], [[TMP45]] -; CHECK-NEXT: [[TMP75:%.*]] = sub i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = icmp uge i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP78:%.*]] = icmp uge i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP80:%.*]] = and i32 [[TMP77]], [[TMP79]] -; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i32 [[TMP80]], 0 -; CHECK-NEXT: [[TMP82:%.*]] = sub i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP83:%.*]] = add i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP81]], i32 [[TMP75]], i32 [[TMP82]] -; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP78]], i32 [[TMP84]], i32 [[TMP83]] -; CHECK-NEXT: [[TMP86:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP85]], i64 1 -; CHECK-NEXT: [[TMP87:%.*]] = extractelement <4 x i32> [[X]], i64 2 -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <4 x i32> [[Y]], i64 2 -; CHECK-NEXT: [[TMP89:%.*]] = uitofp i32 [[TMP88]] to float -; CHECK-NEXT: [[TMP90:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP89]]) -; CHECK-NEXT: [[TMP91:%.*]] = fmul fast float [[TMP90]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP92:%.*]] = fptoui float [[TMP91]] to i32 -; CHECK-NEXT: [[TMP93:%.*]] = zext i32 [[TMP92]] to i64 -; CHECK-NEXT: [[TMP94:%.*]] = zext i32 [[TMP88]] to i64 -; CHECK-NEXT: [[TMP95:%.*]] = mul i64 [[TMP93]], [[TMP94]] -; CHECK-NEXT: [[TMP96:%.*]] = trunc i64 [[TMP95]] to i32 -; CHECK-NEXT: [[TMP97:%.*]] = lshr i64 [[TMP95]], 32 -; CHECK-NEXT: [[TMP98:%.*]] = trunc i64 [[TMP97]] to i32 -; CHECK-NEXT: [[TMP99:%.*]] = sub i32 0, [[TMP96]] -; CHECK-NEXT: [[TMP100:%.*]] = icmp eq i32 [[TMP98]], 0 -; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[TMP99]], i32 [[TMP96]] -; CHECK-NEXT: [[TMP102:%.*]] = zext i32 [[TMP101]] to i64 -; CHECK-NEXT: [[TMP103:%.*]] = zext i32 [[TMP92]] to i64 -; CHECK-NEXT: [[TMP104:%.*]] = mul i64 [[TMP102]], [[TMP103]] -; CHECK-NEXT: [[TMP105:%.*]] = trunc i64 [[TMP104]] to i32 -; CHECK-NEXT: [[TMP106:%.*]] = lshr i64 [[TMP104]], 32 -; CHECK-NEXT: [[TMP107:%.*]] = trunc i64 [[TMP106]] to i32 -; CHECK-NEXT: [[TMP108:%.*]] = add i32 [[TMP92]], [[TMP107]] -; CHECK-NEXT: [[TMP109:%.*]] = sub i32 [[TMP92]], [[TMP107]] -; CHECK-NEXT: [[TMP110:%.*]] = select i1 [[TMP100]], i32 [[TMP108]], i32 [[TMP109]] -; CHECK-NEXT: [[TMP111:%.*]] = zext i32 [[TMP110]] to i64 -; CHECK-NEXT: [[TMP112:%.*]] = zext i32 [[TMP87]] to i64 -; CHECK-NEXT: [[TMP113:%.*]] = mul i64 [[TMP111]], [[TMP112]] -; CHECK-NEXT: [[TMP114:%.*]] = trunc i64 [[TMP113]] to i32 -; CHECK-NEXT: [[TMP115:%.*]] = lshr i64 [[TMP113]], 32 -; CHECK-NEXT: [[TMP116:%.*]] = trunc i64 [[TMP115]] to i32 -; CHECK-NEXT: [[TMP117:%.*]] = mul i32 [[TMP116]], [[TMP88]] -; CHECK-NEXT: [[TMP118:%.*]] = sub i32 [[TMP87]], [[TMP117]] -; CHECK-NEXT: [[TMP119:%.*]] = icmp uge i32 [[TMP118]], [[TMP88]] -; CHECK-NEXT: [[TMP120:%.*]] = select i1 [[TMP119]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP121:%.*]] = icmp uge i32 [[TMP87]], [[TMP117]] -; CHECK-NEXT: [[TMP122:%.*]] = select i1 [[TMP121]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP123:%.*]] = and i32 [[TMP120]], [[TMP122]] -; CHECK-NEXT: [[TMP124:%.*]] = icmp eq i32 [[TMP123]], 0 -; CHECK-NEXT: [[TMP125:%.*]] = sub i32 [[TMP118]], [[TMP88]] -; CHECK-NEXT: [[TMP126:%.*]] = add i32 [[TMP118]], [[TMP88]] -; CHECK-NEXT: [[TMP127:%.*]] = select i1 [[TMP124]], i32 [[TMP118]], i32 [[TMP125]] -; CHECK-NEXT: [[TMP128:%.*]] = select i1 [[TMP121]], i32 [[TMP127]], i32 [[TMP126]] -; CHECK-NEXT: [[TMP129:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP128]], i64 2 -; CHECK-NEXT: [[TMP130:%.*]] = extractelement <4 x i32> [[X]], i64 3 -; CHECK-NEXT: [[TMP131:%.*]] = extractelement <4 x i32> [[Y]], i64 3 -; CHECK-NEXT: [[TMP132:%.*]] = uitofp i32 [[TMP131]] to float -; CHECK-NEXT: [[TMP133:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP132]]) -; CHECK-NEXT: [[TMP134:%.*]] = fmul fast float [[TMP133]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP135:%.*]] = fptoui float [[TMP134]] to i32 +; CHECK-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = sub i32 [[TMP32]], [[TMP2]] +; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP32]], [[TMP2]] +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP35]], i32 [[TMP36]], i32 [[TMP32]] +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP34]], i32 [[TMP38]], i32 [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> undef, i32 [[TMP39]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i32> [[Y]], i64 1 +; CHECK-NEXT: [[TMP43:%.*]] = uitofp i32 [[TMP42]] to float +; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = fmul fast float [[TMP44]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP46:%.*]] = fptoui float [[TMP45]] to i32 +; CHECK-NEXT: [[TMP47:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = zext i32 [[TMP42]] to i64 +; CHECK-NEXT: [[TMP49:%.*]] = mul i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = trunc i64 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP51:%.*]] = lshr i64 [[TMP49]], 32 +; CHECK-NEXT: [[TMP52:%.*]] = trunc i64 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP53:%.*]] = sub i32 0, [[TMP50]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp eq i32 [[TMP52]], 0 +; CHECK-NEXT: [[TMP55:%.*]] = select i1 [[TMP54]], i32 [[TMP53]], i32 [[TMP50]] +; CHECK-NEXT: [[TMP56:%.*]] = zext i32 [[TMP55]] to i64 +; CHECK-NEXT: [[TMP57:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = trunc i64 [[TMP58]] to i32 +; CHECK-NEXT: [[TMP60:%.*]] = lshr i64 [[TMP58]], 32 +; CHECK-NEXT: [[TMP61:%.*]] = trunc i64 [[TMP60]] to i32 +; CHECK-NEXT: [[TMP62:%.*]] = add i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = sub i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP54]], i32 [[TMP62]], i32 [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP64]] to i64 +; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[TMP41]] to i64 +; CHECK-NEXT: [[TMP67:%.*]] = mul i64 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = lshr i64 [[TMP67]], 32 +; CHECK-NEXT: [[TMP70:%.*]] = trunc i64 [[TMP69]] to i32 +; CHECK-NEXT: [[TMP71:%.*]] = mul i32 [[TMP70]], [[TMP42]] +; CHECK-NEXT: [[TMP72:%.*]] = sub i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp uge i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP74:%.*]] = icmp uge i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP75:%.*]] = and i1 [[TMP73]], [[TMP74]] +; CHECK-NEXT: [[TMP76:%.*]] = sub i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP77:%.*]] = add i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP75]], i32 [[TMP76]], i32 [[TMP72]] +; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP74]], i32 [[TMP78]], i32 [[TMP77]] +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP79]], i64 1 +; CHECK-NEXT: [[TMP81:%.*]] = extractelement <4 x i32> [[X]], i64 2 +; CHECK-NEXT: [[TMP82:%.*]] = extractelement <4 x i32> [[Y]], i64 2 +; CHECK-NEXT: [[TMP83:%.*]] = uitofp i32 [[TMP82]] to float +; CHECK-NEXT: [[TMP84:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP83]]) +; CHECK-NEXT: [[TMP85:%.*]] = fmul fast float [[TMP84]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP86:%.*]] = fptoui float [[TMP85]] to i32 +; CHECK-NEXT: [[TMP87:%.*]] = zext i32 [[TMP86]] to i64 +; CHECK-NEXT: [[TMP88:%.*]] = zext i32 [[TMP82]] to i64 +; CHECK-NEXT: [[TMP89:%.*]] = mul i64 [[TMP87]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = trunc i64 [[TMP89]] to i32 +; CHECK-NEXT: [[TMP91:%.*]] = lshr i64 [[TMP89]], 32 +; CHECK-NEXT: [[TMP92:%.*]] = trunc i64 [[TMP91]] to i32 +; CHECK-NEXT: [[TMP93:%.*]] = sub i32 0, [[TMP90]] +; CHECK-NEXT: [[TMP94:%.*]] = icmp eq i32 [[TMP92]], 0 +; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 [[TMP93]], i32 [[TMP90]] +; CHECK-NEXT: [[TMP96:%.*]] = zext i32 [[TMP95]] to i64 +; CHECK-NEXT: [[TMP97:%.*]] = zext i32 [[TMP86]] to i64 +; CHECK-NEXT: [[TMP98:%.*]] = mul i64 [[TMP96]], [[TMP97]] +; CHECK-NEXT: [[TMP99:%.*]] = trunc i64 [[TMP98]] to i32 +; CHECK-NEXT: [[TMP100:%.*]] = lshr i64 [[TMP98]], 32 +; CHECK-NEXT: [[TMP101:%.*]] = trunc i64 [[TMP100]] to i32 +; CHECK-NEXT: [[TMP102:%.*]] = add i32 [[TMP86]], [[TMP101]] +; CHECK-NEXT: [[TMP103:%.*]] = sub i32 [[TMP86]], [[TMP101]] +; CHECK-NEXT: [[TMP104:%.*]] = select i1 [[TMP94]], i32 [[TMP102]], i32 [[TMP103]] +; CHECK-NEXT: [[TMP105:%.*]] = zext i32 [[TMP104]] to i64 +; CHECK-NEXT: [[TMP106:%.*]] = zext i32 [[TMP81]] to i64 +; CHECK-NEXT: [[TMP107:%.*]] = mul i64 [[TMP105]], [[TMP106]] +; CHECK-NEXT: [[TMP108:%.*]] = trunc i64 [[TMP107]] to i32 +; CHECK-NEXT: [[TMP109:%.*]] = lshr i64 [[TMP107]], 32 +; CHECK-NEXT: [[TMP110:%.*]] = trunc i64 [[TMP109]] to i32 +; CHECK-NEXT: [[TMP111:%.*]] = mul i32 [[TMP110]], [[TMP82]] +; CHECK-NEXT: [[TMP112:%.*]] = sub i32 [[TMP81]], [[TMP111]] +; CHECK-NEXT: [[TMP113:%.*]] = icmp uge i32 [[TMP112]], [[TMP82]] +; CHECK-NEXT: [[TMP114:%.*]] = icmp uge i32 [[TMP81]], [[TMP111]] +; CHECK-NEXT: [[TMP115:%.*]] = and i1 [[TMP113]], [[TMP114]] +; CHECK-NEXT: [[TMP116:%.*]] = sub i32 [[TMP112]], [[TMP82]] +; CHECK-NEXT: [[TMP117:%.*]] = add i32 [[TMP112]], [[TMP82]] +; CHECK-NEXT: [[TMP118:%.*]] = select i1 [[TMP115]], i32 [[TMP116]], i32 [[TMP112]] +; CHECK-NEXT: [[TMP119:%.*]] = select i1 [[TMP114]], i32 [[TMP118]], i32 [[TMP117]] +; CHECK-NEXT: [[TMP120:%.*]] = insertelement <4 x i32> [[TMP80]], i32 [[TMP119]], i64 2 +; CHECK-NEXT: [[TMP121:%.*]] = extractelement <4 x i32> [[X]], i64 3 +; CHECK-NEXT: [[TMP122:%.*]] = extractelement <4 x i32> [[Y]], i64 3 +; CHECK-NEXT: [[TMP123:%.*]] = uitofp i32 [[TMP122]] to float +; CHECK-NEXT: [[TMP124:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP123]]) +; CHECK-NEXT: [[TMP125:%.*]] = fmul fast float [[TMP124]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP126:%.*]] = fptoui float [[TMP125]] to i32 +; CHECK-NEXT: [[TMP127:%.*]] = zext i32 [[TMP126]] to i64 +; CHECK-NEXT: [[TMP128:%.*]] = zext i32 [[TMP122]] to i64 +; CHECK-NEXT: [[TMP129:%.*]] = mul i64 [[TMP127]], [[TMP128]] +; CHECK-NEXT: [[TMP130:%.*]] = trunc i64 [[TMP129]] to i32 +; CHECK-NEXT: [[TMP131:%.*]] = lshr i64 [[TMP129]], 32 +; CHECK-NEXT: [[TMP132:%.*]] = trunc i64 [[TMP131]] to i32 +; CHECK-NEXT: [[TMP133:%.*]] = sub i32 0, [[TMP130]] +; CHECK-NEXT: [[TMP134:%.*]] = icmp eq i32 [[TMP132]], 0 +; CHECK-NEXT: [[TMP135:%.*]] = select i1 [[TMP134]], i32 [[TMP133]], i32 [[TMP130]] ; CHECK-NEXT: [[TMP136:%.*]] = zext i32 [[TMP135]] to i64 -; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP131]] to i64 +; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP126]] to i64 ; CHECK-NEXT: [[TMP138:%.*]] = mul i64 [[TMP136]], [[TMP137]] ; CHECK-NEXT: [[TMP139:%.*]] = trunc i64 [[TMP138]] to i32 ; CHECK-NEXT: [[TMP140:%.*]] = lshr i64 [[TMP138]], 32 ; CHECK-NEXT: [[TMP141:%.*]] = trunc i64 [[TMP140]] to i32 -; CHECK-NEXT: [[TMP142:%.*]] = sub i32 0, [[TMP139]] -; CHECK-NEXT: [[TMP143:%.*]] = icmp eq i32 [[TMP141]], 0 -; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP143]], i32 [[TMP142]], i32 [[TMP139]] +; CHECK-NEXT: [[TMP142:%.*]] = add i32 [[TMP126]], [[TMP141]] +; CHECK-NEXT: [[TMP143:%.*]] = sub i32 [[TMP126]], [[TMP141]] +; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP134]], i32 [[TMP142]], i32 [[TMP143]] ; CHECK-NEXT: [[TMP145:%.*]] = zext i32 [[TMP144]] to i64 -; CHECK-NEXT: [[TMP146:%.*]] = zext i32 [[TMP135]] to i64 +; CHECK-NEXT: [[TMP146:%.*]] = zext i32 [[TMP121]] to i64 ; CHECK-NEXT: [[TMP147:%.*]] = mul i64 [[TMP145]], [[TMP146]] ; CHECK-NEXT: [[TMP148:%.*]] = trunc i64 [[TMP147]] to i32 ; CHECK-NEXT: [[TMP149:%.*]] = lshr i64 [[TMP147]], 32 ; CHECK-NEXT: [[TMP150:%.*]] = trunc i64 [[TMP149]] to i32 -; CHECK-NEXT: [[TMP151:%.*]] = add i32 [[TMP135]], [[TMP150]] -; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP135]], [[TMP150]] -; CHECK-NEXT: [[TMP153:%.*]] = select i1 [[TMP143]], i32 [[TMP151]], i32 [[TMP152]] -; CHECK-NEXT: [[TMP154:%.*]] = zext i32 [[TMP153]] to i64 -; CHECK-NEXT: [[TMP155:%.*]] = zext i32 [[TMP130]] to i64 -; CHECK-NEXT: [[TMP156:%.*]] = mul i64 [[TMP154]], [[TMP155]] -; CHECK-NEXT: [[TMP157:%.*]] = trunc i64 [[TMP156]] to i32 -; CHECK-NEXT: [[TMP158:%.*]] = lshr i64 [[TMP156]], 32 -; CHECK-NEXT: [[TMP159:%.*]] = trunc i64 [[TMP158]] to i32 -; CHECK-NEXT: [[TMP160:%.*]] = mul i32 [[TMP159]], [[TMP131]] -; CHECK-NEXT: [[TMP161:%.*]] = sub i32 [[TMP130]], [[TMP160]] -; CHECK-NEXT: [[TMP162:%.*]] = icmp uge i32 [[TMP161]], [[TMP131]] -; CHECK-NEXT: [[TMP163:%.*]] = select i1 [[TMP162]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP164:%.*]] = icmp uge i32 [[TMP130]], [[TMP160]] -; CHECK-NEXT: [[TMP165:%.*]] = select i1 [[TMP164]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP166:%.*]] = and i32 [[TMP163]], [[TMP165]] -; CHECK-NEXT: [[TMP167:%.*]] = icmp eq i32 [[TMP166]], 0 -; CHECK-NEXT: [[TMP168:%.*]] = sub i32 [[TMP161]], [[TMP131]] -; CHECK-NEXT: [[TMP169:%.*]] = add i32 [[TMP161]], [[TMP131]] -; CHECK-NEXT: [[TMP170:%.*]] = select i1 [[TMP167]], i32 [[TMP161]], i32 [[TMP168]] -; CHECK-NEXT: [[TMP171:%.*]] = select i1 [[TMP164]], i32 [[TMP170]], i32 [[TMP169]] -; CHECK-NEXT: [[TMP172:%.*]] = insertelement <4 x i32> [[TMP129]], i32 [[TMP171]], i64 3 -; CHECK-NEXT: store <4 x i32> [[TMP172]], <4 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP151:%.*]] = mul i32 [[TMP150]], [[TMP122]] +; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP121]], [[TMP151]] +; CHECK-NEXT: [[TMP153:%.*]] = icmp uge i32 [[TMP152]], [[TMP122]] +; CHECK-NEXT: [[TMP154:%.*]] = icmp uge i32 [[TMP121]], [[TMP151]] +; CHECK-NEXT: [[TMP155:%.*]] = and i1 [[TMP153]], [[TMP154]] +; CHECK-NEXT: [[TMP156:%.*]] = sub i32 [[TMP152]], [[TMP122]] +; CHECK-NEXT: [[TMP157:%.*]] = add i32 [[TMP152]], [[TMP122]] +; CHECK-NEXT: [[TMP158:%.*]] = select i1 [[TMP155]], i32 [[TMP156]], i32 [[TMP152]] +; CHECK-NEXT: [[TMP159:%.*]] = select i1 [[TMP154]], i32 [[TMP158]], i32 [[TMP157]] +; CHECK-NEXT: [[TMP160:%.*]] = insertelement <4 x i32> [[TMP120]], i32 [[TMP159]], i64 3 +; CHECK-NEXT: store <4 x i32> [[TMP160]], <4 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: urem_v4i32: @@ -1402,175 +1366,163 @@ ; CHECK-NEXT: [[TMP38:%.*]] = mul i32 [[TMP37]], [[TMP9]] ; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP8]], [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i32 [[TMP39]], [[TMP9]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP42:%.*]] = icmp uge i32 [[TMP8]], [[TMP38]] -; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP41]], [[TMP43]] -; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[TMP44]], 0 -; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP37]], 1 -; CHECK-NEXT: [[TMP47:%.*]] = sub i32 [[TMP37]], 1 -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP45]], i32 [[TMP37]], i32 [[TMP46]] -; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP42]], i32 [[TMP48]], i32 [[TMP47]] -; CHECK-NEXT: [[TMP50:%.*]] = xor i32 [[TMP49]], [[TMP5]] -; CHECK-NEXT: [[TMP51:%.*]] = sub i32 [[TMP50]], [[TMP5]] -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x i32> undef, i32 [[TMP51]], i64 0 -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <4 x i32> [[Y]], i64 1 -; CHECK-NEXT: [[TMP55:%.*]] = ashr i32 [[TMP53]], 31 -; CHECK-NEXT: [[TMP56:%.*]] = ashr i32 [[TMP54]], 31 -; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]] -; CHECK-NEXT: [[TMP58:%.*]] = add i32 [[TMP53]], [[TMP55]] -; CHECK-NEXT: [[TMP59:%.*]] = add i32 [[TMP54]], [[TMP56]] -; CHECK-NEXT: [[TMP60:%.*]] = xor i32 [[TMP58]], [[TMP55]] -; CHECK-NEXT: [[TMP61:%.*]] = xor i32 [[TMP59]], [[TMP56]] -; CHECK-NEXT: [[TMP62:%.*]] = uitofp i32 [[TMP61]] to float -; CHECK-NEXT: [[TMP63:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP62]]) -; CHECK-NEXT: [[TMP64:%.*]] = fmul fast float [[TMP63]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP65:%.*]] = fptoui float [[TMP64]] to i32 -; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[TMP65]] to i64 -; CHECK-NEXT: [[TMP67:%.*]] = zext i32 [[TMP61]] to i64 -; CHECK-NEXT: [[TMP68:%.*]] = mul i64 [[TMP66]], [[TMP67]] -; CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[TMP68]] to i32 -; CHECK-NEXT: [[TMP70:%.*]] = lshr i64 [[TMP68]], 32 -; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP72:%.*]] = sub i32 0, [[TMP69]] -; CHECK-NEXT: [[TMP73:%.*]] = icmp eq i32 [[TMP71]], 0 -; CHECK-NEXT: [[TMP74:%.*]] = select i1 [[TMP73]], i32 [[TMP72]], i32 [[TMP69]] -; CHECK-NEXT: [[TMP75:%.*]] = zext i32 [[TMP74]] to i64 -; CHECK-NEXT: [[TMP76:%.*]] = zext i32 [[TMP65]] to i64 -; CHECK-NEXT: [[TMP77:%.*]] = mul i64 [[TMP75]], [[TMP76]] -; CHECK-NEXT: [[TMP78:%.*]] = trunc i64 [[TMP77]] to i32 -; CHECK-NEXT: [[TMP79:%.*]] = lshr i64 [[TMP77]], 32 -; CHECK-NEXT: [[TMP80:%.*]] = trunc i64 [[TMP79]] to i32 -; CHECK-NEXT: [[TMP81:%.*]] = add i32 [[TMP65]], [[TMP80]] -; CHECK-NEXT: [[TMP82:%.*]] = sub i32 [[TMP65]], [[TMP80]] -; CHECK-NEXT: [[TMP83:%.*]] = select i1 [[TMP73]], i32 [[TMP81]], i32 [[TMP82]] -; CHECK-NEXT: [[TMP84:%.*]] = zext i32 [[TMP83]] to i64 -; CHECK-NEXT: [[TMP85:%.*]] = zext i32 [[TMP60]] to i64 -; CHECK-NEXT: [[TMP86:%.*]] = mul i64 [[TMP84]], [[TMP85]] -; CHECK-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i32 -; CHECK-NEXT: [[TMP88:%.*]] = lshr i64 [[TMP86]], 32 -; CHECK-NEXT: [[TMP89:%.*]] = trunc i64 [[TMP88]] to i32 -; CHECK-NEXT: [[TMP90:%.*]] = mul i32 [[TMP89]], [[TMP61]] -; CHECK-NEXT: [[TMP91:%.*]] = sub i32 [[TMP60]], [[TMP90]] -; CHECK-NEXT: [[TMP92:%.*]] = icmp uge i32 [[TMP91]], [[TMP61]] -; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP94:%.*]] = icmp uge i32 [[TMP60]], [[TMP90]] -; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP96:%.*]] = and i32 [[TMP93]], [[TMP95]] -; CHECK-NEXT: [[TMP97:%.*]] = icmp eq i32 [[TMP96]], 0 -; CHECK-NEXT: [[TMP98:%.*]] = add i32 [[TMP89]], 1 -; CHECK-NEXT: [[TMP99:%.*]] = sub i32 [[TMP89]], 1 -; CHECK-NEXT: [[TMP100:%.*]] = select i1 [[TMP97]], i32 [[TMP89]], i32 [[TMP98]] -; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP94]], i32 [[TMP100]], i32 [[TMP99]] -; CHECK-NEXT: [[TMP102:%.*]] = xor i32 [[TMP101]], [[TMP57]] -; CHECK-NEXT: [[TMP103:%.*]] = sub i32 [[TMP102]], [[TMP57]] -; CHECK-NEXT: [[TMP104:%.*]] = insertelement <4 x i32> [[TMP52]], i32 [[TMP103]], i64 1 -; CHECK-NEXT: [[TMP105:%.*]] = extractelement <4 x i32> [[X]], i64 2 -; CHECK-NEXT: [[TMP106:%.*]] = extractelement <4 x i32> [[Y]], i64 2 -; CHECK-NEXT: [[TMP107:%.*]] = ashr i32 [[TMP105]], 31 -; CHECK-NEXT: [[TMP108:%.*]] = ashr i32 [[TMP106]], 31 -; CHECK-NEXT: [[TMP109:%.*]] = xor i32 [[TMP107]], [[TMP108]] -; CHECK-NEXT: [[TMP110:%.*]] = add i32 [[TMP105]], [[TMP107]] -; CHECK-NEXT: [[TMP111:%.*]] = add i32 [[TMP106]], [[TMP108]] -; CHECK-NEXT: [[TMP112:%.*]] = xor i32 [[TMP110]], [[TMP107]] -; CHECK-NEXT: [[TMP113:%.*]] = xor i32 [[TMP111]], [[TMP108]] -; CHECK-NEXT: [[TMP114:%.*]] = uitofp i32 [[TMP113]] to float -; CHECK-NEXT: [[TMP115:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP114]]) -; CHECK-NEXT: [[TMP116:%.*]] = fmul fast float [[TMP115]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP117:%.*]] = fptoui float [[TMP116]] to i32 -; CHECK-NEXT: [[TMP118:%.*]] = zext i32 [[TMP117]] to i64 -; CHECK-NEXT: [[TMP119:%.*]] = zext i32 [[TMP113]] to i64 -; CHECK-NEXT: [[TMP120:%.*]] = mul i64 [[TMP118]], [[TMP119]] -; CHECK-NEXT: [[TMP121:%.*]] = trunc i64 [[TMP120]] to i32 -; CHECK-NEXT: [[TMP122:%.*]] = lshr i64 [[TMP120]], 32 -; CHECK-NEXT: [[TMP123:%.*]] = trunc i64 [[TMP122]] to i32 -; CHECK-NEXT: [[TMP124:%.*]] = sub i32 0, [[TMP121]] -; CHECK-NEXT: [[TMP125:%.*]] = icmp eq i32 [[TMP123]], 0 -; CHECK-NEXT: [[TMP126:%.*]] = select i1 [[TMP125]], i32 [[TMP124]], i32 [[TMP121]] -; CHECK-NEXT: [[TMP127:%.*]] = zext i32 [[TMP126]] to i64 -; CHECK-NEXT: [[TMP128:%.*]] = zext i32 [[TMP117]] to i64 -; CHECK-NEXT: [[TMP129:%.*]] = mul i64 [[TMP127]], [[TMP128]] -; CHECK-NEXT: [[TMP130:%.*]] = trunc i64 [[TMP129]] to i32 -; CHECK-NEXT: [[TMP131:%.*]] = lshr i64 [[TMP129]], 32 -; CHECK-NEXT: [[TMP132:%.*]] = trunc i64 [[TMP131]] to i32 -; CHECK-NEXT: [[TMP133:%.*]] = add i32 [[TMP117]], [[TMP132]] -; CHECK-NEXT: [[TMP134:%.*]] = sub i32 [[TMP117]], [[TMP132]] -; CHECK-NEXT: [[TMP135:%.*]] = select i1 [[TMP125]], i32 [[TMP133]], i32 [[TMP134]] -; CHECK-NEXT: [[TMP136:%.*]] = zext i32 [[TMP135]] to i64 -; CHECK-NEXT: [[TMP137:%.*]] = zext i32 [[TMP112]] to i64 -; CHECK-NEXT: [[TMP138:%.*]] = mul i64 [[TMP136]], [[TMP137]] -; CHECK-NEXT: [[TMP139:%.*]] = trunc i64 [[TMP138]] to i32 -; CHECK-NEXT: [[TMP140:%.*]] = lshr i64 [[TMP138]], 32 -; CHECK-NEXT: [[TMP141:%.*]] = trunc i64 [[TMP140]] to i32 -; CHECK-NEXT: [[TMP142:%.*]] = mul i32 [[TMP141]], [[TMP113]] -; CHECK-NEXT: [[TMP143:%.*]] = sub i32 [[TMP112]], [[TMP142]] -; CHECK-NEXT: [[TMP144:%.*]] = icmp uge i32 [[TMP143]], [[TMP113]] -; CHECK-NEXT: [[TMP145:%.*]] = select i1 [[TMP144]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP146:%.*]] = icmp uge i32 [[TMP112]], [[TMP142]] -; CHECK-NEXT: [[TMP147:%.*]] = select i1 [[TMP146]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP148:%.*]] = and i32 [[TMP145]], [[TMP147]] -; CHECK-NEXT: [[TMP149:%.*]] = icmp eq i32 [[TMP148]], 0 -; CHECK-NEXT: [[TMP150:%.*]] = add i32 [[TMP141]], 1 -; CHECK-NEXT: [[TMP151:%.*]] = sub i32 [[TMP141]], 1 -; CHECK-NEXT: [[TMP152:%.*]] = select i1 [[TMP149]], i32 [[TMP141]], i32 [[TMP150]] -; CHECK-NEXT: [[TMP153:%.*]] = select i1 [[TMP146]], i32 [[TMP152]], i32 [[TMP151]] -; CHECK-NEXT: [[TMP154:%.*]] = xor i32 [[TMP153]], [[TMP109]] -; CHECK-NEXT: [[TMP155:%.*]] = sub i32 [[TMP154]], [[TMP109]] -; CHECK-NEXT: [[TMP156:%.*]] = insertelement <4 x i32> [[TMP104]], i32 [[TMP155]], i64 2 -; CHECK-NEXT: [[TMP157:%.*]] = extractelement <4 x i32> [[X]], i64 3 -; CHECK-NEXT: [[TMP158:%.*]] = extractelement <4 x i32> [[Y]], i64 3 -; CHECK-NEXT: [[TMP159:%.*]] = ashr i32 [[TMP157]], 31 -; CHECK-NEXT: [[TMP160:%.*]] = ashr i32 [[TMP158]], 31 -; CHECK-NEXT: [[TMP161:%.*]] = xor i32 [[TMP159]], [[TMP160]] -; CHECK-NEXT: [[TMP162:%.*]] = add i32 [[TMP157]], [[TMP159]] -; CHECK-NEXT: [[TMP163:%.*]] = add i32 [[TMP158]], [[TMP160]] -; CHECK-NEXT: [[TMP164:%.*]] = xor i32 [[TMP162]], [[TMP159]] -; CHECK-NEXT: [[TMP165:%.*]] = xor i32 [[TMP163]], [[TMP160]] -; CHECK-NEXT: [[TMP166:%.*]] = uitofp i32 [[TMP165]] to float -; CHECK-NEXT: [[TMP167:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP166]]) -; CHECK-NEXT: [[TMP168:%.*]] = fmul fast float [[TMP167]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP169:%.*]] = fptoui float [[TMP168]] to i32 +; CHECK-NEXT: [[TMP41:%.*]] = icmp uge i32 [[TMP8]], [[TMP38]] +; CHECK-NEXT: [[TMP42:%.*]] = and i1 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP37]], 1 +; CHECK-NEXT: [[TMP44:%.*]] = sub i32 [[TMP37]], 1 +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP42]], i32 [[TMP43]], i32 [[TMP37]] +; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP41]], i32 [[TMP45]], i32 [[TMP44]] +; CHECK-NEXT: [[TMP47:%.*]] = xor i32 [[TMP46]], [[TMP5]] +; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP47]], [[TMP5]] +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <4 x i32> undef, i32 [[TMP48]], i64 0 +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <4 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP51:%.*]] = extractelement <4 x i32> [[Y]], i64 1 +; CHECK-NEXT: [[TMP52:%.*]] = ashr i32 [[TMP50]], 31 +; CHECK-NEXT: [[TMP53:%.*]] = ashr i32 [[TMP51]], 31 +; CHECK-NEXT: [[TMP54:%.*]] = xor i32 [[TMP52]], [[TMP53]] +; CHECK-NEXT: [[TMP55:%.*]] = add i32 [[TMP50]], [[TMP52]] +; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[TMP51]], [[TMP53]] +; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP52]] +; CHECK-NEXT: [[TMP58:%.*]] = xor i32 [[TMP56]], [[TMP53]] +; CHECK-NEXT: [[TMP59:%.*]] = uitofp i32 [[TMP58]] to float +; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP59]]) +; CHECK-NEXT: [[TMP61:%.*]] = fmul fast float [[TMP60]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP62:%.*]] = fptoui float [[TMP61]] to i32 +; CHECK-NEXT: [[TMP63:%.*]] = zext i32 [[TMP62]] to i64 +; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[TMP58]] to i64 +; CHECK-NEXT: [[TMP65:%.*]] = mul i64 [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = trunc i64 [[TMP65]] to i32 +; CHECK-NEXT: [[TMP67:%.*]] = lshr i64 [[TMP65]], 32 +; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = sub i32 0, [[TMP66]] +; CHECK-NEXT: [[TMP70:%.*]] = icmp eq i32 [[TMP68]], 0 +; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[TMP69]], i32 [[TMP66]] +; CHECK-NEXT: [[TMP72:%.*]] = zext i32 [[TMP71]] to i64 +; CHECK-NEXT: [[TMP73:%.*]] = zext i32 [[TMP62]] to i64 +; CHECK-NEXT: [[TMP74:%.*]] = mul i64 [[TMP72]], [[TMP73]] +; CHECK-NEXT: [[TMP75:%.*]] = trunc i64 [[TMP74]] to i32 +; CHECK-NEXT: [[TMP76:%.*]] = lshr i64 [[TMP74]], 32 +; CHECK-NEXT: [[TMP77:%.*]] = trunc i64 [[TMP76]] to i32 +; CHECK-NEXT: [[TMP78:%.*]] = add i32 [[TMP62]], [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = sub i32 [[TMP62]], [[TMP77]] +; CHECK-NEXT: [[TMP80:%.*]] = select i1 [[TMP70]], i32 [[TMP78]], i32 [[TMP79]] +; CHECK-NEXT: [[TMP81:%.*]] = zext i32 [[TMP80]] to i64 +; CHECK-NEXT: [[TMP82:%.*]] = zext i32 [[TMP57]] to i64 +; CHECK-NEXT: [[TMP83:%.*]] = mul i64 [[TMP81]], [[TMP82]] +; CHECK-NEXT: [[TMP84:%.*]] = trunc i64 [[TMP83]] to i32 +; CHECK-NEXT: [[TMP85:%.*]] = lshr i64 [[TMP83]], 32 +; CHECK-NEXT: [[TMP86:%.*]] = trunc i64 [[TMP85]] to i32 +; CHECK-NEXT: [[TMP87:%.*]] = mul i32 [[TMP86]], [[TMP58]] +; CHECK-NEXT: [[TMP88:%.*]] = sub i32 [[TMP57]], [[TMP87]] +; CHECK-NEXT: [[TMP89:%.*]] = icmp uge i32 [[TMP88]], [[TMP58]] +; CHECK-NEXT: [[TMP90:%.*]] = icmp uge i32 [[TMP57]], [[TMP87]] +; CHECK-NEXT: [[TMP91:%.*]] = and i1 [[TMP89]], [[TMP90]] +; CHECK-NEXT: [[TMP92:%.*]] = add i32 [[TMP86]], 1 +; CHECK-NEXT: [[TMP93:%.*]] = sub i32 [[TMP86]], 1 +; CHECK-NEXT: [[TMP94:%.*]] = select i1 [[TMP91]], i32 [[TMP92]], i32 [[TMP86]] +; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP90]], i32 [[TMP94]], i32 [[TMP93]] +; CHECK-NEXT: [[TMP96:%.*]] = xor i32 [[TMP95]], [[TMP54]] +; CHECK-NEXT: [[TMP97:%.*]] = sub i32 [[TMP96]], [[TMP54]] +; CHECK-NEXT: [[TMP98:%.*]] = insertelement <4 x i32> [[TMP49]], i32 [[TMP97]], i64 1 +; CHECK-NEXT: [[TMP99:%.*]] = extractelement <4 x i32> [[X]], i64 2 +; CHECK-NEXT: [[TMP100:%.*]] = extractelement <4 x i32> [[Y]], i64 2 +; CHECK-NEXT: [[TMP101:%.*]] = ashr i32 [[TMP99]], 31 +; CHECK-NEXT: [[TMP102:%.*]] = ashr i32 [[TMP100]], 31 +; CHECK-NEXT: [[TMP103:%.*]] = xor i32 [[TMP101]], [[TMP102]] +; CHECK-NEXT: [[TMP104:%.*]] = add i32 [[TMP99]], [[TMP101]] +; CHECK-NEXT: [[TMP105:%.*]] = add i32 [[TMP100]], [[TMP102]] +; CHECK-NEXT: [[TMP106:%.*]] = xor i32 [[TMP104]], [[TMP101]] +; CHECK-NEXT: [[TMP107:%.*]] = xor i32 [[TMP105]], [[TMP102]] +; CHECK-NEXT: [[TMP108:%.*]] = uitofp i32 [[TMP107]] to float +; CHECK-NEXT: [[TMP109:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP108]]) +; CHECK-NEXT: [[TMP110:%.*]] = fmul fast float [[TMP109]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP111:%.*]] = fptoui float [[TMP110]] to i32 +; CHECK-NEXT: [[TMP112:%.*]] = zext i32 [[TMP111]] to i64 +; CHECK-NEXT: [[TMP113:%.*]] = zext i32 [[TMP107]] to i64 +; CHECK-NEXT: [[TMP114:%.*]] = mul i64 [[TMP112]], [[TMP113]] +; CHECK-NEXT: [[TMP115:%.*]] = trunc i64 [[TMP114]] to i32 +; CHECK-NEXT: [[TMP116:%.*]] = lshr i64 [[TMP114]], 32 +; CHECK-NEXT: [[TMP117:%.*]] = trunc i64 [[TMP116]] to i32 +; CHECK-NEXT: [[TMP118:%.*]] = sub i32 0, [[TMP115]] +; CHECK-NEXT: [[TMP119:%.*]] = icmp eq i32 [[TMP117]], 0 +; CHECK-NEXT: [[TMP120:%.*]] = select i1 [[TMP119]], i32 [[TMP118]], i32 [[TMP115]] +; CHECK-NEXT: [[TMP121:%.*]] = zext i32 [[TMP120]] to i64 +; CHECK-NEXT: [[TMP122:%.*]] = zext i32 [[TMP111]] to i64 +; CHECK-NEXT: [[TMP123:%.*]] = mul i64 [[TMP121]], [[TMP122]] +; CHECK-NEXT: [[TMP124:%.*]] = trunc i64 [[TMP123]] to i32 +; CHECK-NEXT: [[TMP125:%.*]] = lshr i64 [[TMP123]], 32 +; CHECK-NEXT: [[TMP126:%.*]] = trunc i64 [[TMP125]] to i32 +; CHECK-NEXT: [[TMP127:%.*]] = add i32 [[TMP111]], [[TMP126]] +; CHECK-NEXT: [[TMP128:%.*]] = sub i32 [[TMP111]], [[TMP126]] +; CHECK-NEXT: [[TMP129:%.*]] = select i1 [[TMP119]], i32 [[TMP127]], i32 [[TMP128]] +; CHECK-NEXT: [[TMP130:%.*]] = zext i32 [[TMP129]] to i64 +; CHECK-NEXT: [[TMP131:%.*]] = zext i32 [[TMP106]] to i64 +; CHECK-NEXT: [[TMP132:%.*]] = mul i64 [[TMP130]], [[TMP131]] +; CHECK-NEXT: [[TMP133:%.*]] = trunc i64 [[TMP132]] to i32 +; CHECK-NEXT: [[TMP134:%.*]] = lshr i64 [[TMP132]], 32 +; CHECK-NEXT: [[TMP135:%.*]] = trunc i64 [[TMP134]] to i32 +; CHECK-NEXT: [[TMP136:%.*]] = mul i32 [[TMP135]], [[TMP107]] +; CHECK-NEXT: [[TMP137:%.*]] = sub i32 [[TMP106]], [[TMP136]] +; CHECK-NEXT: [[TMP138:%.*]] = icmp uge i32 [[TMP137]], [[TMP107]] +; CHECK-NEXT: [[TMP139:%.*]] = icmp uge i32 [[TMP106]], [[TMP136]] +; CHECK-NEXT: [[TMP140:%.*]] = and i1 [[TMP138]], [[TMP139]] +; CHECK-NEXT: [[TMP141:%.*]] = add i32 [[TMP135]], 1 +; CHECK-NEXT: [[TMP142:%.*]] = sub i32 [[TMP135]], 1 +; CHECK-NEXT: [[TMP143:%.*]] = select i1 [[TMP140]], i32 [[TMP141]], i32 [[TMP135]] +; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP139]], i32 [[TMP143]], i32 [[TMP142]] +; CHECK-NEXT: [[TMP145:%.*]] = xor i32 [[TMP144]], [[TMP103]] +; CHECK-NEXT: [[TMP146:%.*]] = sub i32 [[TMP145]], [[TMP103]] +; CHECK-NEXT: [[TMP147:%.*]] = insertelement <4 x i32> [[TMP98]], i32 [[TMP146]], i64 2 +; CHECK-NEXT: [[TMP148:%.*]] = extractelement <4 x i32> [[X]], i64 3 +; CHECK-NEXT: [[TMP149:%.*]] = extractelement <4 x i32> [[Y]], i64 3 +; CHECK-NEXT: [[TMP150:%.*]] = ashr i32 [[TMP148]], 31 +; CHECK-NEXT: [[TMP151:%.*]] = ashr i32 [[TMP149]], 31 +; CHECK-NEXT: [[TMP152:%.*]] = xor i32 [[TMP150]], [[TMP151]] +; CHECK-NEXT: [[TMP153:%.*]] = add i32 [[TMP148]], [[TMP150]] +; CHECK-NEXT: [[TMP154:%.*]] = add i32 [[TMP149]], [[TMP151]] +; CHECK-NEXT: [[TMP155:%.*]] = xor i32 [[TMP153]], [[TMP150]] +; CHECK-NEXT: [[TMP156:%.*]] = xor i32 [[TMP154]], [[TMP151]] +; CHECK-NEXT: [[TMP157:%.*]] = uitofp i32 [[TMP156]] to float +; CHECK-NEXT: [[TMP158:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP157]]) +; CHECK-NEXT: [[TMP159:%.*]] = fmul fast float [[TMP158]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP160:%.*]] = fptoui float [[TMP159]] to i32 +; CHECK-NEXT: [[TMP161:%.*]] = zext i32 [[TMP160]] to i64 +; CHECK-NEXT: [[TMP162:%.*]] = zext i32 [[TMP156]] to i64 +; CHECK-NEXT: [[TMP163:%.*]] = mul i64 [[TMP161]], [[TMP162]] +; CHECK-NEXT: [[TMP164:%.*]] = trunc i64 [[TMP163]] to i32 +; CHECK-NEXT: [[TMP165:%.*]] = lshr i64 [[TMP163]], 32 +; CHECK-NEXT: [[TMP166:%.*]] = trunc i64 [[TMP165]] to i32 +; CHECK-NEXT: [[TMP167:%.*]] = sub i32 0, [[TMP164]] +; CHECK-NEXT: [[TMP168:%.*]] = icmp eq i32 [[TMP166]], 0 +; CHECK-NEXT: [[TMP169:%.*]] = select i1 [[TMP168]], i32 [[TMP167]], i32 [[TMP164]] ; CHECK-NEXT: [[TMP170:%.*]] = zext i32 [[TMP169]] to i64 -; CHECK-NEXT: [[TMP171:%.*]] = zext i32 [[TMP165]] to i64 +; CHECK-NEXT: [[TMP171:%.*]] = zext i32 [[TMP160]] to i64 ; CHECK-NEXT: [[TMP172:%.*]] = mul i64 [[TMP170]], [[TMP171]] ; CHECK-NEXT: [[TMP173:%.*]] = trunc i64 [[TMP172]] to i32 ; CHECK-NEXT: [[TMP174:%.*]] = lshr i64 [[TMP172]], 32 ; CHECK-NEXT: [[TMP175:%.*]] = trunc i64 [[TMP174]] to i32 -; CHECK-NEXT: [[TMP176:%.*]] = sub i32 0, [[TMP173]] -; CHECK-NEXT: [[TMP177:%.*]] = icmp eq i32 [[TMP175]], 0 -; CHECK-NEXT: [[TMP178:%.*]] = select i1 [[TMP177]], i32 [[TMP176]], i32 [[TMP173]] +; CHECK-NEXT: [[TMP176:%.*]] = add i32 [[TMP160]], [[TMP175]] +; CHECK-NEXT: [[TMP177:%.*]] = sub i32 [[TMP160]], [[TMP175]] +; CHECK-NEXT: [[TMP178:%.*]] = select i1 [[TMP168]], i32 [[TMP176]], i32 [[TMP177]] ; CHECK-NEXT: [[TMP179:%.*]] = zext i32 [[TMP178]] to i64 -; CHECK-NEXT: [[TMP180:%.*]] = zext i32 [[TMP169]] to i64 +; CHECK-NEXT: [[TMP180:%.*]] = zext i32 [[TMP155]] to i64 ; CHECK-NEXT: [[TMP181:%.*]] = mul i64 [[TMP179]], [[TMP180]] ; CHECK-NEXT: [[TMP182:%.*]] = trunc i64 [[TMP181]] to i32 ; CHECK-NEXT: [[TMP183:%.*]] = lshr i64 [[TMP181]], 32 ; CHECK-NEXT: [[TMP184:%.*]] = trunc i64 [[TMP183]] to i32 -; CHECK-NEXT: [[TMP185:%.*]] = add i32 [[TMP169]], [[TMP184]] -; CHECK-NEXT: [[TMP186:%.*]] = sub i32 [[TMP169]], [[TMP184]] -; CHECK-NEXT: [[TMP187:%.*]] = select i1 [[TMP177]], i32 [[TMP185]], i32 [[TMP186]] -; CHECK-NEXT: [[TMP188:%.*]] = zext i32 [[TMP187]] to i64 -; CHECK-NEXT: [[TMP189:%.*]] = zext i32 [[TMP164]] to i64 -; CHECK-NEXT: [[TMP190:%.*]] = mul i64 [[TMP188]], [[TMP189]] -; CHECK-NEXT: [[TMP191:%.*]] = trunc i64 [[TMP190]] to i32 -; CHECK-NEXT: [[TMP192:%.*]] = lshr i64 [[TMP190]], 32 -; CHECK-NEXT: [[TMP193:%.*]] = trunc i64 [[TMP192]] to i32 -; CHECK-NEXT: [[TMP194:%.*]] = mul i32 [[TMP193]], [[TMP165]] -; CHECK-NEXT: [[TMP195:%.*]] = sub i32 [[TMP164]], [[TMP194]] -; CHECK-NEXT: [[TMP196:%.*]] = icmp uge i32 [[TMP195]], [[TMP165]] -; CHECK-NEXT: [[TMP197:%.*]] = select i1 [[TMP196]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP198:%.*]] = icmp uge i32 [[TMP164]], [[TMP194]] -; CHECK-NEXT: [[TMP199:%.*]] = select i1 [[TMP198]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP200:%.*]] = and i32 [[TMP197]], [[TMP199]] -; CHECK-NEXT: [[TMP201:%.*]] = icmp eq i32 [[TMP200]], 0 -; CHECK-NEXT: [[TMP202:%.*]] = add i32 [[TMP193]], 1 -; CHECK-NEXT: [[TMP203:%.*]] = sub i32 [[TMP193]], 1 -; CHECK-NEXT: [[TMP204:%.*]] = select i1 [[TMP201]], i32 [[TMP193]], i32 [[TMP202]] -; CHECK-NEXT: [[TMP205:%.*]] = select i1 [[TMP198]], i32 [[TMP204]], i32 [[TMP203]] -; CHECK-NEXT: [[TMP206:%.*]] = xor i32 [[TMP205]], [[TMP161]] -; CHECK-NEXT: [[TMP207:%.*]] = sub i32 [[TMP206]], [[TMP161]] -; CHECK-NEXT: [[TMP208:%.*]] = insertelement <4 x i32> [[TMP156]], i32 [[TMP207]], i64 3 -; CHECK-NEXT: store <4 x i32> [[TMP208]], <4 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP185:%.*]] = mul i32 [[TMP184]], [[TMP156]] +; CHECK-NEXT: [[TMP186:%.*]] = sub i32 [[TMP155]], [[TMP185]] +; CHECK-NEXT: [[TMP187:%.*]] = icmp uge i32 [[TMP186]], [[TMP156]] +; CHECK-NEXT: [[TMP188:%.*]] = icmp uge i32 [[TMP155]], [[TMP185]] +; CHECK-NEXT: [[TMP189:%.*]] = and i1 [[TMP187]], [[TMP188]] +; CHECK-NEXT: [[TMP190:%.*]] = add i32 [[TMP184]], 1 +; CHECK-NEXT: [[TMP191:%.*]] = sub i32 [[TMP184]], 1 +; CHECK-NEXT: [[TMP192:%.*]] = select i1 [[TMP189]], i32 [[TMP190]], i32 [[TMP184]] +; CHECK-NEXT: [[TMP193:%.*]] = select i1 [[TMP188]], i32 [[TMP192]], i32 [[TMP191]] +; CHECK-NEXT: [[TMP194:%.*]] = xor i32 [[TMP193]], [[TMP152]] +; CHECK-NEXT: [[TMP195:%.*]] = sub i32 [[TMP194]], [[TMP152]] +; CHECK-NEXT: [[TMP196:%.*]] = insertelement <4 x i32> [[TMP147]], i32 [[TMP195]], i64 3 +; CHECK-NEXT: store <4 x i32> [[TMP196]], <4 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: sdiv_v4i32: @@ -1757,172 +1709,160 @@ ; CHECK-NEXT: [[TMP37:%.*]] = mul i32 [[TMP36]], [[TMP8]] ; CHECK-NEXT: [[TMP38:%.*]] = sub i32 [[TMP7]], [[TMP37]] ; CHECK-NEXT: [[TMP39:%.*]] = icmp uge i32 [[TMP38]], [[TMP8]] -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP41:%.*]] = icmp uge i32 [[TMP7]], [[TMP37]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], [[TMP42]] -; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], 0 -; CHECK-NEXT: [[TMP45:%.*]] = sub i32 [[TMP38]], [[TMP8]] -; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP38]], [[TMP8]] -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP44]], i32 [[TMP38]], i32 [[TMP45]] -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], i32 [[TMP47]], i32 [[TMP46]] -; CHECK-NEXT: [[TMP49:%.*]] = xor i32 [[TMP48]], [[TMP3]] -; CHECK-NEXT: [[TMP50:%.*]] = sub i32 [[TMP49]], [[TMP3]] -; CHECK-NEXT: [[TMP51:%.*]] = insertelement <4 x i32> undef, i32 [[TMP50]], i64 0 -; CHECK-NEXT: [[TMP52:%.*]] = extractelement <4 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[Y]], i64 1 -; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP52]], 31 -; CHECK-NEXT: [[TMP55:%.*]] = ashr i32 [[TMP53]], 31 -; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[TMP52]], [[TMP54]] -; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[TMP53]], [[TMP55]] -; CHECK-NEXT: [[TMP58:%.*]] = xor i32 [[TMP56]], [[TMP54]] -; CHECK-NEXT: [[TMP59:%.*]] = xor i32 [[TMP57]], [[TMP55]] -; CHECK-NEXT: [[TMP60:%.*]] = uitofp i32 [[TMP59]] to float -; CHECK-NEXT: [[TMP61:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP60]]) -; CHECK-NEXT: [[TMP62:%.*]] = fmul fast float [[TMP61]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP63:%.*]] = fptoui float [[TMP62]] to i32 -; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[TMP63]] to i64 -; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP59]] to i64 -; CHECK-NEXT: [[TMP66:%.*]] = mul i64 [[TMP64]], [[TMP65]] -; CHECK-NEXT: [[TMP67:%.*]] = trunc i64 [[TMP66]] to i32 -; CHECK-NEXT: [[TMP68:%.*]] = lshr i64 [[TMP66]], 32 -; CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[TMP68]] to i32 -; CHECK-NEXT: [[TMP70:%.*]] = sub i32 0, [[TMP67]] -; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[TMP69]], 0 -; CHECK-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP70]], i32 [[TMP67]] -; CHECK-NEXT: [[TMP73:%.*]] = zext i32 [[TMP72]] to i64 -; CHECK-NEXT: [[TMP74:%.*]] = zext i32 [[TMP63]] to i64 -; CHECK-NEXT: [[TMP75:%.*]] = mul i64 [[TMP73]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = trunc i64 [[TMP75]] to i32 -; CHECK-NEXT: [[TMP77:%.*]] = lshr i64 [[TMP75]], 32 -; CHECK-NEXT: [[TMP78:%.*]] = trunc i64 [[TMP77]] to i32 -; CHECK-NEXT: [[TMP79:%.*]] = add i32 [[TMP63]], [[TMP78]] -; CHECK-NEXT: [[TMP80:%.*]] = sub i32 [[TMP63]], [[TMP78]] -; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP71]], i32 [[TMP79]], i32 [[TMP80]] -; CHECK-NEXT: [[TMP82:%.*]] = zext i32 [[TMP81]] to i64 -; CHECK-NEXT: [[TMP83:%.*]] = zext i32 [[TMP58]] to i64 -; CHECK-NEXT: [[TMP84:%.*]] = mul i64 [[TMP82]], [[TMP83]] -; CHECK-NEXT: [[TMP85:%.*]] = trunc i64 [[TMP84]] to i32 -; CHECK-NEXT: [[TMP86:%.*]] = lshr i64 [[TMP84]], 32 -; CHECK-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i32 -; CHECK-NEXT: [[TMP88:%.*]] = mul i32 [[TMP87]], [[TMP59]] -; CHECK-NEXT: [[TMP89:%.*]] = sub i32 [[TMP58]], [[TMP88]] -; CHECK-NEXT: [[TMP90:%.*]] = icmp uge i32 [[TMP89]], [[TMP59]] -; CHECK-NEXT: [[TMP91:%.*]] = select i1 [[TMP90]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP92:%.*]] = icmp uge i32 [[TMP58]], [[TMP88]] -; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP94:%.*]] = and i32 [[TMP91]], [[TMP93]] -; CHECK-NEXT: [[TMP95:%.*]] = icmp eq i32 [[TMP94]], 0 -; CHECK-NEXT: [[TMP96:%.*]] = sub i32 [[TMP89]], [[TMP59]] -; CHECK-NEXT: [[TMP97:%.*]] = add i32 [[TMP89]], [[TMP59]] -; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP95]], i32 [[TMP89]], i32 [[TMP96]] -; CHECK-NEXT: [[TMP99:%.*]] = select i1 [[TMP92]], i32 [[TMP98]], i32 [[TMP97]] -; CHECK-NEXT: [[TMP100:%.*]] = xor i32 [[TMP99]], [[TMP54]] -; CHECK-NEXT: [[TMP101:%.*]] = sub i32 [[TMP100]], [[TMP54]] -; CHECK-NEXT: [[TMP102:%.*]] = insertelement <4 x i32> [[TMP51]], i32 [[TMP101]], i64 1 -; CHECK-NEXT: [[TMP103:%.*]] = extractelement <4 x i32> [[X]], i64 2 -; CHECK-NEXT: [[TMP104:%.*]] = extractelement <4 x i32> [[Y]], i64 2 -; CHECK-NEXT: [[TMP105:%.*]] = ashr i32 [[TMP103]], 31 -; CHECK-NEXT: [[TMP106:%.*]] = ashr i32 [[TMP104]], 31 -; CHECK-NEXT: [[TMP107:%.*]] = add i32 [[TMP103]], [[TMP105]] -; CHECK-NEXT: [[TMP108:%.*]] = add i32 [[TMP104]], [[TMP106]] -; CHECK-NEXT: [[TMP109:%.*]] = xor i32 [[TMP107]], [[TMP105]] -; CHECK-NEXT: [[TMP110:%.*]] = xor i32 [[TMP108]], [[TMP106]] -; CHECK-NEXT: [[TMP111:%.*]] = uitofp i32 [[TMP110]] to float -; CHECK-NEXT: [[TMP112:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP111]]) -; CHECK-NEXT: [[TMP113:%.*]] = fmul fast float [[TMP112]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP114:%.*]] = fptoui float [[TMP113]] to i32 -; CHECK-NEXT: [[TMP115:%.*]] = zext i32 [[TMP114]] to i64 -; CHECK-NEXT: [[TMP116:%.*]] = zext i32 [[TMP110]] to i64 -; CHECK-NEXT: [[TMP117:%.*]] = mul i64 [[TMP115]], [[TMP116]] -; CHECK-NEXT: [[TMP118:%.*]] = trunc i64 [[TMP117]] to i32 -; CHECK-NEXT: [[TMP119:%.*]] = lshr i64 [[TMP117]], 32 -; CHECK-NEXT: [[TMP120:%.*]] = trunc i64 [[TMP119]] to i32 -; CHECK-NEXT: [[TMP121:%.*]] = sub i32 0, [[TMP118]] -; CHECK-NEXT: [[TMP122:%.*]] = icmp eq i32 [[TMP120]], 0 -; CHECK-NEXT: [[TMP123:%.*]] = select i1 [[TMP122]], i32 [[TMP121]], i32 [[TMP118]] -; CHECK-NEXT: [[TMP124:%.*]] = zext i32 [[TMP123]] to i64 -; CHECK-NEXT: [[TMP125:%.*]] = zext i32 [[TMP114]] to i64 -; CHECK-NEXT: [[TMP126:%.*]] = mul i64 [[TMP124]], [[TMP125]] -; CHECK-NEXT: [[TMP127:%.*]] = trunc i64 [[TMP126]] to i32 -; CHECK-NEXT: [[TMP128:%.*]] = lshr i64 [[TMP126]], 32 -; CHECK-NEXT: [[TMP129:%.*]] = trunc i64 [[TMP128]] to i32 -; CHECK-NEXT: [[TMP130:%.*]] = add i32 [[TMP114]], [[TMP129]] -; CHECK-NEXT: [[TMP131:%.*]] = sub i32 [[TMP114]], [[TMP129]] -; CHECK-NEXT: [[TMP132:%.*]] = select i1 [[TMP122]], i32 [[TMP130]], i32 [[TMP131]] -; CHECK-NEXT: [[TMP133:%.*]] = zext i32 [[TMP132]] to i64 -; CHECK-NEXT: [[TMP134:%.*]] = zext i32 [[TMP109]] to i64 -; CHECK-NEXT: [[TMP135:%.*]] = mul i64 [[TMP133]], [[TMP134]] -; CHECK-NEXT: [[TMP136:%.*]] = trunc i64 [[TMP135]] to i32 -; CHECK-NEXT: [[TMP137:%.*]] = lshr i64 [[TMP135]], 32 -; CHECK-NEXT: [[TMP138:%.*]] = trunc i64 [[TMP137]] to i32 -; CHECK-NEXT: [[TMP139:%.*]] = mul i32 [[TMP138]], [[TMP110]] -; CHECK-NEXT: [[TMP140:%.*]] = sub i32 [[TMP109]], [[TMP139]] -; CHECK-NEXT: [[TMP141:%.*]] = icmp uge i32 [[TMP140]], [[TMP110]] -; CHECK-NEXT: [[TMP142:%.*]] = select i1 [[TMP141]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP143:%.*]] = icmp uge i32 [[TMP109]], [[TMP139]] -; CHECK-NEXT: [[TMP144:%.*]] = select i1 [[TMP143]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP145:%.*]] = and i32 [[TMP142]], [[TMP144]] -; CHECK-NEXT: [[TMP146:%.*]] = icmp eq i32 [[TMP145]], 0 -; CHECK-NEXT: [[TMP147:%.*]] = sub i32 [[TMP140]], [[TMP110]] -; CHECK-NEXT: [[TMP148:%.*]] = add i32 [[TMP140]], [[TMP110]] -; CHECK-NEXT: [[TMP149:%.*]] = select i1 [[TMP146]], i32 [[TMP140]], i32 [[TMP147]] -; CHECK-NEXT: [[TMP150:%.*]] = select i1 [[TMP143]], i32 [[TMP149]], i32 [[TMP148]] -; CHECK-NEXT: [[TMP151:%.*]] = xor i32 [[TMP150]], [[TMP105]] -; CHECK-NEXT: [[TMP152:%.*]] = sub i32 [[TMP151]], [[TMP105]] -; CHECK-NEXT: [[TMP153:%.*]] = insertelement <4 x i32> [[TMP102]], i32 [[TMP152]], i64 2 -; CHECK-NEXT: [[TMP154:%.*]] = extractelement <4 x i32> [[X]], i64 3 -; CHECK-NEXT: [[TMP155:%.*]] = extractelement <4 x i32> [[Y]], i64 3 -; CHECK-NEXT: [[TMP156:%.*]] = ashr i32 [[TMP154]], 31 -; CHECK-NEXT: [[TMP157:%.*]] = ashr i32 [[TMP155]], 31 -; CHECK-NEXT: [[TMP158:%.*]] = add i32 [[TMP154]], [[TMP156]] -; CHECK-NEXT: [[TMP159:%.*]] = add i32 [[TMP155]], [[TMP157]] -; CHECK-NEXT: [[TMP160:%.*]] = xor i32 [[TMP158]], [[TMP156]] -; CHECK-NEXT: [[TMP161:%.*]] = xor i32 [[TMP159]], [[TMP157]] -; CHECK-NEXT: [[TMP162:%.*]] = uitofp i32 [[TMP161]] to float -; CHECK-NEXT: [[TMP163:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP162]]) -; CHECK-NEXT: [[TMP164:%.*]] = fmul fast float [[TMP163]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP165:%.*]] = fptoui float [[TMP164]] to i32 +; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i32 [[TMP7]], [[TMP37]] +; CHECK-NEXT: [[TMP41:%.*]] = and i1 [[TMP39]], [[TMP40]] +; CHECK-NEXT: [[TMP42:%.*]] = sub i32 [[TMP38]], [[TMP8]] +; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP38]], [[TMP8]] +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP41]], i32 [[TMP42]], i32 [[TMP38]] +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP40]], i32 [[TMP44]], i32 [[TMP43]] +; CHECK-NEXT: [[TMP46:%.*]] = xor i32 [[TMP45]], [[TMP3]] +; CHECK-NEXT: [[TMP47:%.*]] = sub i32 [[TMP46]], [[TMP3]] +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <4 x i32> undef, i32 [[TMP47]], i64 0 +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <4 x i32> [[Y]], i64 1 +; CHECK-NEXT: [[TMP51:%.*]] = ashr i32 [[TMP49]], 31 +; CHECK-NEXT: [[TMP52:%.*]] = ashr i32 [[TMP50]], 31 +; CHECK-NEXT: [[TMP53:%.*]] = add i32 [[TMP49]], [[TMP51]] +; CHECK-NEXT: [[TMP54:%.*]] = add i32 [[TMP50]], [[TMP52]] +; CHECK-NEXT: [[TMP55:%.*]] = xor i32 [[TMP53]], [[TMP51]] +; CHECK-NEXT: [[TMP56:%.*]] = xor i32 [[TMP54]], [[TMP52]] +; CHECK-NEXT: [[TMP57:%.*]] = uitofp i32 [[TMP56]] to float +; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP57]]) +; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP58]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP60:%.*]] = fptoui float [[TMP59]] to i32 +; CHECK-NEXT: [[TMP61:%.*]] = zext i32 [[TMP60]] to i64 +; CHECK-NEXT: [[TMP62:%.*]] = zext i32 [[TMP56]] to i64 +; CHECK-NEXT: [[TMP63:%.*]] = mul i64 [[TMP61]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 +; CHECK-NEXT: [[TMP65:%.*]] = lshr i64 [[TMP63]], 32 +; CHECK-NEXT: [[TMP66:%.*]] = trunc i64 [[TMP65]] to i32 +; CHECK-NEXT: [[TMP67:%.*]] = sub i32 0, [[TMP64]] +; CHECK-NEXT: [[TMP68:%.*]] = icmp eq i32 [[TMP66]], 0 +; CHECK-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP67]], i32 [[TMP64]] +; CHECK-NEXT: [[TMP70:%.*]] = zext i32 [[TMP69]] to i64 +; CHECK-NEXT: [[TMP71:%.*]] = zext i32 [[TMP60]] to i64 +; CHECK-NEXT: [[TMP72:%.*]] = mul i64 [[TMP70]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 +; CHECK-NEXT: [[TMP74:%.*]] = lshr i64 [[TMP72]], 32 +; CHECK-NEXT: [[TMP75:%.*]] = trunc i64 [[TMP74]] to i32 +; CHECK-NEXT: [[TMP76:%.*]] = add i32 [[TMP60]], [[TMP75]] +; CHECK-NEXT: [[TMP77:%.*]] = sub i32 [[TMP60]], [[TMP75]] +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP68]], i32 [[TMP76]], i32 [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = zext i32 [[TMP78]] to i64 +; CHECK-NEXT: [[TMP80:%.*]] = zext i32 [[TMP55]] to i64 +; CHECK-NEXT: [[TMP81:%.*]] = mul i64 [[TMP79]], [[TMP80]] +; CHECK-NEXT: [[TMP82:%.*]] = trunc i64 [[TMP81]] to i32 +; CHECK-NEXT: [[TMP83:%.*]] = lshr i64 [[TMP81]], 32 +; CHECK-NEXT: [[TMP84:%.*]] = trunc i64 [[TMP83]] to i32 +; CHECK-NEXT: [[TMP85:%.*]] = mul i32 [[TMP84]], [[TMP56]] +; CHECK-NEXT: [[TMP86:%.*]] = sub i32 [[TMP55]], [[TMP85]] +; CHECK-NEXT: [[TMP87:%.*]] = icmp uge i32 [[TMP86]], [[TMP56]] +; CHECK-NEXT: [[TMP88:%.*]] = icmp uge i32 [[TMP55]], [[TMP85]] +; CHECK-NEXT: [[TMP89:%.*]] = and i1 [[TMP87]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = sub i32 [[TMP86]], [[TMP56]] +; CHECK-NEXT: [[TMP91:%.*]] = add i32 [[TMP86]], [[TMP56]] +; CHECK-NEXT: [[TMP92:%.*]] = select i1 [[TMP89]], i32 [[TMP90]], i32 [[TMP86]] +; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP88]], i32 [[TMP92]], i32 [[TMP91]] +; CHECK-NEXT: [[TMP94:%.*]] = xor i32 [[TMP93]], [[TMP51]] +; CHECK-NEXT: [[TMP95:%.*]] = sub i32 [[TMP94]], [[TMP51]] +; CHECK-NEXT: [[TMP96:%.*]] = insertelement <4 x i32> [[TMP48]], i32 [[TMP95]], i64 1 +; CHECK-NEXT: [[TMP97:%.*]] = extractelement <4 x i32> [[X]], i64 2 +; CHECK-NEXT: [[TMP98:%.*]] = extractelement <4 x i32> [[Y]], i64 2 +; CHECK-NEXT: [[TMP99:%.*]] = ashr i32 [[TMP97]], 31 +; CHECK-NEXT: [[TMP100:%.*]] = ashr i32 [[TMP98]], 31 +; CHECK-NEXT: [[TMP101:%.*]] = add i32 [[TMP97]], [[TMP99]] +; CHECK-NEXT: [[TMP102:%.*]] = add i32 [[TMP98]], [[TMP100]] +; CHECK-NEXT: [[TMP103:%.*]] = xor i32 [[TMP101]], [[TMP99]] +; CHECK-NEXT: [[TMP104:%.*]] = xor i32 [[TMP102]], [[TMP100]] +; CHECK-NEXT: [[TMP105:%.*]] = uitofp i32 [[TMP104]] to float +; CHECK-NEXT: [[TMP106:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP105]]) +; CHECK-NEXT: [[TMP107:%.*]] = fmul fast float [[TMP106]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP108:%.*]] = fptoui float [[TMP107]] to i32 +; CHECK-NEXT: [[TMP109:%.*]] = zext i32 [[TMP108]] to i64 +; CHECK-NEXT: [[TMP110:%.*]] = zext i32 [[TMP104]] to i64 +; CHECK-NEXT: [[TMP111:%.*]] = mul i64 [[TMP109]], [[TMP110]] +; CHECK-NEXT: [[TMP112:%.*]] = trunc i64 [[TMP111]] to i32 +; CHECK-NEXT: [[TMP113:%.*]] = lshr i64 [[TMP111]], 32 +; CHECK-NEXT: [[TMP114:%.*]] = trunc i64 [[TMP113]] to i32 +; CHECK-NEXT: [[TMP115:%.*]] = sub i32 0, [[TMP112]] +; CHECK-NEXT: [[TMP116:%.*]] = icmp eq i32 [[TMP114]], 0 +; CHECK-NEXT: [[TMP117:%.*]] = select i1 [[TMP116]], i32 [[TMP115]], i32 [[TMP112]] +; CHECK-NEXT: [[TMP118:%.*]] = zext i32 [[TMP117]] to i64 +; CHECK-NEXT: [[TMP119:%.*]] = zext i32 [[TMP108]] to i64 +; CHECK-NEXT: [[TMP120:%.*]] = mul i64 [[TMP118]], [[TMP119]] +; CHECK-NEXT: [[TMP121:%.*]] = trunc i64 [[TMP120]] to i32 +; CHECK-NEXT: [[TMP122:%.*]] = lshr i64 [[TMP120]], 32 +; CHECK-NEXT: [[TMP123:%.*]] = trunc i64 [[TMP122]] to i32 +; CHECK-NEXT: [[TMP124:%.*]] = add i32 [[TMP108]], [[TMP123]] +; CHECK-NEXT: [[TMP125:%.*]] = sub i32 [[TMP108]], [[TMP123]] +; CHECK-NEXT: [[TMP126:%.*]] = select i1 [[TMP116]], i32 [[TMP124]], i32 [[TMP125]] +; CHECK-NEXT: [[TMP127:%.*]] = zext i32 [[TMP126]] to i64 +; CHECK-NEXT: [[TMP128:%.*]] = zext i32 [[TMP103]] to i64 +; CHECK-NEXT: [[TMP129:%.*]] = mul i64 [[TMP127]], [[TMP128]] +; CHECK-NEXT: [[TMP130:%.*]] = trunc i64 [[TMP129]] to i32 +; CHECK-NEXT: [[TMP131:%.*]] = lshr i64 [[TMP129]], 32 +; CHECK-NEXT: [[TMP132:%.*]] = trunc i64 [[TMP131]] to i32 +; CHECK-NEXT: [[TMP133:%.*]] = mul i32 [[TMP132]], [[TMP104]] +; CHECK-NEXT: [[TMP134:%.*]] = sub i32 [[TMP103]], [[TMP133]] +; CHECK-NEXT: [[TMP135:%.*]] = icmp uge i32 [[TMP134]], [[TMP104]] +; CHECK-NEXT: [[TMP136:%.*]] = icmp uge i32 [[TMP103]], [[TMP133]] +; CHECK-NEXT: [[TMP137:%.*]] = and i1 [[TMP135]], [[TMP136]] +; CHECK-NEXT: [[TMP138:%.*]] = sub i32 [[TMP134]], [[TMP104]] +; CHECK-NEXT: [[TMP139:%.*]] = add i32 [[TMP134]], [[TMP104]] +; CHECK-NEXT: [[TMP140:%.*]] = select i1 [[TMP137]], i32 [[TMP138]], i32 [[TMP134]] +; CHECK-NEXT: [[TMP141:%.*]] = select i1 [[TMP136]], i32 [[TMP140]], i32 [[TMP139]] +; CHECK-NEXT: [[TMP142:%.*]] = xor i32 [[TMP141]], [[TMP99]] +; CHECK-NEXT: [[TMP143:%.*]] = sub i32 [[TMP142]], [[TMP99]] +; CHECK-NEXT: [[TMP144:%.*]] = insertelement <4 x i32> [[TMP96]], i32 [[TMP143]], i64 2 +; CHECK-NEXT: [[TMP145:%.*]] = extractelement <4 x i32> [[X]], i64 3 +; CHECK-NEXT: [[TMP146:%.*]] = extractelement <4 x i32> [[Y]], i64 3 +; CHECK-NEXT: [[TMP147:%.*]] = ashr i32 [[TMP145]], 31 +; CHECK-NEXT: [[TMP148:%.*]] = ashr i32 [[TMP146]], 31 +; CHECK-NEXT: [[TMP149:%.*]] = add i32 [[TMP145]], [[TMP147]] +; CHECK-NEXT: [[TMP150:%.*]] = add i32 [[TMP146]], [[TMP148]] +; CHECK-NEXT: [[TMP151:%.*]] = xor i32 [[TMP149]], [[TMP147]] +; CHECK-NEXT: [[TMP152:%.*]] = xor i32 [[TMP150]], [[TMP148]] +; CHECK-NEXT: [[TMP153:%.*]] = uitofp i32 [[TMP152]] to float +; CHECK-NEXT: [[TMP154:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP153]]) +; CHECK-NEXT: [[TMP155:%.*]] = fmul fast float [[TMP154]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP156:%.*]] = fptoui float [[TMP155]] to i32 +; CHECK-NEXT: [[TMP157:%.*]] = zext i32 [[TMP156]] to i64 +; CHECK-NEXT: [[TMP158:%.*]] = zext i32 [[TMP152]] to i64 +; CHECK-NEXT: [[TMP159:%.*]] = mul i64 [[TMP157]], [[TMP158]] +; CHECK-NEXT: [[TMP160:%.*]] = trunc i64 [[TMP159]] to i32 +; CHECK-NEXT: [[TMP161:%.*]] = lshr i64 [[TMP159]], 32 +; CHECK-NEXT: [[TMP162:%.*]] = trunc i64 [[TMP161]] to i32 +; CHECK-NEXT: [[TMP163:%.*]] = sub i32 0, [[TMP160]] +; CHECK-NEXT: [[TMP164:%.*]] = icmp eq i32 [[TMP162]], 0 +; CHECK-NEXT: [[TMP165:%.*]] = select i1 [[TMP164]], i32 [[TMP163]], i32 [[TMP160]] ; CHECK-NEXT: [[TMP166:%.*]] = zext i32 [[TMP165]] to i64 -; CHECK-NEXT: [[TMP167:%.*]] = zext i32 [[TMP161]] to i64 +; CHECK-NEXT: [[TMP167:%.*]] = zext i32 [[TMP156]] to i64 ; CHECK-NEXT: [[TMP168:%.*]] = mul i64 [[TMP166]], [[TMP167]] ; CHECK-NEXT: [[TMP169:%.*]] = trunc i64 [[TMP168]] to i32 ; CHECK-NEXT: [[TMP170:%.*]] = lshr i64 [[TMP168]], 32 ; CHECK-NEXT: [[TMP171:%.*]] = trunc i64 [[TMP170]] to i32 -; CHECK-NEXT: [[TMP172:%.*]] = sub i32 0, [[TMP169]] -; CHECK-NEXT: [[TMP173:%.*]] = icmp eq i32 [[TMP171]], 0 -; CHECK-NEXT: [[TMP174:%.*]] = select i1 [[TMP173]], i32 [[TMP172]], i32 [[TMP169]] +; CHECK-NEXT: [[TMP172:%.*]] = add i32 [[TMP156]], [[TMP171]] +; CHECK-NEXT: [[TMP173:%.*]] = sub i32 [[TMP156]], [[TMP171]] +; CHECK-NEXT: [[TMP174:%.*]] = select i1 [[TMP164]], i32 [[TMP172]], i32 [[TMP173]] ; CHECK-NEXT: [[TMP175:%.*]] = zext i32 [[TMP174]] to i64 -; CHECK-NEXT: [[TMP176:%.*]] = zext i32 [[TMP165]] to i64 +; CHECK-NEXT: [[TMP176:%.*]] = zext i32 [[TMP151]] to i64 ; CHECK-NEXT: [[TMP177:%.*]] = mul i64 [[TMP175]], [[TMP176]] ; CHECK-NEXT: [[TMP178:%.*]] = trunc i64 [[TMP177]] to i32 ; CHECK-NEXT: [[TMP179:%.*]] = lshr i64 [[TMP177]], 32 ; CHECK-NEXT: [[TMP180:%.*]] = trunc i64 [[TMP179]] to i32 -; CHECK-NEXT: [[TMP181:%.*]] = add i32 [[TMP165]], [[TMP180]] -; CHECK-NEXT: [[TMP182:%.*]] = sub i32 [[TMP165]], [[TMP180]] -; CHECK-NEXT: [[TMP183:%.*]] = select i1 [[TMP173]], i32 [[TMP181]], i32 [[TMP182]] -; CHECK-NEXT: [[TMP184:%.*]] = zext i32 [[TMP183]] to i64 -; CHECK-NEXT: [[TMP185:%.*]] = zext i32 [[TMP160]] to i64 -; CHECK-NEXT: [[TMP186:%.*]] = mul i64 [[TMP184]], [[TMP185]] -; CHECK-NEXT: [[TMP187:%.*]] = trunc i64 [[TMP186]] to i32 -; CHECK-NEXT: [[TMP188:%.*]] = lshr i64 [[TMP186]], 32 -; CHECK-NEXT: [[TMP189:%.*]] = trunc i64 [[TMP188]] to i32 -; CHECK-NEXT: [[TMP190:%.*]] = mul i32 [[TMP189]], [[TMP161]] -; CHECK-NEXT: [[TMP191:%.*]] = sub i32 [[TMP160]], [[TMP190]] -; CHECK-NEXT: [[TMP192:%.*]] = icmp uge i32 [[TMP191]], [[TMP161]] -; CHECK-NEXT: [[TMP193:%.*]] = select i1 [[TMP192]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP194:%.*]] = icmp uge i32 [[TMP160]], [[TMP190]] -; CHECK-NEXT: [[TMP195:%.*]] = select i1 [[TMP194]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP196:%.*]] = and i32 [[TMP193]], [[TMP195]] -; CHECK-NEXT: [[TMP197:%.*]] = icmp eq i32 [[TMP196]], 0 -; CHECK-NEXT: [[TMP198:%.*]] = sub i32 [[TMP191]], [[TMP161]] -; CHECK-NEXT: [[TMP199:%.*]] = add i32 [[TMP191]], [[TMP161]] -; CHECK-NEXT: [[TMP200:%.*]] = select i1 [[TMP197]], i32 [[TMP191]], i32 [[TMP198]] -; CHECK-NEXT: [[TMP201:%.*]] = select i1 [[TMP194]], i32 [[TMP200]], i32 [[TMP199]] -; CHECK-NEXT: [[TMP202:%.*]] = xor i32 [[TMP201]], [[TMP156]] -; CHECK-NEXT: [[TMP203:%.*]] = sub i32 [[TMP202]], [[TMP156]] -; CHECK-NEXT: [[TMP204:%.*]] = insertelement <4 x i32> [[TMP153]], i32 [[TMP203]], i64 3 -; CHECK-NEXT: store <4 x i32> [[TMP204]], <4 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP181:%.*]] = mul i32 [[TMP180]], [[TMP152]] +; CHECK-NEXT: [[TMP182:%.*]] = sub i32 [[TMP151]], [[TMP181]] +; CHECK-NEXT: [[TMP183:%.*]] = icmp uge i32 [[TMP182]], [[TMP152]] +; CHECK-NEXT: [[TMP184:%.*]] = icmp uge i32 [[TMP151]], [[TMP181]] +; CHECK-NEXT: [[TMP185:%.*]] = and i1 [[TMP183]], [[TMP184]] +; CHECK-NEXT: [[TMP186:%.*]] = sub i32 [[TMP182]], [[TMP152]] +; CHECK-NEXT: [[TMP187:%.*]] = add i32 [[TMP182]], [[TMP152]] +; CHECK-NEXT: [[TMP188:%.*]] = select i1 [[TMP185]], i32 [[TMP186]], i32 [[TMP182]] +; CHECK-NEXT: [[TMP189:%.*]] = select i1 [[TMP184]], i32 [[TMP188]], i32 [[TMP187]] +; CHECK-NEXT: [[TMP190:%.*]] = xor i32 [[TMP189]], [[TMP147]] +; CHECK-NEXT: [[TMP191:%.*]] = sub i32 [[TMP190]], [[TMP147]] +; CHECK-NEXT: [[TMP192:%.*]] = insertelement <4 x i32> [[TMP144]], i32 [[TMP191]], i64 3 +; CHECK-NEXT: store <4 x i32> [[TMP192]], <4 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: srem_v4i32: @@ -4304,60 +4244,54 @@ ; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], [[TMP2]] ; CHECK-NEXT: [[TMP32:%.*]] = sub i32 [[TMP1]], [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP35:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP34]], [[TMP36]] -; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP37]], 0 -; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP30]], 1 -; CHECK-NEXT: [[TMP40:%.*]] = sub i32 [[TMP30]], 1 -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP38]], i32 [[TMP30]], i32 [[TMP39]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], i32 [[TMP41]], i32 [[TMP40]] -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x i32> undef, i32 [[TMP42]], i64 0 -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 -; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP45]] to float -; CHECK-NEXT: [[TMP47:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP46]]) -; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP47]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP49:%.*]] = fptoui float [[TMP48]] to i32 -; CHECK-NEXT: [[TMP50:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP51:%.*]] = zext i32 [[TMP45]] to i64 -; CHECK-NEXT: [[TMP52:%.*]] = mul i64 [[TMP50]], [[TMP51]] -; CHECK-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i32 -; CHECK-NEXT: [[TMP54:%.*]] = lshr i64 [[TMP52]], 32 -; CHECK-NEXT: [[TMP55:%.*]] = trunc i64 [[TMP54]] to i32 -; CHECK-NEXT: [[TMP56:%.*]] = sub i32 0, [[TMP53]] -; CHECK-NEXT: [[TMP57:%.*]] = icmp eq i32 [[TMP55]], 0 -; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 [[TMP53]] -; CHECK-NEXT: [[TMP59:%.*]] = zext i32 [[TMP58]] to i64 -; CHECK-NEXT: [[TMP60:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP61:%.*]] = mul i64 [[TMP59]], [[TMP60]] -; CHECK-NEXT: [[TMP62:%.*]] = trunc i64 [[TMP61]] to i32 -; CHECK-NEXT: [[TMP63:%.*]] = lshr i64 [[TMP61]], 32 -; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 -; CHECK-NEXT: [[TMP65:%.*]] = add i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = sub i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP57]], i32 [[TMP65]], i32 [[TMP66]] -; CHECK-NEXT: [[TMP68:%.*]] = zext i32 [[TMP67]] to i64 -; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP44]] to i64 -; CHECK-NEXT: [[TMP70:%.*]] = mul i64 [[TMP68]], [[TMP69]] -; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP72:%.*]] = lshr i64 [[TMP70]], 32 -; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 -; CHECK-NEXT: [[TMP74:%.*]] = mul i32 [[TMP73]], [[TMP45]] -; CHECK-NEXT: [[TMP75:%.*]] = sub i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = icmp uge i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP78:%.*]] = icmp uge i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP80:%.*]] = and i32 [[TMP77]], [[TMP79]] -; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i32 [[TMP80]], 0 -; CHECK-NEXT: [[TMP82:%.*]] = add i32 [[TMP73]], 1 -; CHECK-NEXT: [[TMP83:%.*]] = sub i32 [[TMP73]], 1 -; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP81]], i32 [[TMP73]], i32 [[TMP82]] -; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP78]], i32 [[TMP84]], i32 [[TMP83]] -; CHECK-NEXT: [[TMP86:%.*]] = insertelement <2 x i32> [[TMP43]], i32 [[TMP85]], i64 1 -; CHECK-NEXT: store <2 x i32> [[TMP86]], <2 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP30]], 1 +; CHECK-NEXT: [[TMP37:%.*]] = sub i32 [[TMP30]], 1 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP35]], i32 [[TMP36]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP34]], i32 [[TMP38]], i32 [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i32> undef, i32 [[TMP39]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 +; CHECK-NEXT: [[TMP43:%.*]] = uitofp i32 [[TMP42]] to float +; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = fmul fast float [[TMP44]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP46:%.*]] = fptoui float [[TMP45]] to i32 +; CHECK-NEXT: [[TMP47:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = zext i32 [[TMP42]] to i64 +; CHECK-NEXT: [[TMP49:%.*]] = mul i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = trunc i64 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP51:%.*]] = lshr i64 [[TMP49]], 32 +; CHECK-NEXT: [[TMP52:%.*]] = trunc i64 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP53:%.*]] = sub i32 0, [[TMP50]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp eq i32 [[TMP52]], 0 +; CHECK-NEXT: [[TMP55:%.*]] = select i1 [[TMP54]], i32 [[TMP53]], i32 [[TMP50]] +; CHECK-NEXT: [[TMP56:%.*]] = zext i32 [[TMP55]] to i64 +; CHECK-NEXT: [[TMP57:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = trunc i64 [[TMP58]] to i32 +; CHECK-NEXT: [[TMP60:%.*]] = lshr i64 [[TMP58]], 32 +; CHECK-NEXT: [[TMP61:%.*]] = trunc i64 [[TMP60]] to i32 +; CHECK-NEXT: [[TMP62:%.*]] = add i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = sub i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP54]], i32 [[TMP62]], i32 [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP64]] to i64 +; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[TMP41]] to i64 +; CHECK-NEXT: [[TMP67:%.*]] = mul i64 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = lshr i64 [[TMP67]], 32 +; CHECK-NEXT: [[TMP70:%.*]] = trunc i64 [[TMP69]] to i32 +; CHECK-NEXT: [[TMP71:%.*]] = mul i32 [[TMP70]], [[TMP42]] +; CHECK-NEXT: [[TMP72:%.*]] = sub i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp uge i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP74:%.*]] = icmp uge i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP75:%.*]] = and i1 [[TMP73]], [[TMP74]] +; CHECK-NEXT: [[TMP76:%.*]] = add i32 [[TMP70]], 1 +; CHECK-NEXT: [[TMP77:%.*]] = sub i32 [[TMP70]], 1 +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP75]], i32 [[TMP76]], i32 [[TMP70]] +; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP74]], i32 [[TMP78]], i32 [[TMP77]] +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <2 x i32> [[TMP40]], i32 [[TMP79]], i64 1 +; CHECK-NEXT: store <2 x i32> [[TMP80]], <2 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: udiv_v2i32_pow2_shl_denom: @@ -4569,60 +4503,54 @@ ; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], [[TMP2]] ; CHECK-NEXT: [[TMP32:%.*]] = sub i32 [[TMP1]], [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = icmp uge i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP35:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP34]], [[TMP36]] -; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP37]], 0 -; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP32]], [[TMP2]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP38]], i32 [[TMP32]], i32 [[TMP39]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], i32 [[TMP41]], i32 [[TMP40]] -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x i32> undef, i32 [[TMP42]], i64 0 -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 -; CHECK-NEXT: [[TMP46:%.*]] = uitofp i32 [[TMP45]] to float -; CHECK-NEXT: [[TMP47:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP46]]) -; CHECK-NEXT: [[TMP48:%.*]] = fmul fast float [[TMP47]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP49:%.*]] = fptoui float [[TMP48]] to i32 -; CHECK-NEXT: [[TMP50:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP51:%.*]] = zext i32 [[TMP45]] to i64 -; CHECK-NEXT: [[TMP52:%.*]] = mul i64 [[TMP50]], [[TMP51]] -; CHECK-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i32 -; CHECK-NEXT: [[TMP54:%.*]] = lshr i64 [[TMP52]], 32 -; CHECK-NEXT: [[TMP55:%.*]] = trunc i64 [[TMP54]] to i32 -; CHECK-NEXT: [[TMP56:%.*]] = sub i32 0, [[TMP53]] -; CHECK-NEXT: [[TMP57:%.*]] = icmp eq i32 [[TMP55]], 0 -; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 [[TMP53]] -; CHECK-NEXT: [[TMP59:%.*]] = zext i32 [[TMP58]] to i64 -; CHECK-NEXT: [[TMP60:%.*]] = zext i32 [[TMP49]] to i64 -; CHECK-NEXT: [[TMP61:%.*]] = mul i64 [[TMP59]], [[TMP60]] -; CHECK-NEXT: [[TMP62:%.*]] = trunc i64 [[TMP61]] to i32 -; CHECK-NEXT: [[TMP63:%.*]] = lshr i64 [[TMP61]], 32 -; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 -; CHECK-NEXT: [[TMP65:%.*]] = add i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = sub i32 [[TMP49]], [[TMP64]] -; CHECK-NEXT: [[TMP67:%.*]] = select i1 [[TMP57]], i32 [[TMP65]], i32 [[TMP66]] -; CHECK-NEXT: [[TMP68:%.*]] = zext i32 [[TMP67]] to i64 -; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP44]] to i64 -; CHECK-NEXT: [[TMP70:%.*]] = mul i64 [[TMP68]], [[TMP69]] -; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP72:%.*]] = lshr i64 [[TMP70]], 32 -; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 -; CHECK-NEXT: [[TMP74:%.*]] = mul i32 [[TMP73]], [[TMP45]] -; CHECK-NEXT: [[TMP75:%.*]] = sub i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = icmp uge i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP78:%.*]] = icmp uge i32 [[TMP44]], [[TMP74]] -; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP78]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP80:%.*]] = and i32 [[TMP77]], [[TMP79]] -; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i32 [[TMP80]], 0 -; CHECK-NEXT: [[TMP82:%.*]] = sub i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP83:%.*]] = add i32 [[TMP75]], [[TMP45]] -; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP81]], i32 [[TMP75]], i32 [[TMP82]] -; CHECK-NEXT: [[TMP85:%.*]] = select i1 [[TMP78]], i32 [[TMP84]], i32 [[TMP83]] -; CHECK-NEXT: [[TMP86:%.*]] = insertelement <2 x i32> [[TMP43]], i32 [[TMP85]], i64 1 -; CHECK-NEXT: store <2 x i32> [[TMP86]], <2 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp uge i32 [[TMP1]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = sub i32 [[TMP32]], [[TMP2]] +; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP32]], [[TMP2]] +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP35]], i32 [[TMP36]], i32 [[TMP32]] +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP34]], i32 [[TMP38]], i32 [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i32> undef, i32 [[TMP39]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 +; CHECK-NEXT: [[TMP43:%.*]] = uitofp i32 [[TMP42]] to float +; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = fmul fast float [[TMP44]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP46:%.*]] = fptoui float [[TMP45]] to i32 +; CHECK-NEXT: [[TMP47:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = zext i32 [[TMP42]] to i64 +; CHECK-NEXT: [[TMP49:%.*]] = mul i64 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = trunc i64 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP51:%.*]] = lshr i64 [[TMP49]], 32 +; CHECK-NEXT: [[TMP52:%.*]] = trunc i64 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP53:%.*]] = sub i32 0, [[TMP50]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp eq i32 [[TMP52]], 0 +; CHECK-NEXT: [[TMP55:%.*]] = select i1 [[TMP54]], i32 [[TMP53]], i32 [[TMP50]] +; CHECK-NEXT: [[TMP56:%.*]] = zext i32 [[TMP55]] to i64 +; CHECK-NEXT: [[TMP57:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = mul i64 [[TMP56]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = trunc i64 [[TMP58]] to i32 +; CHECK-NEXT: [[TMP60:%.*]] = lshr i64 [[TMP58]], 32 +; CHECK-NEXT: [[TMP61:%.*]] = trunc i64 [[TMP60]] to i32 +; CHECK-NEXT: [[TMP62:%.*]] = add i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = sub i32 [[TMP46]], [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = select i1 [[TMP54]], i32 [[TMP62]], i32 [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP64]] to i64 +; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[TMP41]] to i64 +; CHECK-NEXT: [[TMP67:%.*]] = mul i64 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = lshr i64 [[TMP67]], 32 +; CHECK-NEXT: [[TMP70:%.*]] = trunc i64 [[TMP69]] to i32 +; CHECK-NEXT: [[TMP71:%.*]] = mul i32 [[TMP70]], [[TMP42]] +; CHECK-NEXT: [[TMP72:%.*]] = sub i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp uge i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP74:%.*]] = icmp uge i32 [[TMP41]], [[TMP71]] +; CHECK-NEXT: [[TMP75:%.*]] = and i1 [[TMP73]], [[TMP74]] +; CHECK-NEXT: [[TMP76:%.*]] = sub i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP77:%.*]] = add i32 [[TMP72]], [[TMP42]] +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP75]], i32 [[TMP76]], i32 [[TMP72]] +; CHECK-NEXT: [[TMP79:%.*]] = select i1 [[TMP74]], i32 [[TMP78]], i32 [[TMP77]] +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <2 x i32> [[TMP40]], i32 [[TMP79]], i64 1 +; CHECK-NEXT: store <2 x i32> [[TMP80]], <2 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: urem_v2i32_pow2_shl_denom: @@ -4912,71 +4840,65 @@ ; CHECK-NEXT: [[TMP38:%.*]] = mul i32 [[TMP37]], [[TMP9]] ; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP8]], [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i32 [[TMP39]], [[TMP9]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP42:%.*]] = icmp uge i32 [[TMP8]], [[TMP38]] -; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP41]], [[TMP43]] -; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[TMP44]], 0 -; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP37]], 1 -; CHECK-NEXT: [[TMP47:%.*]] = sub i32 [[TMP37]], 1 -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP45]], i32 [[TMP37]], i32 [[TMP46]] -; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP42]], i32 [[TMP48]], i32 [[TMP47]] -; CHECK-NEXT: [[TMP50:%.*]] = xor i32 [[TMP49]], [[TMP5]] -; CHECK-NEXT: [[TMP51:%.*]] = sub i32 [[TMP50]], [[TMP5]] -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <2 x i32> undef, i32 [[TMP51]], i64 0 -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <2 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 -; CHECK-NEXT: [[TMP55:%.*]] = ashr i32 [[TMP53]], 31 -; CHECK-NEXT: [[TMP56:%.*]] = ashr i32 [[TMP54]], 31 -; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]] -; CHECK-NEXT: [[TMP58:%.*]] = add i32 [[TMP53]], [[TMP55]] -; CHECK-NEXT: [[TMP59:%.*]] = add i32 [[TMP54]], [[TMP56]] -; CHECK-NEXT: [[TMP60:%.*]] = xor i32 [[TMP58]], [[TMP55]] -; CHECK-NEXT: [[TMP61:%.*]] = xor i32 [[TMP59]], [[TMP56]] -; CHECK-NEXT: [[TMP62:%.*]] = uitofp i32 [[TMP61]] to float -; CHECK-NEXT: [[TMP63:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP62]]) -; CHECK-NEXT: [[TMP64:%.*]] = fmul fast float [[TMP63]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP65:%.*]] = fptoui float [[TMP64]] to i32 -; CHECK-NEXT: [[TMP66:%.*]] = zext i32 [[TMP65]] to i64 -; CHECK-NEXT: [[TMP67:%.*]] = zext i32 [[TMP61]] to i64 -; CHECK-NEXT: [[TMP68:%.*]] = mul i64 [[TMP66]], [[TMP67]] -; CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[TMP68]] to i32 -; CHECK-NEXT: [[TMP70:%.*]] = lshr i64 [[TMP68]], 32 -; CHECK-NEXT: [[TMP71:%.*]] = trunc i64 [[TMP70]] to i32 -; CHECK-NEXT: [[TMP72:%.*]] = sub i32 0, [[TMP69]] -; CHECK-NEXT: [[TMP73:%.*]] = icmp eq i32 [[TMP71]], 0 -; CHECK-NEXT: [[TMP74:%.*]] = select i1 [[TMP73]], i32 [[TMP72]], i32 [[TMP69]] -; CHECK-NEXT: [[TMP75:%.*]] = zext i32 [[TMP74]] to i64 -; CHECK-NEXT: [[TMP76:%.*]] = zext i32 [[TMP65]] to i64 -; CHECK-NEXT: [[TMP77:%.*]] = mul i64 [[TMP75]], [[TMP76]] -; CHECK-NEXT: [[TMP78:%.*]] = trunc i64 [[TMP77]] to i32 -; CHECK-NEXT: [[TMP79:%.*]] = lshr i64 [[TMP77]], 32 -; CHECK-NEXT: [[TMP80:%.*]] = trunc i64 [[TMP79]] to i32 -; CHECK-NEXT: [[TMP81:%.*]] = add i32 [[TMP65]], [[TMP80]] -; CHECK-NEXT: [[TMP82:%.*]] = sub i32 [[TMP65]], [[TMP80]] -; CHECK-NEXT: [[TMP83:%.*]] = select i1 [[TMP73]], i32 [[TMP81]], i32 [[TMP82]] -; CHECK-NEXT: [[TMP84:%.*]] = zext i32 [[TMP83]] to i64 -; CHECK-NEXT: [[TMP85:%.*]] = zext i32 [[TMP60]] to i64 -; CHECK-NEXT: [[TMP86:%.*]] = mul i64 [[TMP84]], [[TMP85]] -; CHECK-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i32 -; CHECK-NEXT: [[TMP88:%.*]] = lshr i64 [[TMP86]], 32 -; CHECK-NEXT: [[TMP89:%.*]] = trunc i64 [[TMP88]] to i32 -; CHECK-NEXT: [[TMP90:%.*]] = mul i32 [[TMP89]], [[TMP61]] -; CHECK-NEXT: [[TMP91:%.*]] = sub i32 [[TMP60]], [[TMP90]] -; CHECK-NEXT: [[TMP92:%.*]] = icmp uge i32 [[TMP91]], [[TMP61]] -; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP94:%.*]] = icmp uge i32 [[TMP60]], [[TMP90]] -; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP96:%.*]] = and i32 [[TMP93]], [[TMP95]] -; CHECK-NEXT: [[TMP97:%.*]] = icmp eq i32 [[TMP96]], 0 -; CHECK-NEXT: [[TMP98:%.*]] = add i32 [[TMP89]], 1 -; CHECK-NEXT: [[TMP99:%.*]] = sub i32 [[TMP89]], 1 -; CHECK-NEXT: [[TMP100:%.*]] = select i1 [[TMP97]], i32 [[TMP89]], i32 [[TMP98]] -; CHECK-NEXT: [[TMP101:%.*]] = select i1 [[TMP94]], i32 [[TMP100]], i32 [[TMP99]] -; CHECK-NEXT: [[TMP102:%.*]] = xor i32 [[TMP101]], [[TMP57]] -; CHECK-NEXT: [[TMP103:%.*]] = sub i32 [[TMP102]], [[TMP57]] -; CHECK-NEXT: [[TMP104:%.*]] = insertelement <2 x i32> [[TMP52]], i32 [[TMP103]], i64 1 -; CHECK-NEXT: store <2 x i32> [[TMP104]], <2 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP41:%.*]] = icmp uge i32 [[TMP8]], [[TMP38]] +; CHECK-NEXT: [[TMP42:%.*]] = and i1 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP37]], 1 +; CHECK-NEXT: [[TMP44:%.*]] = sub i32 [[TMP37]], 1 +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP42]], i32 [[TMP43]], i32 [[TMP37]] +; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP41]], i32 [[TMP45]], i32 [[TMP44]] +; CHECK-NEXT: [[TMP47:%.*]] = xor i32 [[TMP46]], [[TMP5]] +; CHECK-NEXT: [[TMP48:%.*]] = sub i32 [[TMP47]], [[TMP5]] +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <2 x i32> undef, i32 [[TMP48]], i64 0 +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <2 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP51:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 +; CHECK-NEXT: [[TMP52:%.*]] = ashr i32 [[TMP50]], 31 +; CHECK-NEXT: [[TMP53:%.*]] = ashr i32 [[TMP51]], 31 +; CHECK-NEXT: [[TMP54:%.*]] = xor i32 [[TMP52]], [[TMP53]] +; CHECK-NEXT: [[TMP55:%.*]] = add i32 [[TMP50]], [[TMP52]] +; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[TMP51]], [[TMP53]] +; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP52]] +; CHECK-NEXT: [[TMP58:%.*]] = xor i32 [[TMP56]], [[TMP53]] +; CHECK-NEXT: [[TMP59:%.*]] = uitofp i32 [[TMP58]] to float +; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP59]]) +; CHECK-NEXT: [[TMP61:%.*]] = fmul fast float [[TMP60]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP62:%.*]] = fptoui float [[TMP61]] to i32 +; CHECK-NEXT: [[TMP63:%.*]] = zext i32 [[TMP62]] to i64 +; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[TMP58]] to i64 +; CHECK-NEXT: [[TMP65:%.*]] = mul i64 [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = trunc i64 [[TMP65]] to i32 +; CHECK-NEXT: [[TMP67:%.*]] = lshr i64 [[TMP65]], 32 +; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i32 +; CHECK-NEXT: [[TMP69:%.*]] = sub i32 0, [[TMP66]] +; CHECK-NEXT: [[TMP70:%.*]] = icmp eq i32 [[TMP68]], 0 +; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], i32 [[TMP69]], i32 [[TMP66]] +; CHECK-NEXT: [[TMP72:%.*]] = zext i32 [[TMP71]] to i64 +; CHECK-NEXT: [[TMP73:%.*]] = zext i32 [[TMP62]] to i64 +; CHECK-NEXT: [[TMP74:%.*]] = mul i64 [[TMP72]], [[TMP73]] +; CHECK-NEXT: [[TMP75:%.*]] = trunc i64 [[TMP74]] to i32 +; CHECK-NEXT: [[TMP76:%.*]] = lshr i64 [[TMP74]], 32 +; CHECK-NEXT: [[TMP77:%.*]] = trunc i64 [[TMP76]] to i32 +; CHECK-NEXT: [[TMP78:%.*]] = add i32 [[TMP62]], [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = sub i32 [[TMP62]], [[TMP77]] +; CHECK-NEXT: [[TMP80:%.*]] = select i1 [[TMP70]], i32 [[TMP78]], i32 [[TMP79]] +; CHECK-NEXT: [[TMP81:%.*]] = zext i32 [[TMP80]] to i64 +; CHECK-NEXT: [[TMP82:%.*]] = zext i32 [[TMP57]] to i64 +; CHECK-NEXT: [[TMP83:%.*]] = mul i64 [[TMP81]], [[TMP82]] +; CHECK-NEXT: [[TMP84:%.*]] = trunc i64 [[TMP83]] to i32 +; CHECK-NEXT: [[TMP85:%.*]] = lshr i64 [[TMP83]], 32 +; CHECK-NEXT: [[TMP86:%.*]] = trunc i64 [[TMP85]] to i32 +; CHECK-NEXT: [[TMP87:%.*]] = mul i32 [[TMP86]], [[TMP58]] +; CHECK-NEXT: [[TMP88:%.*]] = sub i32 [[TMP57]], [[TMP87]] +; CHECK-NEXT: [[TMP89:%.*]] = icmp uge i32 [[TMP88]], [[TMP58]] +; CHECK-NEXT: [[TMP90:%.*]] = icmp uge i32 [[TMP57]], [[TMP87]] +; CHECK-NEXT: [[TMP91:%.*]] = and i1 [[TMP89]], [[TMP90]] +; CHECK-NEXT: [[TMP92:%.*]] = add i32 [[TMP86]], 1 +; CHECK-NEXT: [[TMP93:%.*]] = sub i32 [[TMP86]], 1 +; CHECK-NEXT: [[TMP94:%.*]] = select i1 [[TMP91]], i32 [[TMP92]], i32 [[TMP86]] +; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP90]], i32 [[TMP94]], i32 [[TMP93]] +; CHECK-NEXT: [[TMP96:%.*]] = xor i32 [[TMP95]], [[TMP54]] +; CHECK-NEXT: [[TMP97:%.*]] = sub i32 [[TMP96]], [[TMP54]] +; CHECK-NEXT: [[TMP98:%.*]] = insertelement <2 x i32> [[TMP49]], i32 [[TMP97]], i64 1 +; CHECK-NEXT: store <2 x i32> [[TMP98]], <2 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: sdiv_v2i32_pow2_shl_denom: @@ -5252,70 +5174,64 @@ ; CHECK-NEXT: [[TMP37:%.*]] = mul i32 [[TMP36]], [[TMP8]] ; CHECK-NEXT: [[TMP38:%.*]] = sub i32 [[TMP7]], [[TMP37]] ; CHECK-NEXT: [[TMP39:%.*]] = icmp uge i32 [[TMP38]], [[TMP8]] -; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP41:%.*]] = icmp uge i32 [[TMP7]], [[TMP37]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], [[TMP42]] -; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP43]], 0 -; CHECK-NEXT: [[TMP45:%.*]] = sub i32 [[TMP38]], [[TMP8]] -; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP38]], [[TMP8]] -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP44]], i32 [[TMP38]], i32 [[TMP45]] -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], i32 [[TMP47]], i32 [[TMP46]] -; CHECK-NEXT: [[TMP49:%.*]] = xor i32 [[TMP48]], [[TMP3]] -; CHECK-NEXT: [[TMP50:%.*]] = sub i32 [[TMP49]], [[TMP3]] -; CHECK-NEXT: [[TMP51:%.*]] = insertelement <2 x i32> undef, i32 [[TMP50]], i64 0 -; CHECK-NEXT: [[TMP52:%.*]] = extractelement <2 x i32> [[X]], i64 1 -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 -; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP52]], 31 -; CHECK-NEXT: [[TMP55:%.*]] = ashr i32 [[TMP53]], 31 -; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[TMP52]], [[TMP54]] -; CHECK-NEXT: [[TMP57:%.*]] = add i32 [[TMP53]], [[TMP55]] -; CHECK-NEXT: [[TMP58:%.*]] = xor i32 [[TMP56]], [[TMP54]] -; CHECK-NEXT: [[TMP59:%.*]] = xor i32 [[TMP57]], [[TMP55]] -; CHECK-NEXT: [[TMP60:%.*]] = uitofp i32 [[TMP59]] to float -; CHECK-NEXT: [[TMP61:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP60]]) -; CHECK-NEXT: [[TMP62:%.*]] = fmul fast float [[TMP61]], 0x41F0000000000000 -; CHECK-NEXT: [[TMP63:%.*]] = fptoui float [[TMP62]] to i32 -; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[TMP63]] to i64 -; CHECK-NEXT: [[TMP65:%.*]] = zext i32 [[TMP59]] to i64 -; CHECK-NEXT: [[TMP66:%.*]] = mul i64 [[TMP64]], [[TMP65]] -; CHECK-NEXT: [[TMP67:%.*]] = trunc i64 [[TMP66]] to i32 -; CHECK-NEXT: [[TMP68:%.*]] = lshr i64 [[TMP66]], 32 -; CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[TMP68]] to i32 -; CHECK-NEXT: [[TMP70:%.*]] = sub i32 0, [[TMP67]] -; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[TMP69]], 0 -; CHECK-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP70]], i32 [[TMP67]] -; CHECK-NEXT: [[TMP73:%.*]] = zext i32 [[TMP72]] to i64 -; CHECK-NEXT: [[TMP74:%.*]] = zext i32 [[TMP63]] to i64 -; CHECK-NEXT: [[TMP75:%.*]] = mul i64 [[TMP73]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = trunc i64 [[TMP75]] to i32 -; CHECK-NEXT: [[TMP77:%.*]] = lshr i64 [[TMP75]], 32 -; CHECK-NEXT: [[TMP78:%.*]] = trunc i64 [[TMP77]] to i32 -; CHECK-NEXT: [[TMP79:%.*]] = add i32 [[TMP63]], [[TMP78]] -; CHECK-NEXT: [[TMP80:%.*]] = sub i32 [[TMP63]], [[TMP78]] -; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP71]], i32 [[TMP79]], i32 [[TMP80]] -; CHECK-NEXT: [[TMP82:%.*]] = zext i32 [[TMP81]] to i64 -; CHECK-NEXT: [[TMP83:%.*]] = zext i32 [[TMP58]] to i64 -; CHECK-NEXT: [[TMP84:%.*]] = mul i64 [[TMP82]], [[TMP83]] -; CHECK-NEXT: [[TMP85:%.*]] = trunc i64 [[TMP84]] to i32 -; CHECK-NEXT: [[TMP86:%.*]] = lshr i64 [[TMP84]], 32 -; CHECK-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i32 -; CHECK-NEXT: [[TMP88:%.*]] = mul i32 [[TMP87]], [[TMP59]] -; CHECK-NEXT: [[TMP89:%.*]] = sub i32 [[TMP58]], [[TMP88]] -; CHECK-NEXT: [[TMP90:%.*]] = icmp uge i32 [[TMP89]], [[TMP59]] -; CHECK-NEXT: [[TMP91:%.*]] = select i1 [[TMP90]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP92:%.*]] = icmp uge i32 [[TMP58]], [[TMP88]] -; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 -1, i32 0 -; CHECK-NEXT: [[TMP94:%.*]] = and i32 [[TMP91]], [[TMP93]] -; CHECK-NEXT: [[TMP95:%.*]] = icmp eq i32 [[TMP94]], 0 -; CHECK-NEXT: [[TMP96:%.*]] = sub i32 [[TMP89]], [[TMP59]] -; CHECK-NEXT: [[TMP97:%.*]] = add i32 [[TMP89]], [[TMP59]] -; CHECK-NEXT: [[TMP98:%.*]] = select i1 [[TMP95]], i32 [[TMP89]], i32 [[TMP96]] -; CHECK-NEXT: [[TMP99:%.*]] = select i1 [[TMP92]], i32 [[TMP98]], i32 [[TMP97]] -; CHECK-NEXT: [[TMP100:%.*]] = xor i32 [[TMP99]], [[TMP54]] -; CHECK-NEXT: [[TMP101:%.*]] = sub i32 [[TMP100]], [[TMP54]] -; CHECK-NEXT: [[TMP102:%.*]] = insertelement <2 x i32> [[TMP51]], i32 [[TMP101]], i64 1 -; CHECK-NEXT: store <2 x i32> [[TMP102]], <2 x i32> addrspace(1)* [[OUT:%.*]] +; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i32 [[TMP7]], [[TMP37]] +; CHECK-NEXT: [[TMP41:%.*]] = and i1 [[TMP39]], [[TMP40]] +; CHECK-NEXT: [[TMP42:%.*]] = sub i32 [[TMP38]], [[TMP8]] +; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP38]], [[TMP8]] +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP41]], i32 [[TMP42]], i32 [[TMP38]] +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP40]], i32 [[TMP44]], i32 [[TMP43]] +; CHECK-NEXT: [[TMP46:%.*]] = xor i32 [[TMP45]], [[TMP3]] +; CHECK-NEXT: [[TMP47:%.*]] = sub i32 [[TMP46]], [[TMP3]] +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <2 x i32> undef, i32 [[TMP47]], i64 0 +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x i32> [[X]], i64 1 +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <2 x i32> [[SHL_Y]], i64 1 +; CHECK-NEXT: [[TMP51:%.*]] = ashr i32 [[TMP49]], 31 +; CHECK-NEXT: [[TMP52:%.*]] = ashr i32 [[TMP50]], 31 +; CHECK-NEXT: [[TMP53:%.*]] = add i32 [[TMP49]], [[TMP51]] +; CHECK-NEXT: [[TMP54:%.*]] = add i32 [[TMP50]], [[TMP52]] +; CHECK-NEXT: [[TMP55:%.*]] = xor i32 [[TMP53]], [[TMP51]] +; CHECK-NEXT: [[TMP56:%.*]] = xor i32 [[TMP54]], [[TMP52]] +; CHECK-NEXT: [[TMP57:%.*]] = uitofp i32 [[TMP56]] to float +; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP57]]) +; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP58]], 0x41F0000000000000 +; CHECK-NEXT: [[TMP60:%.*]] = fptoui float [[TMP59]] to i32 +; CHECK-NEXT: [[TMP61:%.*]] = zext i32 [[TMP60]] to i64 +; CHECK-NEXT: [[TMP62:%.*]] = zext i32 [[TMP56]] to i64 +; CHECK-NEXT: [[TMP63:%.*]] = mul i64 [[TMP61]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = trunc i64 [[TMP63]] to i32 +; CHECK-NEXT: [[TMP65:%.*]] = lshr i64 [[TMP63]], 32 +; CHECK-NEXT: [[TMP66:%.*]] = trunc i64 [[TMP65]] to i32 +; CHECK-NEXT: [[TMP67:%.*]] = sub i32 0, [[TMP64]] +; CHECK-NEXT: [[TMP68:%.*]] = icmp eq i32 [[TMP66]], 0 +; CHECK-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP67]], i32 [[TMP64]] +; CHECK-NEXT: [[TMP70:%.*]] = zext i32 [[TMP69]] to i64 +; CHECK-NEXT: [[TMP71:%.*]] = zext i32 [[TMP60]] to i64 +; CHECK-NEXT: [[TMP72:%.*]] = mul i64 [[TMP70]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = trunc i64 [[TMP72]] to i32 +; CHECK-NEXT: [[TMP74:%.*]] = lshr i64 [[TMP72]], 32 +; CHECK-NEXT: [[TMP75:%.*]] = trunc i64 [[TMP74]] to i32 +; CHECK-NEXT: [[TMP76:%.*]] = add i32 [[TMP60]], [[TMP75]] +; CHECK-NEXT: [[TMP77:%.*]] = sub i32 [[TMP60]], [[TMP75]] +; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP68]], i32 [[TMP76]], i32 [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = zext i32 [[TMP78]] to i64 +; CHECK-NEXT: [[TMP80:%.*]] = zext i32 [[TMP55]] to i64 +; CHECK-NEXT: [[TMP81:%.*]] = mul i64 [[TMP79]], [[TMP80]] +; CHECK-NEXT: [[TMP82:%.*]] = trunc i64 [[TMP81]] to i32 +; CHECK-NEXT: [[TMP83:%.*]] = lshr i64 [[TMP81]], 32 +; CHECK-NEXT: [[TMP84:%.*]] = trunc i64 [[TMP83]] to i32 +; CHECK-NEXT: [[TMP85:%.*]] = mul i32 [[TMP84]], [[TMP56]] +; CHECK-NEXT: [[TMP86:%.*]] = sub i32 [[TMP55]], [[TMP85]] +; CHECK-NEXT: [[TMP87:%.*]] = icmp uge i32 [[TMP86]], [[TMP56]] +; CHECK-NEXT: [[TMP88:%.*]] = icmp uge i32 [[TMP55]], [[TMP85]] +; CHECK-NEXT: [[TMP89:%.*]] = and i1 [[TMP87]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = sub i32 [[TMP86]], [[TMP56]] +; CHECK-NEXT: [[TMP91:%.*]] = add i32 [[TMP86]], [[TMP56]] +; CHECK-NEXT: [[TMP92:%.*]] = select i1 [[TMP89]], i32 [[TMP90]], i32 [[TMP86]] +; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP88]], i32 [[TMP92]], i32 [[TMP91]] +; CHECK-NEXT: [[TMP94:%.*]] = xor i32 [[TMP93]], [[TMP51]] +; CHECK-NEXT: [[TMP95:%.*]] = sub i32 [[TMP94]], [[TMP51]] +; CHECK-NEXT: [[TMP96:%.*]] = insertelement <2 x i32> [[TMP48]], i32 [[TMP95]], i64 1 +; CHECK-NEXT: store <2 x i32> [[TMP96]], <2 x i32> addrspace(1)* [[OUT:%.*]] ; CHECK-NEXT: ret void ; ; GCN-LABEL: srem_v2i32_pow2_shl_denom: