Index: llvm/lib/CodeGen/GlobalISel/Utils.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -260,6 +260,7 @@
   MachineInstr *MI;
   auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) {
     return Opcode == TargetOpcode::G_CONSTANT ||
+           Opcode == TargetOpcode::G_IMPLICIT_DEF ||
            (HandleFConstant && Opcode == TargetOpcode::G_FCONSTANT);
   };
   auto GetImmediateValue = [HandleFConstant,
@@ -285,6 +286,7 @@
     case TargetOpcode::G_TRUNC:
     case TargetOpcode::G_SEXT:
     case TargetOpcode::G_ZEXT:
+    case TargetOpcode::G_ANYEXT:
       SeenOpcodes.push_back(std::make_pair(
           MI->getOpcode(),
           MRI.getType(MI->getOperand(0).getReg()).getSizeInBits()));
@@ -305,6 +307,9 @@
   if (!MI || !IsConstantOpcode(MI->getOpcode()))
     return None;
 
+  if (MI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+    return ValueAndVReg{0, VReg};
+
   Optional<APInt> MaybeVal = GetImmediateValue(*MI);
   if (!MaybeVal)
     return None;
@@ -319,6 +324,7 @@
       Val = Val.sext(OpcodeAndSize.second);
       break;
     case TargetOpcode::G_ZEXT:
+    case TargetOpcode::G_ANYEXT:
       Val = Val.zext(OpcodeAndSize.second);
       break;
     }
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -642,6 +642,16 @@
 
   auto ConstSrc1 = getConstantVRegValWithLookThrough(Src1, *MRI, true, true);
   if (ConstSrc1) {
+    // TODO: This should probably be a combine somewhere
+    // (build_vector_trunc $src0, undef) -> copy $src0
+    MachineInstr *Src1Def = MRI->getVRegDef(ConstSrc1->VReg);
+    if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
+      MI.setDesc(TII.get(AMDGPU::COPY));
+      MI.RemoveOperand(2);
+      return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI) &&
+             RBI.constrainGenericRegister(Src0, AMDGPU::SReg_32RegClass, *MRI);
+    }
+
     auto ConstSrc0 = getConstantVRegValWithLookThrough(Src0, *MRI, true, true);
     if (ConstSrc0) {
       uint32_t Lo16 = static_cast<uint32_t>(ConstSrc0->Value) & 0xffff;
@@ -654,16 +664,6 @@
     }
   }
 
-  // TODO: This should probably be a combine somewhere
-  // (build_vector_trunc $src0, undef -> copy $src0
-  MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI);
-  if (Src1Def && Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
-    MI.setDesc(TII.get(AMDGPU::COPY));
-    MI.RemoveOperand(2);
-    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI) &&
-           RBI.constrainGenericRegister(Src0, AMDGPU::SReg_32RegClass, *MRI);
-  }
-
   Register ShiftSrc0;
   Register ShiftSrc1;
   int64_t ShiftAmt;
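Note on the two changes above: getConstantVRegValWithLookThrough now reports a G_IMPLICIT_DEF (possibly hidden behind G_TRUNC/G_SEXT/G_ZEXT and the newly handled G_ANYEXT) as a constant with a placeholder Value of 0. A caller that needs to tell a real constant from undef must therefore inspect the def of the returned VReg, which is exactly what the moved G_BUILD_VECTOR_TRUNC code does. A minimal caller-side sketch, not part of the patch: isUndefConstant is a hypothetical name invented for illustration, but the APIs used are the ones visible above.

    #include "llvm/CodeGen/GlobalISel/Utils.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/CodeGen/TargetOpcodes.h"

    using namespace llvm;

    // Returns true if Src is "constant-like" only because it is undef.
    static bool isUndefConstant(Register Src, const MachineRegisterInfo &MRI) {
      auto Const = getConstantVRegValWithLookThrough(
          Src, MRI, /*LookThroughInstrs=*/true, /*HandleFConstant=*/true);
      if (!Const)
        return false; // Not a constant at all.
      // After this patch, undef sources also produce a ValueAndVReg, with
      // Value == 0 and VReg pointing at the G_IMPLICIT_DEF result.
      return MRI.getVRegDef(Const->VReg)->getOpcode() ==
             TargetOpcode::G_IMPLICIT_DEF;
    }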
Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -35,7 +35,7 @@
 ; The key problem here is that we may fail to create an MBB referenced by a
 ; PHI. If so, we cannot complete the G_PHI and mustn't try or bad things
 ; happen.
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %6:gpr(s32), %2:gpr(p0) :: (store seq_cst 4 into %ir.addr) (in function: pending_phis)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %{{[0-9]+}}:gpr(s32), %{{[0-9]+}}:gpr(p0) :: (store seq_cst 4 into %ir.addr) (in function: pending_phis)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for pending_phis
 ; FALLBACK-WITH-REPORT-OUT-LABEL: pending_phis:
 define i32 @pending_phis(i1 %tst, i32 %val, i32* %addr) {
@@ -54,7 +54,7 @@
 }
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(<7 x s32>), %0:_(p0) :: (store 28 into %ir.addr, align 32) (in function: odd_vector)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %1:_(<7 x s32>), %{{[0-9]+}}:_(p0) :: (store 28 into %ir.addr, align 32) (in function: odd_vector)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector
 ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector:
 define void @odd_vector(<7 x i32>* %addr) {
@@ -85,7 +85,7 @@
 ; Make sure we don't mess up metadata arguments.
 declare void @llvm.write_register.i64(metadata, i64)
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_WRITE_REGISTER !0, %0:_(s64) (in function: test_write_register_intrin)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: G_WRITE_REGISTER !0, %{{[0-9]+}}:_(s64) (in function: test_write_register_intrin)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_write_register_intrin
 ; FALLBACK-WITH-REPORT-LABEL: test_write_register_intrin:
 define void @test_write_register_intrin() {
@@ -96,14 +96,14 @@
 @_ZTIi = external global i8*
 declare i32 @__gxx_personality_v0(...)
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %0:_(s128) = G_FCONSTANT fp128 0xL00000000000000004000000000000000
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s128) = G_FCONSTANT fp128 0xL00000000000000004000000000000000
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_quad_dump
 ; FALLBACK-WITH-REPORT-OUT-LABEL: test_quad_dump:
 define fp128 @test_quad_dump() {
   ret fp128 0xL00000000000000004000000000000000
 }
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(p0) = G_EXTRACT_VECTOR_ELT %{{[0-9]+}}:_(<2 x p0>), %{{[0-9]+}}:_(s64) (in function: vector_of_pointers_extractelement)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(p0) = G_EXTRACT_VECTOR_ELT %{{[0-9]+}}:_(<2 x p0>), %{{[0-9]+}}:_(s64) (in function: vector_of_pointers_extractelement)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_extractelement
 ; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_extractelement:
 @var = global <2 x i16*> zeroinitializer
@@ -120,7 +120,7 @@
   br label %block
 }
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(<2 x p0>) = G_INSERT_VECTOR_ELT %0:_, %{{[0-9]+}}:_(p0), %{{[0-9]+}}:_(s32) (in function: vector_of_pointers_insertelement)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(<2 x p0>) = G_INSERT_VECTOR_ELT %0:_, %{{[0-9]+}}:_(p0), %{{[0-9]+}}:_(s32) (in function: vector_of_pointers_insertelement)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_insertelement
 ; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_insertelement:
 define void @vector_of_pointers_insertelement() {
@@ -150,17 +150,17 @@
 ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s96) = G_INSERT %{{[0-9]+}}:_, %{{[0-9]+}}:_(s32), 64 (in function: nonpow2_or_narrowing)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_or_narrowing
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_or_narrowing:
-define void @nonpow2_or_narrowing() {
-  %a = add i128 undef, undef
+define void @nonpow2_or_narrowing(i128 %x, i128 %y, i128 %z, i128 %w) {
+  %a = add i128 %x, %y
   %b = trunc i128 %a to i96
-  %a2 = add i128 undef, undef
+  %a2 = add i128 %z, %w
   %b2 = trunc i128 %a2 to i96
   %dummy = or i96 %b, %b2
   store i96 %dummy, i96* undef
   ret void
 }
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %0:_(s96) = G_INSERT %10:_, %8:_(s32), 64 (in function: nonpow2_load_narrowing)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s96) = G_INSERT %{{[0-9]+}}:_, %{{[0-9]+}}:_(s32), 64 (in function: nonpow2_load_narrowing)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_load_narrowing
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_load_narrowing:
 define void @nonpow2_load_narrowing() {
@@ -193,7 +193,7 @@
   ret i32 0
 }
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %2:fpr(<4 x s16>) = G_ZEXT %0:fpr(<4 x s8>) (in function: zext_v4s8)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %{{[0-9]+}}:fpr(<4 x s16>) = G_ZEXT %{{[0-9]+}}:fpr(<4 x s8>) (in function: zext_v4s8)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for zext_v4s8
 ; FALLBACK-WITH-REPORT-OUT-LABEL: zext_v4s8
 define <4 x i16> @zext_v4s8(<4 x i8> %in) {
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-implicit-def.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/select-implicit-def.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-implicit-def.mir
@@ -19,9 +19,9 @@
 body: |
   bb.0:
     ; CHECK-LABEL: name: implicit_def
-    ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
-    ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[DEF]], [[DEF]]
-    ; CHECK: $w0 = COPY [[ADDWrr]]
+    ; CHECK: [[DEF:%[0-9]+]]:gpr32common = IMPLICIT_DEF
+    ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[DEF]], 0, 0
+    ; CHECK: $w0 = COPY [[ADDWri]]
     %0(s32) = G_IMPLICIT_DEF
     %1(s32) = G_ADD %0, %0
     $w0 = COPY %1(s32)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -136,7 +136,7 @@
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
 ; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
-; CHECK-NEXT:    s_xor_b32 s4, s4, 1
+; CHECK-NEXT:    s_not_b32 s4, s4
 ; CHECK-NEXT:    s_and_b32 s4, s4, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
 ; CHECK-NEXT:    s_cbranch_scc0 BB4_6
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir
@@ -477,10 +477,8 @@
   bb.0:
     ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_impdef_constant
-    ; GFX9: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-    ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
-    ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[S_MOV_B32_]]
-    ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
+    ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8060928
+    ; GFX9: S_ENDPGM 0, implicit [[S_MOV_B32_]]
     %0:sgpr(s32) = G_IMPLICIT_DEF
     %1:sgpr(s32) = G_CONSTANT i32 123
     %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
@@ -535,12 +533,8 @@
   bb.0:
     ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_zext_impdef_zext_constant
-    ; GFX9: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-    ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
-    ; GFX9: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[DEF]], 1048576, implicit-def $scc
-    ; GFX9: [[S_BFE_U32_1:%[0-9]+]]:sreg_32 = S_BFE_U32 [[S_MOV_B32_]], 1048576, implicit-def $scc
-    ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_BFE_U32_]], [[S_BFE_U32_1]]
-    ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
+    ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8060928
+    ; GFX9: S_ENDPGM 0, implicit [[S_MOV_B32_]]
     %0:sgpr(s16) = G_IMPLICIT_DEF
     %1:sgpr(s16) = G_CONSTANT i16 123
     %2:sgpr(s32) = G_ZEXT %0
@@ -579,10 +573,8 @@
   bb.0:
    ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_anyext_constant_anyext_constant
-    ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
-    ; GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 456
-    ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[S_MOV_B32_1]]
-    ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
+    ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 29884539
+    ; GFX9: S_ENDPGM 0, implicit [[S_MOV_B32_]]
     %0:sgpr(s16) = G_CONSTANT i16 123
     %1:sgpr(s16) = G_CONSTANT i16 456
     %2:sgpr(s32) = G_ANYEXT %0
@@ -601,10 +593,8 @@
   bb.0:
     ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_anyext_impdef_anyext_constant
-    ; GFX9: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-    ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
-    ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[DEF]], [[S_MOV_B32_]]
-    ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
+    ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8060928
+    ; GFX9: S_ENDPGM 0, implicit [[S_MOV_B32_]]
     %0:sgpr(s16) = G_IMPLICIT_DEF
     %1:sgpr(s16) = G_CONSTANT i16 123
     %2:sgpr(s32) = G_ANYEXT %0
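For reference, the folded immediates in the new checks above follow from the S_PACK_LL_B32_B16 semantics (low 16 bits of the first source in the low half, low 16 bits of the second source in the high half), with an undef input folded to 0. A self-contained sketch of the arithmetic; packLL is an illustrative stand-in written for this note, not an LLVM API:

    #include <cstdint>

    // S_PACK_LL_B32_B16 dst, src0, src1 keeps the low 16 bits of each source:
    // dst = (src1 << 16) | (src0 & 0xffff).
    constexpr uint32_t packLL(uint32_t Src0, uint32_t Src1) {
      return (Src1 << 16) | (Src0 & 0xffffu);
    }

    // Undef folds to 0, leaving only 123 in the high half: 123 << 16.
    static_assert(packLL(0, 123) == 8060928, "impdef_constant check value");
    // Two anyext'd constants packed together: (456 << 16) | 123.
    static_assert(packLL(123, 456) == 29884539, "anyext_constant_anyext_constant check value");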
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir
@@ -710,7 +710,6 @@
     ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
     ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
     ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[DEF1]](s32)
-    ; GFX9: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
     ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
     ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32)
@@ -721,12 +720,10 @@
     ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[COPY6]](s32)
     ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
     ; GFX9: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
-    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
-    ; GFX9: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[DEF2]], [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
-    ; GFX9: [[ASHR2:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL2]], [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
-    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>), [[ASHR2]](<2 x s16>)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[C2]](s32)
+    ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[ASHR]](<2 x s16>), [[ASHR1]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
     ; GFX9: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0
     ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT1]](<3 x s16>), 0
     ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT1]](<4 x s16>)
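The llvm.amdgcn.fdot2.ll update just below replaces an s_movk_i32 0x4000 plus s_pack_ll_b32_b16 sequence with the scalar inline constant 2.0, presumably with the op_sel_hi bits in the new checks handling the broadcast to both halves. This relies on 0x4000 being the IEEE binary16 encoding of 2.0, which can be checked directly; decodeHalf is a hypothetical helper written for this note:

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    // Decode an IEEE binary16 value (normal numbers only, enough for 2.0).
    float decodeHalf(uint16_t Bits) {
      int Sign = (Bits >> 15) & 1;
      int Exp = (Bits >> 10) & 0x1f; // biased by 15
      int Mant = Bits & 0x3ff;       // 10 fraction bits
      return (Sign ? -1.0f : 1.0f) *
             std::ldexp(1.0f + Mant / 1024.0f, Exp - 15);
    }

    int main() {
      // s_movk_i32 s4, 0x4000 materializes half 2.0 in each packed lane.
      assert(decodeHalf(0x4000) == 2.0f);
    }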
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
@@ -122,19 +122,15 @@
 ; GFX906-LABEL: v_fdot2_inline_literal_a:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT:    s_movk_i32 s4, 0x4000
-; GFX906-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
-; GFX906-NEXT:    v_dot2_f32_f16 v0, s4, v0, v1
+; GFX906-NEXT:    v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_fdot2_inline_literal_a:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    s_movk_i32 s4, 0x4000
+; GFX10-NEXT:    v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
-; GFX10-NEXT:    v_dot2_f32_f16 v0, s4, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
   ret float %ret
@@ -144,19 +140,15 @@
 ; GFX906-LABEL: v_fdot2_inline_literal_b:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT:    s_movk_i32 s4, 0x4000
-; GFX906-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
-; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, s4, v1
+; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_fdot2_inline_literal_b:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    s_movk_i32 s4, 0x4000
+; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
-; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, s4, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
   ret float %ret
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
@@ -10,7 +10,7 @@
 ; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x0
 ; GFX9-NEXT:    s_mov_b32 s0, 1
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_xor_b32 s1, s1, 1
+; GFX9-NEXT:    s_not_b32 s1, s1
 ; GFX9-NEXT:    s_and_b32 s1, s1, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX9-NEXT:    s_cbranch_scc0 BB0_2
@@ -85,7 +85,7 @@
 ; GFX9-NEXT:    s_load_dword s1, s[4:5], 0x0
 ; GFX9-NEXT:    s_mov_b32 s0, 1
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_xor_b32 s1, s1, 1
+; GFX9-NEXT:    s_not_b32 s1, s1
 ; GFX9-NEXT:    s_and_b32 s1, s1, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
 ; GFX9-NEXT:    s_cbranch_scc0 BB1_2
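A note on the s_xor_b32 to s_not_b32 differences in divergent-control-flow.ll and localizer.ll above: the two sequences compute the same branch condition, because only the low bit of the result is consumed and it is immediately re-masked by the following s_and_b32 with 1, and the low bit of x ^ 1 equals the low bit of ~x for every x. A minimal self-contained check of that equivalence:

    #include <cassert>
    #include <cstdint>

    int main() {
      // For any 32-bit x, (x ^ 1) and ~x agree in the low bit, so after
      // the s_and_b32 with 1 the two instruction sequences are equivalent.
      for (uint32_t X : {0u, 1u, 2u, 0xDEADBEEFu, 0xFFFFFFFFu}) {
        assert(((X ^ 1u) & 1u) == ((~X) & 1u));
      }
    }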