Expand integer multiplies inline when the target provides no libcall name.

* LegalizeIntegerTypes.cpp: take the brute-force multiply expansion not only
  when no libcall exists for the type, but also when the target has registered
  no name for that libcall (TLI.getLibcallName(LC) returns null).
* AMDGPUISelLowering.cpp: clear the name of every libcall, since the target
  has none.
* test/CodeGen/AMDGPU/mul.ll: add scalar and vector i128 multiply tests.

Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2205,7 +2205,7 @@
   else if (VT == MVT::i128)
     LC = RTLIB::MUL_I128;
 
-  if (LC == RTLIB::UNKNOWN_LIBCALL) {
+  if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
     // We'll expand the multiplication by brute force because we have no other
     // options. This is a trivially-generalized version of the code from
     // Hacker's Delight (itself derived from Knuth's Algorithm M from section
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -435,6 +435,10 @@
   setOperationAction(ISD::SELECT, MVT::v4f32, Promote);
   AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32);
 
+  // There are no libcalls of any kind.
+  for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
+    setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
+
   setBooleanContents(ZeroOrNegativeOneBooleanContent);
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
Index: test/CodeGen/AMDGPU/mul.ll
===================================================================
--- test/CodeGen/AMDGPU/mul.ll
+++ test/CodeGen/AMDGPU/mul.ll
@@ -198,3 +198,75 @@
   store i64 %3, i64 addrspace(1)* %out
   ret void
 }
+
+; FIXME: Load dwordx4
+; FUNC-LABEL: {{^}}s_mul_i128:
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+
+
+; SI: v_mul_hi_u32
+; SI: v_mul_hi_u32
+; SI: s_mul_i32
+; SI: v_mul_hi_u32
+; SI: s_mul_i32
+; SI: s_mul_i32
+; SI: v_mul_hi_u32
+; SI: v_mul_hi_u32
+; SI: s_mul_i32
+; SI-DAG: s_mul_i32
+; SI-DAG: v_mul_hi_u32
+; SI: s_mul_i32
+; SI: s_mul_i32
+; SI: s_mul_i32
+; SI: s_mul_i32
+; SI: s_mul_i32
+
+; SI: buffer_store_dwordx4
+define void @s_mul_i128(i128 addrspace(1)* %out, i128 %a, i128 %b) nounwind #0 {
+  %mul = mul i128 %a, %b
+  store i128 %mul, i128 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_mul_i128:
+; SI: {{buffer|flat}}_load_dwordx4
+; SI: {{buffer|flat}}_load_dwordx4
+
+; SI: v_mul_lo_i32
+; SI: v_mul_hi_u32
+; SI: v_mul_hi_u32
+; SI: v_mul_lo_i32
+; SI: v_mul_hi_u32
+; SI: v_mul_hi_u32
+; SI: v_mul_lo_i32
+; SI: v_mul_lo_i32
+; SI: v_add_i32_e32
+; SI: v_mul_hi_u32
+; SI: v_mul_lo_i32
+; SI: v_mul_hi_u32
+; SI: v_mul_lo_i32
+; SI: v_mul_lo_i32
+; SI: v_mul_lo_i32
+; SI: v_mul_lo_i32
+; SI: v_mul_lo_i32
+
+; SI: {{buffer|flat}}_store_dwordx4
+define void @v_mul_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %aptr, i128 addrspace(1)* %bptr) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %gep.a = getelementptr inbounds i128, i128 addrspace(1)* %aptr, i32 %tid
+  %gep.b = getelementptr inbounds i128, i128 addrspace(1)* %bptr, i32 %tid
+  %gep.out = getelementptr inbounds i128, i128 addrspace(1)* %out, i32 %tid
+  %a = load i128, i128 addrspace(1)* %gep.a
+  %b = load i128, i128 addrspace(1)* %gep.b
+  %mul = mul i128 %a, %b
+  store i128 %mul, i128 addrspace(1)* %gep.out
+  ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone}