Keep EXTRACT_VECTOR_ELT results in a register when the type is legal there.

SoftenFloatRes_EXTRACT_VECTOR_ELT now receives ResNo. When isLegalInHWReg()
is true for the node's result type it returns SDValue(N, ResNo) instead of
building an integer-typed EXTRACT_VECTOR_ELT. The new test
test/CodeGen/X86/fp128-extract.ll checks the libcall sequence produced for
@TestExtract on the x86_64-linux-android and x86_64-linux-gnu triples.

NOTE(review): leading whitespace and column alignment on context lines were
reconstructed; confirm against the tree, or apply with `patch -l`.

Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -72,7 +72,7 @@
     case ISD::BUILD_PAIR:  R = SoftenFloatRes_BUILD_PAIR(N); break;
     case ISD::ConstantFP:  R = SoftenFloatRes_ConstantFP(N, ResNo); break;
     case ISD::EXTRACT_VECTOR_ELT:
-      R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break;
+      R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break;
     case ISD::FABS:        R = SoftenFloatRes_FABS(N, ResNo); break;
     case ISD::FMINNUM:     R = SoftenFloatRes_FMINNUM(N); break;
     case ISD::FMAXNUM:     R = SoftenFloatRes_FMAXNUM(N); break;
@@ -171,7 +171,11 @@
   }
 }
 
-SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N,
+                                                            unsigned ResNo) {
+  // When LegalInHWReg, keep the extracted value in register.
+  if (isLegalInHWReg(N->getValueType(ResNo)))
+    return SDValue(N, ResNo);
   SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
                      NewOp.getValueType().getVectorElementType(),
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -428,7 +428,7 @@
   SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
   SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
-  SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_FMINNUM(SDNode *N);
   SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
Index: test/CodeGen/X86/fp128-extract.ll
===================================================================
--- test/CodeGen/X86/fp128-extract.ll
+++ test/CodeGen/X86/fp128-extract.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx \
+; RUN:     -enable-legalize-types-checking | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx \
+; RUN:     -enable-legalize-types-checking | FileCheck %s
+
+@my_fp128_v0 = global fp128 0xL0, align 16
+@my_fp128_v1 = global fp128 0xL0, align 16
+@my_fp128_v2 = global fp128 0xL0, align 16
+
+; Test the softened result of extractelement op code.
+define hidden fp128 @TestExtract() local_unnamed_addr align 2 {
+entry:
+  ; Simplified instruction pattern from the output of llvm before r289042,
+  ; for a boost function ...::insert<...>::traverse<...>().
+  %0 = fsub <2 x double> zeroinitializer, undef
+  %1 = fpext <2 x double> %0 to <2 x fp128>
+  %2 = extractelement <2 x fp128> %1, i32 1
+  %3 = fmul fp128 undef, %2
+  %4 = fcmp ogt fp128 %3, 0xL00000000000000003F8F000000000000
+  %5 = bitcast fp128* @my_fp128_v0 to <2 x double>*
+  %6 = load <2 x double>, <2 x double>* %5, align 8
+  %7 = bitcast fp128* @my_fp128_v1 to <2 x double>*
+  %8 = load <2 x double>, <2 x double>* %7, align 8
+  %9 = bitcast fp128* @my_fp128_v2 to <2 x double>*
+  %10 = load <2 x double>, <2 x double>* %9, align 8
+  %11 = fcmp olt <2 x double> %6, %8
+  %12 = select <2 x i1> %11, <2 x double> %6, <2 x double> %8
+  %13 = fcmp ogt <2 x double> %6, %10
+  %14 = select <2 x i1> %13, <2 x double> %6, <2 x double> %10
+  %15 = fsub <2 x double> %14, %12
+  %16 = fpext <2 x double> %15 to <2 x fp128> ;
+  %17 = extractelement <2 x fp128> %16, i32 0
+  %18 = extractelement <2 x fp128> %16, i32 1
+  %mul.i36 = fmul fp128 %17, %18
+  %19 = extractelement <2 x double> %10, i32 0
+  %20 = extractelement <2 x double> %8, i32 0
+  %sub.i6 = fsub double %19, %20
+  %conv.i6 = fpext double %sub.i6 to fp128 ;
+  %21 = extractelement <2 x double> %10, i32 1
+  %22 = extractelement <2 x double> %8, i32 1
+  %23 = fsub double %21, %22
+  %conv.i5 = fpext double %23 to fp128
+  %mul.i5 = fmul fp128 %conv.i6, %conv.i5
+  %sub.i2 = fsub fp128 %mul.i36, %mul.i5
+  ret fp128 %sub.i2
+; CHECK-LABEL: TestExtract:
+; CHECK:       movapd my_fp128_v0(%rip), %xmm0
+; CHECK:       movapd my_fp128_v1(%rip), %xmm1
+; CHECK:       movapd my_fp128_v2(%rip), %xmm2
+; CHECK:       callq __extenddftf2
+; CHECK:       callq __extenddftf2
+; CHECK:       callq __multf3
+; CHECK:       callq __extenddftf2
+; CHECK:       callq __extenddftf2
+; CHECK:       callq __multf3
+; CHECK:       callq __subtf3
+; CHECK:       ret
+}