diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4548,8 +4548,10 @@
 static Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
                                const SimplifyQuery &Q, unsigned MaxRecurse) {
-  if (auto *C = dyn_cast<Constant>(Op))
+  if (auto *C = dyn_cast<Constant>(Op)) {
+    C = ConstantFoldConstant(C, Q.DL);
     return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL);
+  }
 
   if (auto *CI = dyn_cast<CastInst>(Op)) {
     auto *Src = CI->getOperand(0);
diff --git a/llvm/test/Transforms/InstSimplify/constantfold-cast-expression-not-already-folded-operand.ll b/llvm/test/Transforms/InstSimplify/constantfold-cast-expression-not-already-folded-operand.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/constantfold-cast-expression-not-already-folded-operand.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -instsimplify --data-layout="E-p:16:16" | FileCheck --check-prefix=CHECK-BIG %s
+; RUN: opt < %s -S -instsimplify --data-layout="e-p:16:16-p:16:16" | FileCheck --check-prefix=CHECK-LITTLE %s
+
+; This test is based on a reduced version of input, produced by some
+; combination of inline/sroa/unroll. When used as input to gvn/newgvn it
+; resulted in huge constant expressions due to not being folded
+; properly. Given the unreduced input opt would crash due to out-of-memory, or
+; too large files being produced, when dumping the IR in text form (or one
+; would hit timeouts because compilation was really slow).
+;
+; We verify that this is folded properly by instsimplify. The gvn and newgvn
+; passes are using SimplifyInstruction, so if it works for instsimplify it
+; should work for other passes using SimplifyInstruction as well.
+define i16 @main() {
+; CHECK-BIG-LABEL: @main(
+; CHECK-BIG-NEXT:  entry:
+; CHECK-BIG-NEXT:    tail call void @my.cmp(i16 276, i16 0)
+; CHECK-BIG-NEXT:    ret i16 0
+;
+; CHECK-LITTLE-LABEL: @main(
+; CHECK-LITTLE-NEXT:  entry:
+; CHECK-LITTLE-NEXT:    tail call void @my.cmp(i16 16, i16 260)
+; CHECK-LITTLE-NEXT:    ret i16 0
+;
+entry:
+  %tnr.sroa.0.0.vec.extract162 = extractelement <2 x i16> bitcast (<1 x i32> <i32 15990784> to <2 x i16>), i32 0
+  %tnr.sroa.0.1.vec.extract234 = extractelement <2 x i16> bitcast (<1 x i32> <i32 15990784> to <2 x i16>), i32 1
+  %add32.i = add i16 %tnr.sroa.0.0.vec.extract162, 4
+  %tnr.sroa.7.0.insert.ext.i = zext i16 %tnr.sroa.0.1.vec.extract234 to i32
+  %tnr.sroa.0.0.insert.ext.i = zext i16 %add32.i to i32
+  %tnr.sroa.0.0.insert.shift.i = shl nuw i32 %tnr.sroa.0.0.insert.ext.i, 16
+  %tnr.sroa.0.0.insert.insert.i = or i32 %tnr.sroa.0.0.insert.shift.i, %tnr.sroa.7.0.insert.ext.i
+  %0 = bitcast i32 %tnr.sroa.0.0.insert.insert.i to <2 x i16>
+  %tnr.sroa.0.0.vec.extract164 = extractelement <2 x i16> %0, i32 0
+  %tnr.sroa.0.1.vec.extract236 = extractelement <2 x i16> %0, i32 1
+  %add32.i50 = add i16 %tnr.sroa.0.0.vec.extract164, 4
+  %tnr.sroa.7.0.insert.ext.i51 = zext i16 %tnr.sroa.0.1.vec.extract236 to i32
+  %tnr.sroa.0.0.insert.ext.i52 = zext i16 %add32.i50 to i32
+  %tnr.sroa.0.0.insert.shift.i53 = shl nuw i32 %tnr.sroa.0.0.insert.ext.i52, 16
+  %tnr.sroa.0.0.insert.insert.i54 = or i32 %tnr.sroa.0.0.insert.shift.i53, %tnr.sroa.7.0.insert.ext.i51
+  %1 = bitcast i32 %tnr.sroa.0.0.insert.insert.i54 to <2 x i16>
+  %tnr.sroa.0.0.vec.extract166 = extractelement <2 x i16> %1, i32 0
+  %tnr.sroa.0.1.vec.extract238 = extractelement <2 x i16> %1, i32 1
+  %add32.i55 = add i16 %tnr.sroa.0.0.vec.extract166, 4
+  %tnr.sroa.7.0.insert.ext.i56 = zext i16 %tnr.sroa.0.1.vec.extract238 to i32
+  %tnr.sroa.0.0.insert.ext.i57 = zext i16 %add32.i55 to i32
+  %tnr.sroa.0.0.insert.shift.i58 = shl nuw i32 %tnr.sroa.0.0.insert.ext.i57, 16
+  %tnr.sroa.0.0.insert.insert.i59 = or i32 %tnr.sroa.0.0.insert.shift.i58, %tnr.sroa.7.0.insert.ext.i56
+  %2 = bitcast i32 %tnr.sroa.0.0.insert.insert.i59 to <2 x i16>
+  %tnr.sroa.0.0.vec.extract168 = extractelement <2 x i16> %2, i32 0
+  %tnr.sroa.0.1.vec.extract240 = extractelement <2 x i16> %2, i32 1
+  %add32.i60 = add i16 %tnr.sroa.0.0.vec.extract168, 4
+  %tnr.sroa.7.0.insert.ext.i61 = zext i16 %tnr.sroa.0.1.vec.extract240 to i32
+  %tnr.sroa.0.0.insert.ext.i62 = zext i16 %add32.i60 to i32
+  %tnr.sroa.0.0.insert.shift.i63 = shl nuw i32 %tnr.sroa.0.0.insert.ext.i62, 16
+  %tnr.sroa.0.0.insert.insert.i64 = or i32 %tnr.sroa.0.0.insert.shift.i63, %tnr.sroa.7.0.insert.ext.i61
+  %3 = bitcast i32 %tnr.sroa.0.0.insert.insert.i64 to <2 x i16>
+  %tnr.sroa.0.0.vec.extract170 = extractelement <2 x i16> %3, i32 0
+  %tnr.sroa.0.1.vec.extract242 = extractelement <2 x i16> %3, i32 1
+  %add32.i65 = add i16 %tnr.sroa.0.0.vec.extract170, 4
+  %tnr.sroa.7.0.insert.ext.i66 = zext i16 %tnr.sroa.0.1.vec.extract242 to i32
+  %tnr.sroa.0.0.insert.ext.i67 = zext i16 %add32.i65 to i32
+  %tnr.sroa.0.0.insert.shift.i68 = shl nuw i32 %tnr.sroa.0.0.insert.ext.i67, 16
+  %tnr.sroa.0.0.insert.insert.i69 = or i32 %tnr.sroa.0.0.insert.shift.i68, %tnr.sroa.7.0.insert.ext.i66
+  %4 = bitcast i32 %tnr.sroa.0.0.insert.insert.i69 to <2 x i16>
+  %tnr.sroa.0.0.vec.extract172 = extractelement <2 x i16> %4, i32 0
+  %tnr.sroa.0.1.vec.extract244 = extractelement <2 x i16> %4, i32 1
+  %add32.i70 = add i16 %tnr.sroa.0.0.vec.extract172, 4
+  %tnr.sroa.7.0.insert.ext.i71 = zext i16 %tnr.sroa.0.1.vec.extract244 to i32
+  %tnr.sroa.0.0.insert.ext.i72 = zext i16 %add32.i70 to i32
+  %tnr.sroa.0.0.insert.shift.i73 = shl nuw i32 %tnr.sroa.0.0.insert.ext.i72, 16
+  %tnr.sroa.0.0.insert.insert.i74 = or i32 %tnr.sroa.0.0.insert.shift.i73, %tnr.sroa.7.0.insert.ext.i71
+  %5 = bitcast i32 %tnr.sroa.0.0.insert.insert.i74 to <2 x i16>
+  %tnr.sroa.0.0.vec.extract174 = extractelement <2 x i16> %5, i32 0
+  %tnr.sroa.0.1.vec.extract246 = extractelement <2 x i16> %5, i32 1
+  %add32.i75 = add i16 %tnr.sroa.0.0.vec.extract174, 4
+  %tnr.sroa.7.0.insert.ext.i76 = zext i16 %tnr.sroa.0.1.vec.extract246 to i32
+  %tnr.sroa.0.0.insert.ext.i77 = zext i16 %add32.i75 to i32
+  %tnr.sroa.0.0.insert.shift.i78 = shl nuw i32 %tnr.sroa.0.0.insert.ext.i77, 16
+  %tnr.sroa.0.0.insert.insert.i79 = or i32 %tnr.sroa.0.0.insert.shift.i78, %tnr.sroa.7.0.insert.ext.i76
+  %6 = bitcast i32 %tnr.sroa.0.0.insert.insert.i79 to <2 x i16>
+  %tnr.sroa.0.0.vec.extract176 = extractelement <2 x i16> %6, i32 0
+  %tnr.sroa.0.1.vec.extract248 = extractelement <2 x i16> %6, i32 1
+  %add32.i80 = add i16 %tnr.sroa.0.0.vec.extract176, 4
+  %tnr.sroa.7.0.insert.ext.i81 = zext i16 %tnr.sroa.0.1.vec.extract248 to i32
+  %tnr.sroa.0.0.insert.ext.i82 = zext i16 %add32.i80 to i32
+  %tnr.sroa.0.0.insert.shift.i83 = shl nuw i32 %tnr.sroa.0.0.insert.ext.i82, 16
+  %tnr.sroa.0.0.insert.insert.i84 = or i32 %tnr.sroa.0.0.insert.shift.i83, %tnr.sroa.7.0.insert.ext.i81
+  %7 = bitcast i32 %tnr.sroa.0.0.insert.insert.i84 to <2 x i16>
+  %a1 = extractelement <2 x i16> %7, i32 0
+  %a2 = extractelement <2 x i16> %7, i32 1
+  tail call void @my.cmp(i16 %a1, i16 %a2) #0
+  ret i16 0
+}
+
+declare void @my.cmp(i16, i16)
+
+
+
+; An additional test, but without bitcast being involved. This should fold to
+; a constant vector.
+define <2 x i32> @test_sext() {
+; CHECK-BIG-LABEL: @test_sext(
+; CHECK-BIG-NEXT:  entry:
+; CHECK-BIG-NEXT:    ret <2 x i32> <i32 244, i32 0>
+;
+; CHECK-LITTLE-LABEL: @test_sext(
+; CHECK-LITTLE-NEXT:  entry:
+; CHECK-LITTLE-NEXT:    ret <2 x i32> <i32 0, i32 244>
+;
+entry:
+  %sext = sext <2 x i16> bitcast (<1 x i32> <i32 15990784> to <2 x i16>) to <2 x i32>
+  ret <2 x i32> %sext
+}