/*
 * Decompiled with CFR 0.152.
 */
package org.allenai.pdffigures2;

import java.io.Serializable;
import java.lang.invoke.LambdaMetafactory;
import org.allenai.pdffigures2.CaptionDetector;
import org.allenai.pdffigures2.CaptionStart;
import org.allenai.pdffigures2.DocumentLayout;
import org.allenai.pdffigures2.FigureType$;
import org.allenai.pdffigures2.Line;
import org.allenai.pdffigures2.Logging;
import org.allenai.pdffigures2.Logging$logger$;
import org.allenai.pdffigures2.Logging$loggerConfig$;
import org.allenai.pdffigures2.Page;
import org.allenai.pdffigures2.Paragraph;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.slf4j.Logger;
import scala.Enumeration;
import scala.Function0;
import scala.Function1;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.SeqView;
import scala.collection.SeqView$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.TraversableViewLike;
import scala.collection.immutable.$colon$colon;
import scala.collection.immutable.Iterable$;
import scala.collection.immutable.List;
import scala.collection.immutable.Map;
import scala.collection.immutable.Map$;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.math.Numeric$IntIsIntegral$;
import scala.math.Ordering$Double$;
import scala.math.Ordering$Int$;
import scala.runtime.BooleanRef;
import scala.runtime.BoxesRunTime;
import scala.runtime.Nothing$;
import scala.runtime.ObjectRef;
import scala.util.matching.Regex;

public final class CaptionDetector$
implements Logging {
    public static CaptionDetector$ MODULE$;
    private final int MaxDuplicateCaptionNames;
    private final int MaxSamePageDuplicateCaptionNames;
    private final int MaxHeightForCaptionLines;
    private final double MinCommonFontPercentage;
    private final Regex captionStartRegex;
    private final Regex captionNumberRegex;
    private final Logger internalLogger;
    private volatile Logging$logger$ logger$module;
    private volatile Logging$loggerConfig$ loggerConfig$module;

    static {
        new CaptionDetector$();
    }

    @Override
    public Logger internalLogger() {
        return this.internalLogger;
    }

    @Override
    public Logging$logger$ logger() {
        if (this.logger$module == null) {
            this.logger$lzycompute$1();
        }
        return this.logger$module;
    }

    @Override
    public Logging$loggerConfig$ loggerConfig() {
        if (this.loggerConfig$module == null) {
            this.loggerConfig$lzycompute$1();
        }
        return this.loggerConfig$module;
    }

    @Override
    public void org$allenai$pdffigures2$Logging$_setter_$internalLogger_$eq(Logger x$1) {
        this.internalLogger = x$1;
    }

    private int MaxDuplicateCaptionNames() {
        return this.MaxDuplicateCaptionNames;
    }

    private int MaxSamePageDuplicateCaptionNames() {
        return this.MaxSamePageDuplicateCaptionNames;
    }

    private int MaxHeightForCaptionLines() {
        return this.MaxHeightForCaptionLines;
    }

    private double MinCommonFontPercentage() {
        return this.MinCommonFontPercentage;
    }

    private Regex captionStartRegex() {
        return this.captionStartRegex;
    }

    private Regex captionNumberRegex() {
        return this.captionNumberRegex;
    }

    public Seq<CaptionStart> findCaptions(Seq<Page> pages, DocumentLayout layout) {
        Seq<CaptionStart> candidates = this.findCaptionCandidates(pages);
        Tuple2 tuple2 = (Tuple2)layout.fontCounts().maxBy((Function1<Tuple2, Object> & Serializable & scala.Serializable)x$1 -> BoxesRunTime.boxToDouble(x$1._2$mcD$sp()), Ordering$Double$.MODULE$);
        if (tuple2 == null) {
            throw new MatchError(tuple2);
        }
        PDFont standardFont = (PDFont)tuple2._1();
        double count = tuple2._2$mcD$sp();
        Tuple2<PDFont, Double> tuple22 = new Tuple2<PDFont, Double>(standardFont, BoxesRunTime.boxToDouble(count));
        PDFont standardFont2 = tuple22._1();
        double count2 = tuple22._2$mcD$sp();
        Seq fontFilters = count2 > this.MinCommonFontPercentage() ? (Seq)new $colon$colon<Nothing$>((Nothing$)((Object)new CaptionDetector.NonStandardFont(standardFont2, (Set)Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray((Object[])new Enumeration.Value[]{FigureType$.MODULE$.Figure(), FigureType$.MODULE$.Table()})))), (List<Nothing$>)new $colon$colon<Nothing$>((Nothing$)((Object)new CaptionDetector.NonStandardFont(standardFont2, (Set)Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray((Object[])new Enumeration.Value[]{FigureType$.MODULE$.Table()})))), (List<Nothing$>)new $colon$colon<Nothing$>((Nothing$)((Object)new CaptionDetector.NonStandardFont(standardFont2, (Set)Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray((Object[])new Enumeration.Value[]{FigureType$.MODULE$.Figure()})))), Nil$.MODULE$))) : (Seq)Nil$.MODULE$;
        Seq<CaptionDetector.CandidateFilter> filters = ((TraversableLike)((TraversableLike)new $colon$colon<Nothing$>((Nothing$)((Object)new CaptionDetector.ColonOnly()), (List<Nothing$>)new $colon$colon<Nothing$>((Nothing$)((Object)new CaptionDetector.AllCapsFigOnly()), (List<Nothing$>)new $colon$colon<Nothing$>((Nothing$)((Object)new CaptionDetector.AllCapsTableOnly()), Nil$.MODULE$)))).$plus$plus(fontFilters, Seq$.MODULE$.canBuildFrom())).$plus$plus(new $colon$colon<Nothing$>((Nothing$)((Object)new CaptionDetector.AbbreviatedFigOnly()), (List<Nothing$>)new $colon$colon<Nothing$>((Nothing$)((Object)new CaptionDetector.FigureHasFollowingTextOnly()), (List<Nothing$>)new $colon$colon<Nothing$>((Nothing$)((Object)new CaptionDetector.PeriodOnly()), (List<Nothing$>)new $colon$colon<Nothing$>((Nothing$)((Object)new CaptionDetector.LeftAlignedOnly(false)), (List<Nothing$>)new $colon$colon<Nothing$>((Nothing$)((Object)new CaptionDetector.LeftAlignedOnly(true)), (List<Nothing$>)new $colon$colon<Nothing$>((Nothing$)((Object)new CaptionDetector.LineEndOnly()), Nil$.MODULE$)))))), Seq$.MODULE$.canBuildFrom());
        return this.selectCaptionCandidates(candidates, filters);
    }

    public Seq<CaptionStart> findCaptionCandidates(Seq<Page> pages) {
        Seq<CaptionStart> candidates = pages.flatMap((Function1<Page, Seq> & Serializable & scala.Serializable)page -> page.paragraphs().flatMap((Function1<Paragraph, SeqView> & Serializable & scala.Serializable)paragraph -> {
            BooleanRef paragraphStart = BooleanRef.create(true);
            return ((TraversableViewLike)paragraph.lines().view().zipWithIndex(SeqView$.MODULE$.canBuildFrom())).flatMap(arg_0 -> CaptionDetector$.$anonfun$findCaptionCandidates$3(paragraph, page, paragraphStart, arg_0), SeqView$.MODULE$.canBuildFrom());
        }, Seq$.MODULE$.canBuildFrom()), Seq$.MODULE$.canBuildFrom());
        return candidates;
    }

    public Seq<CaptionStart> selectCaptionCandidates(Seq<CaptionStart> candidates, Seq<CaptionDetector.CandidateFilter> filters) {
        ObjectRef groupedById = ObjectRef.create(candidates.groupBy((Function1<CaptionStart, Tuple2> & Serializable & scala.Serializable)x$4 -> x$4.figId()));
        BooleanRef removedAny = BooleanRef.create(true);
        while (removedAny.elem && ((Map)groupedById.elem).values().exists((Function1<Seq, Object> & Serializable & scala.Serializable)x$5 -> BoxesRunTime.boxToBoolean(CaptionDetector$.$anonfun$selectCaptionCandidates$2(x$5)))) {
            Option filterToUse = filters.find((Function1<CaptionDetector.CandidateFilter, Object> & Serializable & scala.Serializable)filter -> BoxesRunTime.boxToBoolean(CaptionDetector$.$anonfun$selectCaptionCandidates$3(groupedById, filter)));
            if (filterToUse.nonEmpty()) {
                groupedById.elem = ((Map)groupedById.elem).map((Function1<Tuple2, Tuple2> & Serializable & scala.Serializable)x0$3 -> {
                    Tuple2 tuple2 = x0$3;
                    if (tuple2 != null) {
                        Tuple2 figId = (Tuple2)tuple2._1();
                        Seq candidatesForId = (Seq)tuple2._2();
                        CaptionDetector.CandidateFilter candidateFilter = (CaptionDetector.CandidateFilter)filterToUse.get();
                        return new Tuple2(figId, candidatesForId.filter((Function1<CaptionStart, Object> & Serializable & scala.Serializable)cc -> BoxesRunTime.boxToBoolean(candidateFilter.accept(cc))));
                    }
                    throw new MatchError(tuple2);
                }, Map$.MODULE$.canBuildFrom());
                this.logger().debug((Function0<String> & Serializable & scala.Serializable)() -> new StringBuilder(27).append("Applied filter ").append(((CaptionDetector.CandidateFilter)filterToUse.get()).name()).append(", ").append(((TraversableOnce)((Map)groupedById$1.elem).values().map((Function1<Seq, Object> & Serializable & scala.Serializable)x$8 -> BoxesRunTime.boxToInteger(x$8.size()), scala.collection.Iterable$.MODULE$.canBuildFrom())).sum(Numeric$IntIsIntegral$.MODULE$)).append(" remaining").toString());
                continue;
            }
            removedAny.elem = false;
            groupedById.elem = ((Map)groupedById.elem).map((Function1<Tuple2, Tuple2> & Serializable & scala.Serializable)x0$4 -> {
                Tuple2 tuple2 = x0$4;
                if (tuple2 != null) {
                    Tuple2 figureId = (Tuple2)tuple2._1();
                    Seq candidatesForId = (Seq)tuple2._2();
                    Seq filtered = (Seq)candidatesForId.filter((Function1<CaptionStart, Object> & Serializable & scala.Serializable)x$9 -> BoxesRunTime.boxToBoolean(x$9.paragraphStart()));
                    if (filtered.nonEmpty()) {
                        if (filtered.size() < candidatesForId.size()) {
                            removedAny$1.elem = true;
                        }
                        return new Tuple2<Tuple2, Seq>(figureId, filtered);
                    }
                    return new Tuple2<Tuple2, Seq>(figureId, candidatesForId);
                }
                throw new MatchError(tuple2);
            }, Map$.MODULE$.canBuildFrom());
            if (removedAny.elem) continue;
            this.logger().debug((Function0<String> & Serializable & scala.Serializable)() -> new StringBuilder(41).append("Filtered for paragraph starts, ").append(((TraversableOnce)((Map)groupedById$1.elem).values().map((Function1<Seq, Object> & Serializable & scala.Serializable)x$10 -> BoxesRunTime.boxToInteger(x$10.size()), scala.collection.Iterable$.MODULE$.canBuildFrom())).sum(Numeric$IntIsIntegral$.MODULE$)).append(" remaining").toString());
        }
        Map filteredCaptionStarts = (Map)((Map)groupedById.elem).filter((Function1<Tuple2, Object> & Serializable & scala.Serializable)x0$5 -> BoxesRunTime.boxToBoolean(CaptionDetector$.$anonfun$selectCaptionCandidates$16(x0$5)));
        return ((TraversableOnce)filteredCaptionStarts.values().flatten(Predef$.MODULE$.$conforms())).toSeq();
    }

    private final void logger$lzycompute$1() {
        CaptionDetector$ captionDetector$ = this;
        synchronized (captionDetector$) {
            if (this.logger$module == null) {
                this.logger$module = new Logging$logger$(this);
            }
        }
    }

    private final void loggerConfig$lzycompute$1() {
        CaptionDetector$ captionDetector$ = this;
        synchronized (captionDetector$) {
            if (this.loggerConfig$module == null) {
                this.loggerConfig$module = new Logging$loggerConfig$(this);
            }
        }
    }

    public static final /* synthetic */ String $anonfun$findCaptionCandidates$4() {
        return "Warning: Crazy height for caption line, skipping";
    }

    /*
     * Unable to fully structure code
     * Could not resolve type clashes
     */
    public static final /* synthetic */ Iterable $anonfun$findCaptionCandidates$3(Paragraph paragraph$1, Page page$1, BooleanRef paragraphStart$1, Tuple2 x0$1) {
        block8: {
            var6_4 = x0$1;
            if (var6_4 == null) break block8;
            line = (Line)var6_4._1();
            lineNum = var6_4._2$mcI$sp();
            firstWord = line.words().head().text();
            if (line.words().size() <= 2) ** GOTO lbl-1000
            v0 = line.words().apply(1).text();
            var12_8 = ".";
            if (!(v0 != null ? v0.equals(var12_8) == false : var12_8 != null)) {
                v1 = new Tuple2<String, Integer>(new StringBuilder(1).append(firstWord).append(".").toString(), BoxesRunTime.boxToInteger(2));
            } else lbl-1000:
            // 2 sources

            {
                v1 = var11_9 = new Tuple2<String, Integer>(firstWord, BoxesRunTime.boxToInteger(1));
            }
            if (var11_9 == null) {
                throw new MatchError(var11_9);
            }
            headerStr = var11_9._1();
            wordNumber = var11_9._2$mcI$sp();
            var10_12 = new Tuple2<String, Integer>(headerStr, BoxesRunTime.boxToInteger(wordNumber));
            headerStr = var10_12._1();
            wordNumber = var10_12._2$mcI$sp();
            captionStartMatchOpt = CaptionDetector$.MODULE$.captionStartRegex().findFirstMatchIn(firstWord);
            if (captionStartMatchOpt.nonEmpty() && line.words().size() > 1) {
                captionStartMatch = captionStartMatchOpt.get();
                numberStr = line.words().apply(wordNumber).text();
                captionEndMatchOp = CaptionDetector$.MODULE$.captionNumberRegex().findFirstMatchIn(numberStr);
                v2 = saneHeight = line.boundary().height() < (double)CaptionDetector$.MODULE$.MaxHeightForCaptionLines();
                if (!saneHeight) {
                    CaptionDetector$.MODULE$.logger().debug((Function0<String> & Serializable & scala.Serializable)LambdaMetafactory.altMetafactory(null, null, null, ()Ljava/lang/Object;, $anonfun$findCaptionCandidates$4(), ()Ljava/lang/String;)());
                }
                if (saneHeight && captionEndMatchOp.nonEmpty()) {
                    captionEndMatch = captionEndMatchOp.get();
                    name = captionEndMatch.group(1);
                    figType = captionStartMatch.group(1).charAt(0) == 'F' ? FigureType$.MODULE$.Figure() : FigureType$.MODULE$.Table();
                    nextLine /* !! */  = lineNum == paragraph$1.lines().size() - 1 ? None$.MODULE$ : new Some<Line>(paragraph$1.lines().apply(lineNum + 1));
                    candidate = new CaptionStart(headerStr, name, figType, captionEndMatch.group(2), line, nextLine /* !! */ , page$1.pageNumber(), paragraphStart$1.elem, captionEndMatch.end() == numberStr.length() && line.words().size() == wordNumber + 1);
                    v3 /* !! */  = new Some<CaptionStart>(candidate);
                } else {
                    v3 /* !! */  = None$.MODULE$;
                }
            } else {
                v3 /* !! */  = None$.MODULE$;
            }
            candidates = v3 /* !! */ ;
            paragraphStart$1.elem = false;
            return Option$.MODULE$.option2Iterable(candidates);
        }
        throw new MatchError(var6_4);
    }

    public static final /* synthetic */ boolean $anonfun$selectCaptionCandidates$2(Seq x$5) {
        return x$5.size() > 1;
    }

    public static final /* synthetic */ boolean $anonfun$selectCaptionCandidates$5(CaptionDetector.CandidateFilter filter$1, CaptionStart x$6) {
        return !filter$1.accept(x$6);
    }

    public static final /* synthetic */ boolean $anonfun$selectCaptionCandidates$4(CaptionDetector.CandidateFilter filter$1, Tuple2 x0$1) {
        Tuple2 tuple2 = x0$1;
        if (tuple2 != null) {
            Seq candidatesForId = (Seq)tuple2._2();
            return candidatesForId.exists((Function1<CaptionStart, Object> & Serializable & scala.Serializable)x$6 -> BoxesRunTime.boxToBoolean(CaptionDetector$.$anonfun$selectCaptionCandidates$5(filter$1, x$6)));
        }
        throw new MatchError(tuple2);
    }

    public static final /* synthetic */ boolean $anonfun$selectCaptionCandidates$7(CaptionDetector.CandidateFilter filter$1, CaptionStart x$7) {
        return !filter$1.accept(x$7);
    }

    public static final /* synthetic */ boolean $anonfun$selectCaptionCandidates$6(CaptionDetector.CandidateFilter filter$1, Tuple2 x0$2) {
        Tuple2 tuple2 = x0$2;
        if (tuple2 != null) {
            Seq candidatesForId = (Seq)tuple2._2();
            return candidatesForId.forall((Function1<CaptionStart, Object> & Serializable & scala.Serializable)x$7 -> BoxesRunTime.boxToBoolean(CaptionDetector$.$anonfun$selectCaptionCandidates$7(filter$1, x$7)));
        }
        throw new MatchError(tuple2);
    }

    public static final /* synthetic */ boolean $anonfun$selectCaptionCandidates$3(ObjectRef groupedById$1, CaptionDetector.CandidateFilter filter) {
        boolean filterRemovesAny = ((Map)groupedById$1.elem).exists((Function1<Tuple2, Object> & Serializable & scala.Serializable)x0$1 -> BoxesRunTime.boxToBoolean(CaptionDetector$.$anonfun$selectCaptionCandidates$4(filter, x0$1)));
        boolean filterRemovesGroup = ((Map)groupedById$1.elem).exists((Function1<Tuple2, Object> & Serializable & scala.Serializable)x0$2 -> BoxesRunTime.boxToBoolean(CaptionDetector$.$anonfun$selectCaptionCandidates$6(filter, x0$2)));
        return filterRemovesAny && !filterRemovesGroup;
    }

    public static final /* synthetic */ int $anonfun$selectCaptionCandidates$18(Tuple2 x$12) {
        return ((SeqLike)x$12._2()).size();
    }

    public static final /* synthetic */ boolean $anonfun$selectCaptionCandidates$16(Tuple2 x0$5) {
        Tuple2 tuple2 = x0$5;
        if (tuple2 != null) {
            Tuple2 figId = (Tuple2)tuple2._1();
            Seq captions = (Seq)tuple2._2();
            if (captions.size() > MODULE$.MaxDuplicateCaptionNames() || BoxesRunTime.unboxToInt(((TraversableOnce)captions.groupBy((Function1<CaptionStart, Object> & Serializable & scala.Serializable)x$11 -> BoxesRunTime.boxToInteger(x$11.page())).map((Function1<Tuple2, Object> & Serializable & scala.Serializable)x$12 -> BoxesRunTime.boxToInteger(CaptionDetector$.$anonfun$selectCaptionCandidates$18(x$12)), Iterable$.MODULE$.canBuildFrom())).max(Ordering$Int$.MODULE$)) > MODULE$.MaxSamePageDuplicateCaptionNames()) {
                MODULE$.logger().debug((Function0<String> & Serializable & scala.Serializable)() -> new StringBuilder(56).append("Unable to disambiguate caption candidates for ").append(figId).append(", dropping").toString());
                return false;
            }
            return true;
        }
        throw new MatchError(tuple2);
    }

    private CaptionDetector$() {
        MODULE$ = this;
        Logging.$init$(this);
        this.MaxDuplicateCaptionNames = 3;
        this.MaxSamePageDuplicateCaptionNames = 2;
        this.MaxHeightForCaptionLines = 60;
        this.MinCommonFontPercentage = 0.4;
        this.captionStartRegex = new StringOps(Predef$.MODULE$.augmentString("^(Figure.|Figure|FIGURE|Table|TABLE||Fig.|Fig|FIG.|FIG)$")).r();
        this.captionNumberRegex = new StringOps(Predef$.MODULE$.augmentString("^([1-9][0-9]*.[1-9][0-9]*|[1-9][0-9]*|[IVX]+|[1-9I][0-9I]*|[A-D].[1-9][0-9]*)($|:|.)?")).r();
    }
}

