package eus.ixa.ixa.pipe.nerc;

import com.google.common.collect.Lists;
import eus.ixa.ixa.pipe.nerc.dict.Dictionaries;
import eus.ixa.ixa.pipe.nerc.train.Flags;
import ixa.kaflib.Entity;
import ixa.kaflib.KAFDocument;
import ixa.kaflib.Term;
import ixa.kaflib.WF;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.Span;

/* loaded from: input_file:eus/ixa/ixa/pipe/nerc/Annotate.class */
/**
 * Runs named entity recognition over a {@link KAFDocument} and serialises the
 * result in several formats (KAF, OpenNLP name samples, CoNLL 2002/2003).
 *
 * <p>Which finders are used is decided once, at construction time, from the
 * {@code ruleBasedOption}, {@code dictTag} and {@code dictPath} properties; see
 * {@link #annotateOptions(Properties)}.
 */
public class Annotate {
    /** Property value that marks an option as disabled. */
    private static final String OFF = "off";
    /** Prefix of the first token of a sentence that marks a document boundary. */
    private static final String DOCSTART_MARKER = "-DOCSTART-";

    private final NameFactory nameFactory = new NameFactory();
    /** Statistical finder; stays {@code null} when only the dictionaries are used for tagging. */
    private StatisticalNameFinder nameFinder;
    private Dictionaries dictionaries;
    /** Dictionary finder; only created when a dictionary option and a dictionary path are given. */
    private DictionariesNameFinder dictFinder;
    /** Numeric finder; re-created for every sentence when {@link #lexerFind} is set. */
    private NumericNameFinder numericLexerFinder;
    private boolean statistical;
    private boolean postProcess;
    private boolean dictTag;
    private boolean lexerFind;
    /** Raw value of the {@code clearFeatures} property; may be {@code null} when the property is absent. */
    private String clearFeatures;

    /** Tag prefixes written in the last column of the CoNLL output. */
    private enum BIO {
        BEGIN("B-"),
        IN("I-"),
        OUT("O");

        private final String tag;

        BIO(String str) {
            this.tag = str;
        }

        @Override
        public String toString() {
            return this.tag;
        }
    }

    /**
     * Builds the annotator from its configuration.
     *
     * @param properties configuration; {@code ruleBasedOption} and {@code dictTag} must be
     *                   present, {@code dictPath} must be present whenever {@code dictTag}
     *                   is not {@code "off"}, and {@code clearFeatures} is optional
     * @throws IOException declared by the finder and dictionary set-up
     * @throws NullPointerException if a required property is missing
     */
    public Annotate(Properties properties) throws IOException {
        this.clearFeatures = properties.getProperty("clearFeatures");
        annotateOptions(properties);
    }

    /**
     * Translates the configuration into the mode flags and creates the finders they need.
     *
     * <ul>
     *   <li>{@code dictTag == "off"}: statistical finder only; the numeric finder is added
     *       when {@code ruleBasedOption} is not {@code "off"}.</li>
     *   <li>{@code dictTag == "tag"} (any case): dictionaries only, no statistical finder.</li>
     *   <li>{@code dictTag == "post"} (any case): statistical finder post-processed with the
     *       dictionaries.</li>
     *   <li>any other {@code dictTag}: dictionaries are loaded, statistical finder is used.</li>
     * </ul>
     *
     * @param properties the configuration
     * @throws IOException declared by the finder and dictionary set-up
     */
    private void annotateOptions(Properties properties) throws IOException {
        String ruleBasedOption = properties.getProperty("ruleBasedOption");
        String dictOption = properties.getProperty("dictTag");
        String dictPath = properties.getProperty("dictPath");

        if (dictOption.equals(OFF)) {
            this.lexerFind = !ruleBasedOption.equals(OFF);
            this.statistical = true;
            this.dictTag = false;
            this.postProcess = false;
            this.nameFinder = new StatisticalNameFinder(properties, this.nameFactory);
            return;
        }

        boolean dictPathDisabled = dictPath.equals(OFF);
        if (dictPathDisabled) {
            // A dictionary option without a dictionary path is a configuration error;
            // how it is reported is up to Flags.dictionaryException().
            Flags.dictionaryException();
        }
        if (!ruleBasedOption.equals(OFF)) {
            this.lexerFind = true;
        }
        if (dictPathDisabled) {
            return;
        }

        if (this.dictionaries == null) {
            this.dictionaries = new Dictionaries(dictPath);
            this.dictFinder = new DictionariesNameFinder(this.dictionaries, this.nameFactory);
        }
        if (dictOption.equalsIgnoreCase("tag")) {
            this.dictTag = true;
            this.postProcess = false;
            this.statistical = false;
            return;
        }
        this.nameFinder = new StatisticalNameFinder(properties, this.nameFactory);
        this.statistical = true;
        this.dictTag = false;
        this.postProcess = dictOption.equalsIgnoreCase("post");
    }

    /**
     * @return the statistical finder, or {@code null} when the configuration did not create one
     */
    public StatisticalNameFinder getStatisticalNameFinder() {
        return this.nameFinder;
    }

    /**
     * Detects the entities of every sentence and adds them to the document.
     *
     * @param kAFDocument the document to read sentences from and to add entities to
     * @throws IOException kept for interface compatibility
     */
    public final void annotateNEs(KAFDocument kAFDocument) throws IOException {
        boolean clearAfterEachSentence = "yes".equalsIgnoreCase(this.clearFeatures);
        for (List<WF> sentence : kAFDocument.getSentences()) {
            String[] tokens = tokenForms(sentence);
            String[] tokenIds = new String[sentence.size()];
            for (int i = 0; i < tokenIds.length; i++) {
                tokenIds[i] = sentence.get(i).getId();
            }

            Span[] spans = detectSpans(tokens);
            for (Name name : this.statistical ? this.nameFinder.getNamesFromSpans(spans, tokens) : this.dictFinder.getNamesFromSpans(spans, tokens)) {
                // Map the token offsets of the name back to the word form ids they cover.
                List<String> coveredIds = Arrays.asList(
                        Arrays.copyOfRange(tokenIds, name.getSpan().getStart(), name.getSpan().getEnd()));
                ixa.kaflib.Span<Term> termSpan = KAFDocument.newTermSpan(kAFDocument.getTermsFromWFs(coveredIds));
                ArrayList<ixa.kaflib.Span<Term>> references = new ArrayList<ixa.kaflib.Span<Term>>();
                references.add(termSpan);
                kAFDocument.newEntity(references).setType(name.getType());
            }
            if (clearAfterEachSentence) {
                resetAdaptiveData();
            }
        }
        resetAdaptiveData();
    }

    /**
     * @param kAFDocument the (already annotated) document
     * @return the document's own string serialisation
     */
    public final String annotateNEsToKAF(KAFDocument kAFDocument) {
        return kAFDocument.toString();
    }

    /**
     * Detects the entities of every sentence and renders each sentence as an OpenNLP
     * {@link NameSample} line. The document itself is not modified.
     *
     * @param kAFDocument the document to read sentences from
     * @return one name sample per sentence, each followed by a newline
     */
    public final String annotateNEsToOpenNLP(KAFDocument kAFDocument) {
        StringBuilder out = new StringBuilder();
        boolean clearAdaptiveData = "yes".equalsIgnoreCase(this.clearFeatures);
        for (List<WF> sentence : kAFDocument.getSentences()) {
            String[] tokens = tokenForms(sentence);
            Span[] spans = detectSpans(tokens);
            out.append(new NameSample(tokens, spans, clearAdaptiveData).toString()).append("\n");
        }
        resetAdaptiveData();
        return out.toString();
    }

    /**
     * Serialises the entities already present in the document as
     * {@code form TAB lemma TAB morphofeat TAB tag} lines, with a blank line after each sentence.
     *
     * <p>An entity token is tagged {@code I-TYPE}, except that the first token of an entity
     * that directly follows another entity of the same type (compared ignoring case) is
     * tagged {@code B-TYPE}. Tokens outside any entity are tagged {@code O}.
     *
     * @param kAFDocument the annotated document
     * @return the CoNLL 2003 style text
     */
    public String annotateNEsToCoNLL2003(KAFDocument kAFDocument) {
        return toCoNLL(kAFDocument, false);
    }

    /**
     * Serialises the entities already present in the document as
     * {@code form TAB lemma TAB morphofeat TAB tag} lines, with a blank line after each sentence.
     *
     * <p>The first token of every entity is tagged {@code B-TYPE}, the remaining ones
     * {@code I-TYPE}. Tokens outside any entity are tagged {@code O}.
     *
     * @param kAFDocument the annotated document
     * @return the CoNLL 2002 style text
     */
    public String annotateNEsToCoNLL2002(KAFDocument kAFDocument) {
        return toCoNLL(kAFDocument, true);
    }

    /**
     * Shortens the long type names to their three letter form.
     *
     * @param str the entity type; must not be {@code null}
     * @return the first three characters for {@code PERSON}, {@code ORGANIZATION} and
     *         {@code LOCATION} (matched ignoring case), otherwise {@code str} unchanged
     */
    public String convertToConLLTypes(String str) {
        boolean hasShortForm = str.equalsIgnoreCase("PERSON")
                || str.equalsIgnoreCase("ORGANIZATION")
                || str.equalsIgnoreCase("LOCATION");
        return hasShortForm ? str.substring(0, 3) : str;
    }

    /** Copies the surface forms of a sentence into an array. */
    private static String[] tokenForms(List<WF> sentence) {
        String[] forms = new String[sentence.size()];
        for (int i = 0; i < forms.length; i++) {
            forms[i] = sentence.get(i).getForm();
        }
        return forms;
    }

    /** Clears the adaptive data of the statistical finder, if the configuration created one. */
    private void resetAdaptiveData() {
        if (this.nameFinder != null) {
            this.nameFinder.clearAdaptiveData();
        }
    }

    /** Runs every enabled finder over one sentence and returns the merged, non-overlapping spans. */
    private Span[] detectSpans(String[] tokens) {
        ArrayList<Span> spans = new ArrayList<Span>();
        if (this.statistical) {
            boolean startsDocument = tokens.length > 0 && tokens[0].startsWith(DOCSTART_MARKER);
            if (startsDocument && "docstart".equalsIgnoreCase(this.clearFeatures)) {
                this.nameFinder.clearAdaptiveData();
            }
            spans = Lists.newArrayList(this.nameFinder.nercToSpans(tokens));
        }
        if (this.postProcess) {
            Span[] dictSpans = this.dictFinder.nercToSpansExact(tokens);
            SpanUtils.postProcessDuplicatedSpans(spans, dictSpans);
            SpanUtils.concatenateSpans(spans, dictSpans);
        }
        if (this.dictTag) {
            spans = Lists.newArrayList(this.dictFinder.nercToSpansExact(tokens));
        }
        if (this.lexerFind) {
            String sentenceText = StringUtils.getStringFromTokens(tokens);
            this.numericLexerFinder = new NumericNameFinder(new BufferedReader(new StringReader(sentenceText)), this.nameFactory);
            SpanUtils.concatenateSpans(spans, this.numericLexerFinder.nercToSpans(tokens));
        }
        return NameFinderME.dropOverlappingSpans(spans.toArray(new Span[spans.size()]));
    }

    /**
     * Shared CoNLL writer.
     *
     * @param alwaysBegin {@code true} to open every entity with {@code B-} (2002 style);
     *                    {@code false} to use {@code B-} only when the entity directly
     *                    follows one of the same type (2003 style)
     */
    private String toCoNLL(KAFDocument kAFDocument, boolean alwaysBegin) {
        // Both maps are keyed by the id of the first term of an entity span.
        HashMap<String, Integer> spanLengths = new HashMap<String, Integer>();
        HashMap<String, String> spanTypes = new HashMap<String, String>();
        for (Entity entity : kAFDocument.getEntities()) {
            for (ixa.kaflib.Span<Term> span : entity.getSpans()) {
                String firstId = span.getFirstTarget().getId();
                spanLengths.put(firstId, Integer.valueOf(span.size()));
                spanTypes.put(firstId, entity.getType());
            }
        }

        StringBuilder out = new StringBuilder();
        for (List<WF> sentence : kAFDocument.getSentences()) {
            List<Term> terms = kAFDocument.getSentenceTerms(sentence.get(0).getSent());
            // Type of the entity that ended on the previous term, or null if that term was outside.
            String previousType = null;
            int index = 0;
            while (index < terms.size()) {
                Term term = terms.get(index);
                Integer length = spanLengths.get(term.getId());
                if (length == null) {
                    appendTermLine(out, term, BIO.OUT.toString());
                    previousType = null;
                    index++;
                    continue;
                }
                String type = convertToConLLTypes(spanTypes.get(term.getId()));
                boolean begin = alwaysBegin || (previousType != null && previousType.equalsIgnoreCase(type));
                // Never read past the sentence, and always make progress.
                int covered = Math.max(1, Math.min(length.intValue(), terms.size() - index));
                for (int offset = 0; offset < covered; offset++) {
                    BIO prefix = (offset == 0 && begin) ? BIO.BEGIN : BIO.IN;
                    appendTermLine(out, terms.get(index + offset), prefix.toString() + type);
                }
                previousType = type;
                index += covered;
            }
            out.append("\n");
        }
        return out.toString();
    }

    /** Appends one {@code form TAB lemma TAB morphofeat TAB tag} line. */
    private static void appendTermLine(StringBuilder out, Term term, String tag) {
        out.append(term.getForm());
        out.append("\t");
        out.append(term.getLemma());
        out.append("\t");
        out.append(term.getMorphofeat());
        out.append("\t");
        out.append(tag);
        out.append("\n");
    }
}
