package ws.palladian.extraction.entity.tagger;

import java.util.ArrayList;
import org.apache.commons.lang3.StringUtils;
import ws.palladian.extraction.content.PageContentExtractorException;
import ws.palladian.extraction.entity.FileFormatParser;
import ws.palladian.extraction.entity.NamedEntityRecognizer;
import ws.palladian.extraction.entity.TaggingFormat;
import ws.palladian.extraction.entity.evaluation.EvaluationResult;
import ws.palladian.extraction.entity.tagger.PalladianNerTrainingSettings;
import ws.palladian.extraction.location.LocationType;
import ws.palladian.extraction.location.PalladianLocationExtractor;
import ws.palladian.extraction.location.persistence.LocationDatabase;
import ws.palladian.helper.html.HtmlHelper;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.persistence.DatabaseManagerFactory;

/* loaded from: input_file:ws/palladian/extraction/entity/tagger/PalladianNerExperiments.class */
/**
 * Ad-hoc experiments around {@link PalladianNer} and other {@link NamedEntityRecognizer}s:
 * training/evaluating a model on fixed dataset paths and rendering tagged text into an HTML file.
 *
 * <p>All paths used here are hard-coded and relative to the working directory (except the input
 * file in {@link #main(String[])}, which is an absolute Windows path).</p>
 */
public class PalladianNerExperiments {

    /** HTML template that is read by {@link #tag(String, String, NamedEntityRecognizer)}. */
    private static final String TEMPLATE_PATH = "data/temp/raw.html";

    /** Literal marker inside the template that is replaced with the rendered, tagged text. */
    private static final String TEMPLATE_PLACEHOLDER = "XXX";

    /**
     * Trains a {@link PalladianNer} built from the English training settings, then evaluates it on
     * {@code data/datasets/ner/conll/test_final.txt} in {@link TaggingFormat#COLUMN} format and
     * prints the MUC and exact-match results to standard output.
     */
    public void trainTest() {
        PalladianNer palladianNer = new PalladianNer(PalladianNerTrainingSettings.Builder.english().create());
        // NOTE(review): the third argument is presumably the path the trained model is written to,
        // and -1 presumably means "no limit" for the seed annotations -- confirm against the callees.
        palladianNer.train("data/datasets/ner/conll/training.txt",
                FileFormatParser.getSeedAnnotations("data/namesNerDictionary.txt", -1), "data/temp/conllModel");
        EvaluationResult evaluate = palladianNer.evaluate("data/datasets/ner/conll/test_final.txt", TaggingFormat.COLUMN);
        System.out.println(evaluate.getMucResultsReadable());
        System.out.println(evaluate.getExactMatchResultsReadable());
    }

    /**
     * Tags the given text and writes the result as HTML to
     * {@code data/temp/tagged_<str2>_<recognizer name>.html}.
     *
     * <p>HTML tags are stripped from {@code str} before tagging. In the tagger output, every
     * {@code <TYPE>} / {@code </TYPE>} pair whose {@code TYPE} is the name of a {@link LocationType}
     * constant is rewritten to {@code <span class="TYPE">} / {@code </span>}, and newlines become
     * {@code <br>}. The result replaces the {@code XXX} marker in the template
     * {@code data/temp/raw.html}.</p>
     *
     * @param str the text to tag
     * @param str2 label that becomes part of the output file name
     * @param namedEntityRecognizer the recognizer used for tagging; its name also becomes part of
     *            the output file name
     * @throws PageContentExtractorException declared for callers; not thrown directly by this method
     * @throws IllegalStateException if the template could not be read
     */
    public void tag(String str, String str2, NamedEntityRecognizer namedEntityRecognizer) throws PageContentExtractorException {
        String recognizerName = namedEntityRecognizer.getName();
        String taggedText = namedEntityRecognizer.tag(HtmlHelper.stripHtmlTags(str));

        // Build a regex alternation of all location type names, e.g. "A|B|C".
        ArrayList<String> typeNames = new ArrayList<String>();
        for (LocationType locationType : LocationType.values()) {
            typeNames.add(locationType.name());
        }
        String typeAlternation = StringUtils.join(typeNames, "|");

        String outputPath = "data/temp/tagged_" + str2 + "_" + recognizerName + ".html";

        String template = FileHelper.tryReadFileToString(TEMPLATE_PATH);
        // NOTE(review): tryReadFileToString presumably returns null when the file cannot be read --
        // confirm. Fail with a descriptive message instead of a bare NullPointerException.
        if (template == null) {
            throw new IllegalStateException("Could not read HTML template: " + TEMPLATE_PATH);
        }

        // Opening tags keep their type as CSS class ($1); closing tags all collapse to </span>.
        String renderedText = taggedText
                .replaceAll("\\<(" + typeAlternation + ")\\>", "<span class=\"$1\">")
                .replaceAll("\\</(" + typeAlternation + ")\\>", "</span>")
                .replace("\n", "<br>");

        FileHelper.writeToFile(outputPath, template.replace(TEMPLATE_PLACEHOLDER, renderedText));
    }

    /**
     * Placeholder without implementation; currently does nothing.
     *
     * @param str unused
     * @param palladianNer unused
     */
    public void tagText(String str, PalladianNer palladianNer) {
    }

    /**
     * Reads a fixed text file, tags it with a {@link PalladianLocationExtractor} backed by the
     * {@code "locations"} database configuration and writes the HTML rendering via
     * {@link #tag(String, String, NamedEntityRecognizer)} using the label {@code "XXX"}.
     *
     * @param strArr command line arguments; ignored
     * @throws PageContentExtractorException propagated from {@link #tag(String, String, NamedEntityRecognizer)}
     */
    public static void main(String[] strArr) throws PageContentExtractorException {
        // Note: tag(...) strips HTML tags again, so the text is stripped twice; kept as in the original.
        String text = HtmlHelper.stripHtmlTags(
                FileHelper.tryReadFileToString("Q:\\Users\\David\\Desktop\\LocationExtractionDatasetSmall\\text14.txt"));
        LocationDatabase locationDatabase = DatabaseManagerFactory.create(LocationDatabase.class, "locations");
        new PalladianNerExperiments().tag(text, "XXX", new PalladianLocationExtractor(locationDatabase));
    }
}
