package czsem.gate.plugins;

import czsem.gate.utils.GateUtils;
import czsem.gate.utils.PRSetup;
import gate.AnnotationSet;
import gate.Corpus;
import gate.Document;
import gate.Factory;
import gate.creole.ExecutionException;
import gate.creole.ExecutionInterruptedException;
import gate.creole.ResourceInstantiationException;
import gate.creole.SerialAnalyserController;
import gate.creole.gazetteer.DefaultGazetteer;
import gate.creole.gazetteer.FSMState;
import gate.creole.gazetteer.GazetteerList;
import gate.creole.gazetteer.GazetteerNode;
import gate.creole.gazetteer.LinearNode;
import gate.creole.gazetteer.Lookup;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.HiddenCreoleParameter;
import gate.creole.metadata.RunTime;
import gate.gui.GazetteerEditor;
import gate.gui.MainFrame;
import java.io.File;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;

@CreoleResource
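/*
 * Gazetteer that compares each whole line of a document with the gazetteer entries using a
 * normalized Levenshtein distance and annotates lines that are close enough to an entry.
 * (Decompiled; loaded from input_file:czsem/gate/plugins/LevenshteinWholeLineMatchingGazetteer.class)
 */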
public class LevenshteinWholeLineMatchingGazetteer extends DefaultGazetteer {
    private static final long serialVersionUID = 2828791942110531799L;

    /** Lookup map of the list currently being loaded; installed by readList and filled by addLookup. */
    private Map<String, Lookup> currentlyReadingNodeMap;

    /** For every gazetteer list node, the lookups registered while that list was read, keyed by entry text. */
    protected Map<LinearNode, Map<String, Lookup>> lookups = new HashMap<LinearNode, Map<String, Lookup>>();

    /** Largest normalized distance at which a line still counts as a match (CREOLE default 0.2). */
    private double maxDistance = 0.2d;

    /** When true, all whitespace is stripped from both strings before they are compared. */
    private boolean removeAllSpaces = false;

    /** When true, punctuation is stripped from both strings before they are compared. */
    private boolean removePunctuation = false;

    /** When true (and removeAllSpaces is false), whitespace runs are collapsed to one space and the ends trimmed. */
    private boolean removeRedundantSpaces = true;

    /** When true, only the prefix of the line that is as long as the entry is compared and annotated. */
    private boolean evaluateOnPrefix = false;

    /**
     * Plain value holder describing how far apart two strings are.
     *
     * <p>As filled in by {@code countDistanceOptimized}, {@code text1} and {@code text2} are the
     * two strings in the form in which they were actually compared, {@code distance} is an
     * absolute character count and {@code normalizeDistance} is that count relative to the
     * length of the longer string.
     * (Decompiled; loaded from input_file:czsem/gate/plugins/LevenshteinWholeLineMatchingGazetteer$Distance.class)
     */
    public static class Distance {
        /** Absolute distance (number of characters). */
        int distance;
        /** Distance relative to the length of the longer compared string. */
        double normalizeDistance;
        /** First compared string. */
        String text1;
        /** Second compared string. */
        String text2;

        /**
         * Creates a holder with all four values set.
         *
         * @param i    absolute distance
         * @param d    normalized distance
         * @param str  first compared string
         * @param str2 second compared string
         */
        public Distance(int i, double d, String str, String str2) {
            text1 = str;
            text2 = str2;
            distance = i;
            normalizeDistance = d;
        }
    }

    /**
     * Iterates over the lines of a string, splitting on the line-feed character only, and
     * remembers the character offsets of the line returned last.
     *
     * <p>An empty string yields exactly one empty line, and a string ending in a line feed
     * yields a final empty line. Carriage returns are not treated as separators and stay part
     * of the returned line.
     * (Decompiled; loaded from input_file:czsem/gate/plugins/LevenshteinWholeLineMatchingGazetteer$StringLineIterator.class)
     */
    public static class StringLineIterator implements Iterator<String> {
        private final String content;
        /** Offset of the first character of the last returned line; -1 before the first call to next(). */
        private int lastStart = -1;
        /** Offset just past the last returned line (its line feed, or the content length); -1 initially. */
        private int lastEnd = -1;

        /**
         * @param str the text to split into lines
         */
        public StringLineIterator(String str) {
            this.content = str;
        }

        /**
         * @return true while the end of the content has not been reached
         */
        @Override
        public boolean hasNext() {
            return this.lastEnd < this.content.length();
        }

        /**
         * Returns the next line without its terminating line feed.
         *
         * @throws NoSuchElementException if all lines have already been returned
         */
        @Override
        public String next() {
            // Guard first so that an exhausted iterator keeps its offsets intact instead of
            // failing later with a StringIndexOutOfBoundsException.
            if (!hasNext()) {
                throw new NoSuchElementException("No more lines");
            }
            this.lastStart = this.lastEnd + 1;
            int lineFeed = this.content.indexOf('\n', this.lastStart);
            this.lastEnd = lineFeed >= 0 ? lineFeed : this.content.length();
            return this.content.substring(this.lastStart, this.lastEnd);
        }

        /**
         * Removal is not supported because the underlying string is immutable.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException("remove");
        }

        /**
         * @return offset of the first character of the line returned last, or -1 if none yet
         */
        public int getLastStrat() {
            return this.lastStart;
        }

        /**
         * @return offset just past the line returned last (exclusive end), or -1 if none yet
         */
        public int getLastEnd() {
            return this.lastEnd;
        }
    }

    /**
     * Creates the gazetteer with case-sensitive matching switched on; the inherited
     * {@code caseSensitive} setting can still be changed afterwards.
     */
    public LevenshteinWholeLineMatchingGazetteer() {
        caseSensitive = Boolean.TRUE;
    }

    /**
     * Passes the value straight through to the superclass setter.
     *
     * <p>NOTE(review): the override exists only to carry {@code @HiddenCreoleParameter};
     * presumably this hides the inherited parameter for this resource, since execute() in this
     * class never reads it. Confirm against the GATE CREOLE annotation documentation.
     *
     * @param bool value forwarded to the superclass
     */
    @HiddenCreoleParameter
    public void setLongestMatchOnly(Boolean bool) {
        super.setLongestMatchOnly(bool);
    }

    /**
     * Passes the value straight through to the superclass setter.
     *
     * <p>NOTE(review): the override exists only to carry {@code @HiddenCreoleParameter};
     * presumably this hides the inherited parameter for this resource, since execute() in this
     * class never reads it. Confirm against the GATE CREOLE annotation documentation.
     *
     * @param bool value forwarded to the superclass
     */
    @HiddenCreoleParameter
    public void setWholeWordsOnly(Boolean bool) {
        super.setWholeWordsOnly(bool);
    }

    /**
     * Reads one gazetteer list via the superclass while collecting that list's lookups.
     *
     * <p>A fresh map is registered for the node before delegating, so every lookup that
     * reaches {@code addLookup} during the read is filed under this node.
     *
     * @param linearNode the list node being read
     * @param z          flag forwarded unchanged to the superclass implementation
     * @throws ResourceInstantiationException if the superclass fails to read the list
     */
    protected void readList(LinearNode linearNode, boolean z) throws ResourceInstantiationException {
        Map<String, Lookup> nodeLookups = new HashMap<String, Lookup>();
        this.currentlyReadingNodeMap = nodeLookups;
        this.lookups.put(linearNode, nodeLookups);
        super.readList(linearNode, z);
    }

    /**
     * Records the lookup under its entry text for the list currently being read and then
     * registers it with the superclass.
     *
     * <p>If no list is being read (no {@code readList} call has happened yet) there is no
     * per-node map to record into; the lookup is then only forwarded to the superclass instead
     * of failing with a NullPointerException. Such a lookup is not consulted by
     * {@code execute}, which only reads the per-node maps.
     *
     * @param str    entry text
     * @param lookup lookup to associate with the entry
     */
    public void addLookup(String str, Lookup lookup) {
        Map<String, Lookup> nodeLookups = this.currentlyReadingNodeMap;
        if (nodeLookups != null) {
            nodeLookups.put(str, lookup);
        }
        super.addLookup(str, lookup);
    }

    /**
     * Matches every line of the current document against the gazetteer lists.
     *
     * <p>For each line and each list node, all entries of the list are compared with the line
     * via {@code countDistanceOptimized}. The entry with the smallest normalized distance not
     * exceeding {@code maxDistance} wins (a later entry with an equal distance replaces an
     * earlier one). Its lookup receives the features {@code distance},
     * {@code normalizedDistance}, {@code matchedText} and {@code srcText}, and is handed to
     * {@code createLookups} together with the offsets of the line.
     *
     * <p>When {@code evaluateOnPrefix} is set, the annotated span is limited to as many
     * characters as the matched entry has, but never extends beyond the end of the line.
     *
     * <p>NOTE(review): the same {@code Lookup} objects are reused for every line and their
     * feature maps are overwritten on each match, so this relies on {@code createLookups}
     * copying the features — confirm. The end offset is passed as "exclusive line end - 1",
     * i.e. presumably inclusive — confirm against {@code DefaultGazetteer.createLookups}.
     *
     * @throws ExecutionException            if no document is set
     * @throws ExecutionInterruptedException if an interrupt is detected after processing a line
     */
    public void execute() throws ExecutionException {
        this.interrupted = false;
        if (this.document == null) {
            throw new ExecutionException("No document to process!");
        }
        AnnotationSet annotations;
        if (this.annotationSetName == null || this.annotationSetName.equals("")) {
            annotations = this.document.getAnnotations();
        } else {
            annotations = this.document.getAnnotations(this.annotationSetName);
        }
        fireStatusChanged("Performing look-up in " + this.document.getName() + "...");

        double threshold = getMaxDistance().doubleValue();
        StringLineIterator lines = new StringLineIterator(this.document.getContent().toString());
        while (lines.hasNext()) {
            String line = lines.next();
            Iterator<?> nodes = this.definition.iterator();
            while (nodes.hasNext()) {
                Object nodeKey = nodes.next();
                LinearNode node = (LinearNode) nodeKey;
                Map<String, Lookup> nodeLookups = this.lookups.get(node);
                GazetteerList entriesOfNode = (GazetteerList) this.definition.getListsByNode().get(nodeKey);
                if (nodeLookups == null || entriesOfNode == null) {
                    // Nothing was recorded for this node, so there is nothing to attach to a match.
                    continue;
                }

                double bestDistance = Double.MAX_VALUE;
                FSMState bestState = null;
                String bestEntry = null;
                Iterator<?> entries = entriesOfNode.iterator();
                while (entries.hasNext()) {
                    GazetteerNode entryNode = (GazetteerNode) entries.next();
                    String entry = entryNode.getEntry();
                    // Only candidates at least as good as the best one so far are interesting.
                    double limit = Math.min(bestDistance, threshold);
                    Distance result = countDistanceOptimized(line, entry, limit);
                    if (result.normalizeDistance <= limit) {
                        Lookup lookup = nodeLookups.get(entry);
                        if (lookup == null) {
                            // Entry without a recorded lookup: cannot be annotated, skip it.
                            continue;
                        }
                        if (lookup.features == null) {
                            lookup.features = new LinkedHashMap();
                        }
                        // Raw type kept on purpose: the declared type of Lookup.features is not
                        // visible here and values of several types are stored.
                        Map features = lookup.features;
                        features.put("distance", Integer.valueOf(result.distance));
                        features.put("normalizedDistance", Double.valueOf(result.normalizeDistance));
                        features.put("matchedText", entry);
                        features.put("srcText", line);
                        FSMState state = new FSMState(this);
                        state.addLookup(lookup);
                        bestDistance = result.normalizeDistance;
                        bestState = state;
                        bestEntry = entry;
                    }
                }

                if (bestState != null) {
                    long start = lines.getLastStrat();
                    long end = lines.getLastEnd() - 1;
                    if (this.evaluateOnPrefix) {
                        // Annotate only the prefix covered by the entry, clamped to the line so
                        // that a longer entry cannot push the span past the line end.
                        end = Math.min(end, start + bestEntry.length() - 1);
                    }
                    createLookups(bestState, start, end, annotations);
                }
            }
            if (isInterrupted()) {
                throw new ExecutionInterruptedException("The execution of the " + getName() + " gazetteer has been abruptly interrupted!");
            }
        }
        fireProcessFinished();
        fireStatusChanged("Look-up complete!");
    }

    /**
     * Expresses a distance as a fraction of a length.
     *
     * <p>The division is carried out in floating point. A zero length therefore does not throw:
     * {@code 0 / 0} yields {@code NaN} (which never satisfies a {@code <=} threshold test) and a
     * positive distance over zero yields positive infinity.
     *
     * @param i  absolute distance
     * @param i2 length to normalize by
     * @return {@code i / i2} as a double
     */
    public static double normalizeDistance(int i, int i2) {
        // Cast before dividing: int / int would truncate every ratio below 1 down to 0.
        return (double) i / i2;
    }

    /**
     * Cuts a string down to at most the given number of characters.
     *
     * @param str the string to shorten
     * @param i   maximum number of characters to keep
     * @return the first {@code i} characters of {@code str}, or {@code str} itself when it is
     *         not longer than {@code i}
     */
    public static String shortenToLength(String str, int i) {
        if (i < str.length()) {
            return str.substring(0, i);
        }
        return str;
    }

    /** One or more whitespace or no-break-space (U+00A0) characters; compiled once, not per call. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("[\\s\\u00a0]+");

    /**
     * Collapses every run of whitespace or no-break spaces into a single space and trims the
     * result.
     *
     * @param str text to normalize
     * @return the text with single spaces between tokens and no leading or trailing space
     */
    public static String removeRedundantSpaces(String str) {
        return WHITESPACE_RUN.matcher(str).replaceAll(" ").trim();
    }

    /** A single whitespace or no-break-space (U+00A0) character; compiled once, not per call. */
    private static final Pattern SINGLE_WHITESPACE = Pattern.compile("[\\s\\u00a0]");

    /**
     * Deletes every whitespace and no-break-space character from the text.
     *
     * @param str text to normalize
     * @return the text without any whitespace
     */
    public static String removeAllSpaces(String str) {
        return SINGLE_WHITESPACE.matcher(str).replaceAll("");
    }

    /** A single character of the regex class {@code \p{Punct}}; compiled once, not per call. */
    private static final Pattern PUNCTUATION = Pattern.compile("\\p{Punct}");

    /**
     * Deletes every character matched by the regex class {@code \p{Punct}} from the text.
     * Letters, digits and whitespace are left untouched.
     *
     * @param str text to normalize
     * @return the text without punctuation characters
     */
    public static String removePunctuation(String str) {
        return PUNCTUATION.matcher(str).replaceAll("");
    }

    /**
     * Computes the distance between a line and a gazetteer entry after applying the configured
     * normalization, skipping the expensive edit-distance computation when the length
     * difference alone already exceeds the threshold.
     *
     * <p>Normalization order: punctuation removal (if enabled), then either removal of all
     * whitespace or collapsing of redundant whitespace, then (if prefix evaluation is enabled)
     * truncation of the line to the length of the entry. Lower-casing for case-insensitive
     * matching is locale independent and is applied only when the Levenshtein distance is
     * actually computed.
     *
     * <p>When the pre-filter rejects the pair, the returned {@code distance} is the length
     * difference (a lower bound of the edit distance), not the edit distance itself. When both
     * strings are empty after normalization there is nothing to compare: the result has distance
     * 0 and a {@code NaN} normalized distance, which never satisfies a {@code <=} threshold test.
     *
     * @param str  the text to test (a document line)
     * @param str2 the gazetteer entry
     * @param d    normalized-distance threshold used only for the length pre-filter
     * @return the distance together with the strings in the form in which they were compared
     */
    public Distance countDistanceOptimized(String str, String str2, double d) {
        String candidate = str;
        String entry = str2;
        if (this.removePunctuation) {
            candidate = removePunctuation(candidate);
            entry = removePunctuation(entry);
        }
        if (this.removeAllSpaces) {
            candidate = removeAllSpaces(candidate);
            entry = removeAllSpaces(entry);
        } else if (this.removeRedundantSpaces) {
            candidate = removeRedundantSpaces(candidate);
            entry = removeRedundantSpaces(entry);
        }
        if (getEvaluateOnPrefix().booleanValue()) {
            candidate = shortenToLength(candidate, entry.length());
        }

        int longer = Math.max(candidate.length(), entry.length());
        int shorter = Math.min(candidate.length(), entry.length());
        if (longer == 0) {
            // Avoid dividing by a zero length; report the pair as not comparable.
            return new Distance(0, Double.NaN, candidate, entry);
        }

        // The edit distance can never be smaller than the difference in length.
        int lengthGap = longer - shorter;
        double lowerBound = normalizeDistance(lengthGap, longer);
        if (lowerBound > d) {
            return new Distance(lengthGap, lowerBound, candidate, entry);
        }

        if (!this.caseSensitive.booleanValue()) {
            candidate = candidate.toLowerCase(Locale.ROOT);
            entry = entry.toLowerCase(Locale.ROOT);
        }
        int edits = StringUtils.getLevenshteinDistance(candidate, entry);
        return new Distance(edits, normalizeDistance(edits, longer), candidate, entry);
    }

    /**
     * Sets the largest normalized distance at which a line is still accepted as a match.
     * Declared as a CREOLE runtime parameter with default {@code 0.2}.
     *
     * @param d the new threshold; must not be null
     */
    @CreoleParameter(defaultValue = "0.2")
    @RunTime
    public void setMaxDistance(Double d) {
        maxDistance = d;
    }

    /**
     * @return the largest normalized distance at which a line is still accepted as a match
     */
    public Double getMaxDistance() {
        return maxDistance;
    }

    /**
     * @return whether all whitespace is stripped from both strings before comparison
     */
    public Boolean getRemoveAllSpaces() {
        return removeAllSpaces;
    }

    /**
     * Switches stripping of all whitespace before comparison on or off. When on, it takes
     * precedence over collapsing redundant whitespace. Declared as a CREOLE runtime parameter
     * with default {@code false}.
     *
     * @param bool the new setting; must not be null
     */
    @CreoleParameter(defaultValue = "false")
    @RunTime
    public void setRemoveAllSpaces(Boolean bool) {
        removeAllSpaces = bool;
    }

    /**
     * @return whether runs of whitespace are collapsed to a single space before comparison
     */
    public Boolean getRemoveRedundantSpaces() {
        return removeRedundantSpaces;
    }

    /**
     * Switches collapsing of whitespace runs before comparison on or off. Only takes effect
     * while stripping of all whitespace is off. Declared as a CREOLE runtime parameter with
     * default {@code true}.
     *
     * @param bool the new setting; must not be null
     */
    @CreoleParameter(defaultValue = "true")
    @RunTime
    public void setRemoveRedundantSpaces(Boolean bool) {
        removeRedundantSpaces = bool;
    }

    /**
     * @return whether only the prefix of a line, as long as the entry, is compared
     */
    public Boolean getEvaluateOnPrefix() {
        return evaluateOnPrefix;
    }

    /**
     * Switches prefix evaluation on or off. When on, a line is cut to the length of the entry
     * before comparison. Declared as a CREOLE runtime parameter with default {@code false}.
     *
     * @param bool the new setting; must not be null
     */
    @CreoleParameter(defaultValue = "false")
    @RunTime
    public void setEvaluateOnPrefix(Boolean bool) {
        evaluateOnPrefix = bool;
    }

    /**
     * Manual smoke test: starts GATE with its main window, builds a one-step pipeline with
     * this gazetteer (maxDistance 0.1, case-insensitive, lists from a hard-coded local file),
     * runs it over a small in-memory document and prints the resulting annotations to stderr.
     *
     * @param strArr ignored
     * @throws Exception if GATE initialisation, pipeline construction or execution fails
     */
    public static void main(String[] strArr) throws Exception {
        GateUtils.initGate();
        GateUtils.registerComponentIfNot(GazetteerEditor.class);
        GateUtils.registerComponentIfNot(LevenshteinWholeLineMatchingGazetteer.class);
        MainFrame.getInstance().setVisible(true);

        PRSetup gazetteerSetup = new PRSetup.SinglePRSetup((Class<?>) LevenshteinWholeLineMatchingGazetteer.class)
                .putFeature("maxDistance", Double.valueOf(0.1d))
                .putFeature("caseSensitive", false)
                .putFeature("listsURL", new File("C:/workspace/Collite/Datlowe/DocumentProcessing/datlowe/resources/Gate/SPC/gazetteer/datlowe_spc_gaz_nolemma.def").toURI().toURL())
                .putFeature("gazetteerFeatureSeparator", "|");
        SerialAnalyserController pipeline = PRSetup.buildGatePipeline(new PRSetup[]{gazetteerSetup}, "gaz");

        Corpus corpus = Factory.newCorpus("gaz");
        Document sample = Factory.newDocument(
                "9. DATUM PRVNÍ REGISTRACE/PRODLOUŽENÍ REGISTRACE|order=27|heading_number=9\n"
                + "9. DATUM PRVNÍ REGISTRACE / PRODLOUŽENÍ REGISTRACE|order=27|heading_number=9\n"
                + "9. DATUM PRVNÍ REGISTRACE A DATUM PRODLOUŽENÍ REGISTRACE|order=27|heading_number=9\n"
                + "9. datum první registrace/prodloužení registrace");
        corpus.add(sample);
        pipeline.setCorpus(corpus);
        pipeline.execute();
        System.err.println(sample.getAnnotations());
    }

    /**
     * @return whether punctuation is stripped from both strings before comparison
     */
    public Boolean getRemovePunctuation() {
        return removePunctuation;
    }

    /**
     * Switches stripping of punctuation before comparison on or off. Declared as a CREOLE
     * runtime parameter with default {@code false}.
     *
     * @param bool the new setting; must not be null
     */
    @CreoleParameter(defaultValue = "false")
    @RunTime
    public void setRemovePunctuation(Boolean bool) {
        removePunctuation = bool;
    }
}
