package nl.inl.blacklab.tools;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.Iterator;
import java.util.Properties;
import java.util.regex.Pattern;
import nl.inl.blacklab.index.Indexer;
import nl.inl.util.FileUtil;
import org.apache.commons.text.StringEscapeUtils;

/* loaded from: input_file:nl/inl/blacklab/tools/SketchToXmlConverter.class */
public class SketchToXmlConverter {
    private static final int LINES_PER_CHUNK_FILE = 30000;
    private int linesDone;
    boolean inSentence = false;
    boolean inDoc = false;
    private boolean docWasEmpty = true;
    private String lastDocLine = "(no docs processed yet)";

    public static void main(String[] strArr) throws IOException {
        Properties propertiesFromResource = getPropertiesFromResource("anwcorpus.properties");
        File fileProp = getFileProp(propertiesFromResource, "sketchDir", null);
        convertList(new File(fileProp, "lijst.txt"), fileProp, getFileProp(propertiesFromResource, "inputDir", "input", null));
    }

    public static Properties getPropertiesFromResource(String str) throws IOException {
        InputStream resourceAsStream = SketchToXmlConverter.class.getClassLoader().getResourceAsStream(str);
        try {
            if (resourceAsStream == null) {
                throw new IllegalArgumentException("Properties file not found: " + str + " (must be accessible from the classpath)");
            }
            Properties properties = new Properties();
            properties.load(resourceAsStream);
            if (resourceAsStream != null) {
                resourceAsStream.close();
            }
            return properties;
        } catch (Throwable th) {
            if (resourceAsStream != null) {
                try {
                    resourceAsStream.close();
                } catch (Throwable th2) {
                    th.addSuppressed(th2);
                }
            }
            throw th;
        }
    }

    public static File getFileProp(Properties properties, String str, File file) {
        return getFileProp(properties, str, null, file);
    }

    public static File getFileProp(Properties properties, String str, String str2, File file) {
        Object obj = properties.get(str);
        if (obj == null) {
            obj = str2;
        }
        if (obj == null) {
            return null;
        }
        File file2 = new File(obj.toString());
        return (file == null || file2.isAbsolute()) ? file2 : new File(file, file2.getPath());
    }

    public boolean processLine(String str, Writer writer) throws IOException {
        boolean z = true;
        if (str.length() >= 1 && str.charAt(0) == '<' && str.endsWith(">")) {
            if (str.charAt(1) == '/') {
                if (str.equals("</s>")) {
                    if (!this.inSentence) {
                        System.err.println("Sentence close without open!");
                    }
                    this.inSentence = false;
                    this.docWasEmpty = false;
                } else if (str.equals("</doc>")) {
                    if (!this.inDoc) {
                        System.err.println("Doc close without open! After: " + this.lastDocLine);
                    }
                    if (this.docWasEmpty) {
                        System.err.println("Empty document");
                    }
                    if (this.inSentence) {
                        writer.append("</s>\n");
                    }
                    this.inSentence = false;
                    this.inDoc = false;
                    if (this.linesDone >= LINES_PER_CHUNK_FILE) {
                        z = false;
                    }
                } else {
                    System.err.println("Unknown end tag: " + str);
                }
            } else if (str.equals("<s>")) {
                if (this.inSentence) {
                    System.err.println("Nested sentence!");
                }
                this.docWasEmpty = false;
                this.inSentence = true;
            } else if (!str.equals("<g/>")) {
                if (str.startsWith("<doc")) {
                    this.lastDocLine = str;
                    if (this.inDoc) {
                        writer.append("</doc>\n");
                        System.err.println("--- Unclosed " + (this.docWasEmpty ? "empty " : "") + "document before: " + str);
                        System.err.println("    Fixed: (added close tag)");
                    }
                    str = str.replaceAll("&", "&amp;").replaceAll("\\s\\s+", " ");
                    if (!str.matches("<doc(\\s+\\w+\\s*=\\s*\"[^<>\"]*\")*\\s*>\\s*")) {
                        System.err.println("--- Illegal doc line: " + str);
                        str = str.replaceAll("<URL >", "url=\"").replaceAll("/URL</URL>", "\"").replaceAll("<CLTSTRATUM>EON</CLTSTRATUM>", "cltstratum=\"EON\"").replaceAll("auteur=\"\" ", "auteur=\"").replaceAll("auteurwebtekst=\"\" ", "auteurwebtekst=\"").replaceAll("<FILEDESC\\s+>DOMEIN\\s+</FILEDESC>", "filedesc=\"DOMEIN\"").replaceAll("<TEKST>", "tekst=\"").replaceAll("</TEKST>", "\"");
                        if (str.matches("<doc(\\s+\\w+\\s*=\\s*\"[^<>\"]*\")*\\s*>\\s*")) {
                            System.err.println("    Fixed: " + str);
                        } else {
                            System.err.println("!!! Failed: " + str);
                        }
                    }
                    this.inDoc = true;
                    this.docWasEmpty = true;
                } else {
                    System.err.println("Unknown tag: " + str);
                }
            }
            writer.append((CharSequence) str);
        } else if (str.indexOf(9) < 0) {
            writer.append("<pu>").append((CharSequence) StringEscapeUtils.escapeXml10(str)).append("</pu>");
            this.docWasEmpty = false;
        } else {
            String[] split = StringEscapeUtils.escapeXml10(str).split("\t", 3);
            writer.append("<w p=\"").append((CharSequence) split[1]).append("\" l=\"").append((CharSequence) split[2]).append("\">").append((CharSequence) split[0]).append("</w>");
            this.docWasEmpty = false;
        }
        writer.append('\n');
        this.linesDone++;
        return z;
    }

    public void convert(Reader reader, File file, String str) throws IOException {
        int i = 0;
        Writer openOutFile = openOutFile(file, str, 0);
        try {
            if (!(reader instanceof BufferedReader)) {
                reader = new BufferedReader(reader);
            }
            BufferedReader bufferedReader = (BufferedReader) reader;
            while (true) {
                String readLine = bufferedReader.readLine();
                if (readLine == null) {
                    return;
                }
                if (!processLine(readLine.trim(), openOutFile)) {
                    i++;
                    closeOutFile(openOutFile);
                    openOutFile = openOutFile(file, str, i);
                }
            }
        } finally {
            closeOutFile(openOutFile);
        }
    }

    private void closeOutFile(Writer writer) throws IOException {
        if (this.inDoc) {
            writer.append("</doc>\n");
            System.err.println("Unclosed document at end of chunk");
        }
        writer.append("</docs>\n");
        writer.close();
    }

    private Writer openOutFile(File file, String str, int i) throws IOException {
        this.linesDone = 0;
        if (i > 0) {
            int lastIndexOf = str.lastIndexOf(46);
            str = str.substring(0, lastIndexOf) + " (" + (i + 1) + ")" + str.substring(lastIndexOf);
        }
        OutputStreamWriter outputStreamWriter = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(new File(file, str))), Indexer.DEFAULT_INPUT_ENCODING);
        outputStreamWriter.append((CharSequence) ("<?xml version=\"1.0\" encoding=\"" + Indexer.DEFAULT_INPUT_ENCODING.name() + "\" ?>\n")).append((CharSequence) "<?xml-stylesheet type=\"text/xsl\" href=\"xsl/corpus.xsl\" ?>\n").append((CharSequence) ("<docs file=\"" + str + "\">\n"));
        return outputStreamWriter;
    }

    private static void convertList(File file, File file2, File file3) throws FileNotFoundException, IOException {
        SketchToXmlConverter sketchToXmlConverter = new SketchToXmlConverter();
        Iterator it = FileUtil.readLines(file).iterator();
        while (it.hasNext()) {
            convertFile(sketchToXmlConverter, new File(file2, (String) it.next()), file3);
        }
    }

    private static void convertFile(SketchToXmlConverter sketchToXmlConverter, File file, File file2) throws IOException {
        InputStreamReader inputStreamReader = new InputStreamReader(new FileInputStream(file), Indexer.DEFAULT_INPUT_ENCODING);
        try {
            String name = file.getName();
            sketchToXmlConverter.convert(inputStreamReader, file2, name.substring(0, name.lastIndexOf(46)) + ".xml");
            inputStreamReader.close();
        } catch (Throwable th) {
            try {
                inputStreamReader.close();
            } catch (Throwable th2) {
                th.addSuppressed(th2);
            }
            throw th;
        }
    }
}
