package ws.palladian.preprocessing.segmentation;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactoryConfigurationError;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import ws.palladian.helper.UrlHelper;
import ws.palladian.helper.collection.Bag;
import ws.palladian.helper.html.HtmlHelper;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.helper.math.SetSimilarities;
import ws.palladian.retrieval.DocumentRetriever;

/* loaded from: input_file:ws/palladian/preprocessing/segmentation/PageSegmenterTrainer.class */
public class PageSegmenterTrainer {
    private static final Logger LOGGER = LoggerFactory.getLogger(PageSegmenterTrainer.class);

    /**
     * Compares the fingerprint of a reference document with the fingerprint of every file returned by
     * {@link #readURLsFromDisc(String)} for the given folder and writes one tab-separated line per file
     * (columns "Similarity", "Jaccard", "Average", "Filename") to
     * {@code str2 + "results_" + i + "_" + i2 + ".xls"}.
     * <p>
     * {@code str2} is used as a plain string prefix for the report file, so it has to end with a path
     * separator if the report is supposed to be placed inside that folder.
     *
     * @param str location of the reference document, handed to {@link DocumentRetriever#getWebDocument}
     * @param str2 folder containing the documents to compare against; also the prefix of the report file
     * @param i first fingerprint parameter passed to {@code PageSegmenter.createFingerprint}
     * @param i2 second fingerprint parameter passed to {@code PageSegmenter.createFingerprint}
     * @throws MalformedURLException declared for callers; not thrown directly by this method
     * @throws IOException if the report file cannot be created or written
     */
    public static void performDetailedParameterCheckForGivenValues(String str, String str2, int i, int i2) throws MalformedURLException, IOException {
        DocumentRetriever retriever = new DocumentRetriever();
        PageSegmenter segmenter = new PageSegmenter();
        File[] candidates = readURLsFromDisc(str2);
        Bag<String> referenceFingerprint = segmenter.createFingerprint(retriever.getWebDocument(str), i, i2);

        BufferedWriter report = new BufferedWriter(new FileWriter(str2 + "results_" + i + "_" + i2 + ".xls"));
        try {
            report.write("Original file: " + str);
            report.newLine();
            report.newLine();
            report.write("Similarity\tJaccard\tAverage\tFilename");
            report.newLine();
            report.newLine();

            for (File candidate : candidates) {
                // The candidate fingerprint is needed for the log line and for both measures below.
                Bag<String> candidateFingerprint = segmenter.createFingerprint(retriever.getWebDocument(candidate.toString()), i, i2);
                LOGGER.info(candidateFingerprint.toString());

                // Every value is rounded to two decimal places before it is reported.
                double similarity = Math.round((1.0d - SimilarityCalculator.calculateSimilarity(referenceFingerprint, candidateFingerprint)) * 100.0d) / 100.0d;
                double jaccard = Math.round(SetSimilarities.JACCARD.getSimilarity(referenceFingerprint.uniqueItems(), candidateFingerprint.uniqueItems()) * 100.0d) / 100.0d;
                double average = Math.round(((similarity + jaccard) / 2.0d) * 100.0d) / 100.0d;

                LOGGER.info("vari: " + similarity + "   jacc: " + jaccard + "   aver: " + average);
                report.write(similarity + "\t" + jaccard + "\t" + average + "\t" + candidate.toString().replace(str2, ""));
                report.newLine();
            }
        } finally {
            // Release the file handle even if fetching or fingerprinting a document fails.
            report.close();
        }
    }

    /**
     * Computes, for every file returned by {@link #readURLsFromDisc(String)}, the rounded mean of two
     * measures against the reference document (one minus {@code SimilarityCalculator.calculateSimilarity}
     * and the Jaccard similarity of the unique fingerprint items) and returns the arithmetic mean of those
     * per-file values.
     *
     * @param str location of the reference document, handed to {@link DocumentRetriever#getWebDocument}
     * @param str2 folder containing the documents to compare against
     * @param i first fingerprint parameter passed to {@code PageSegmenter.createFingerprint}
     * @param i2 second fingerprint parameter passed to {@code PageSegmenter.createFingerprint}
     * @return the mean of the per-file averages; {@code NaN} when there are no files to compare
     * @throws MalformedURLException declared for callers; not thrown directly by this method
     * @throws IOException declared for callers; not thrown directly by this method
     */
    public static Double performAverageParameterCheckForGivenValues(String str, String str2, int i, int i2) throws MalformedURLException, IOException {
        DocumentRetriever retriever = new DocumentRetriever();
        PageSegmenter segmenter = new PageSegmenter();
        File[] candidates = readURLsFromDisc(str2);
        Bag<String> referenceFingerprint = segmenter.createFingerprint(retriever.getWebDocument(str), i, i2);

        // Running sum of the per-file averages, added in file order.
        double total = 0.0d;
        for (File candidate : candidates) {
            Bag<String> candidateFingerprint = segmenter.createFingerprint(retriever.getWebDocument(candidate.toString()), i, i2);
            double similarity = Math.round((1.0d - SimilarityCalculator.calculateSimilarity(referenceFingerprint, candidateFingerprint)) * 100.0d) / 100.0d;
            double jaccard = Math.round(SetSimilarities.JACCARD.getSimilarity(referenceFingerprint.uniqueItems(), candidateFingerprint.uniqueItems()) * 100.0d) / 100.0d;
            total += Math.round(((similarity + jaccard) / 2.0d) * 100.0d) / 100.0d;
        }
        // Floating-point division: an empty folder yields NaN rather than an exception.
        return Double.valueOf(total / candidates.length);
    }

    /**
     * Runs a parameter check for every combination of the given values. Depending on {@code bool}, each
     * combination is either evaluated in detail (one report file per combination) or reduced to a single
     * average value; the collected averages are logged after all combinations have been processed.
     *
     * @param str location of the reference document
     * @param str2 folder containing the documents to compare against
     * @param iArr values for the first fingerprint parameter (logged as "number")
     * @param iArr2 values for the second fingerprint parameter (logged as "length")
     * @param bool {@code true} for the detailed check, {@code false} for the average check; must not be
     *        {@code null} when there is at least one combination
     * @throws MalformedURLException propagated from the individual checks
     * @throws IOException propagated from the individual checks
     */
    public static void performParameterCheck(String str, String str2, int[] iArr, int[] iArr2, Boolean bool) throws MalformedURLException, IOException {
        List<String> averages = new ArrayList<String>();
        for (int number : iArr) {
            for (int length : iArr2) {
                LOGGER.info("number: " + number + ", length: " + length);
                if (!bool.booleanValue()) {
                    averages.add("[" + number + "][" + length + "] " + performAverageParameterCheckForGivenValues(str, str2, number, length));
                    continue;
                }
                performDetailedParameterCheckForGivenValues(str, str2, number, length);
            }
        }
        for (String average : averages) {
            LOGGER.info(average);
        }
    }

    /**
     * Downloads the content behind a URL line by line and stores it in a local file. Every line read is
     * written followed by the platform line separator. Reading and writing use the platform default
     * character encoding.
     *
     * @param str the URL to download
     * @param str2 path of the file to write
     * @throws TransformerFactoryConfigurationError declared for callers; not thrown directly by this method
     * @throws TransformerException declared for callers; not thrown directly by this method
     * @throws IOException if the URL is malformed, cannot be opened or read, or the file cannot be written
     */
    public static void saveURLToDisc(String str, String str2) throws TransformerFactoryConfigurationError, TransformerException, IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new URL(str).openConnection().getInputStream()));
        try {
            BufferedWriter writer = new BufferedWriter(new FileWriter(str2));
            try {
                LOGGER.info("geht los-----");
                String line;
                while ((line = reader.readLine()) != null) {
                    writer.write(line);
                    writer.newLine();
                }
            } finally {
                // Flushes buffered output and releases the file handle, also on failure.
                writer.close();
            }
        } finally {
            // Closes the connection's stream even if the target file could not be opened.
            reader.close();
        }
    }

    /**
     * Lists all files in the given folder whose name ends with {@code ".html"}.
     *
     * @param str path of the folder to scan
     * @return the matching files; an empty array if the folder contains no such file, does not exist, is
     *         not a directory or cannot be read
     */
    public static File[] readURLsFromDisc(String str) {
        File[] htmlFiles = new File(str).listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File directory, String fileName) {
                return fileName.endsWith(".html");
            }
        });
        if (htmlFiles == null) {
            // File.listFiles returns null when the path is not a directory or an I/O error occurs.
            LOGGER.warn("Could not list files in: " + str);
            return new File[0];
        }
        // Only the first file is logged as a sample; guard against an empty result.
        String firstFile = htmlFiles.length > 0 ? String.valueOf(htmlFiles[0]) : "";
        LOGGER.info("files(" + htmlFiles.length + "):----------\n" + firstFile);
        return htmlFiles;
    }

    public static void saveAllURLsToDisc(String str, int i) throws TransformerFactoryConfigurationError, TransformerException, IOException {
        String str2;
        Document webDocument = new DocumentRetriever().getWebDocument(UrlHelper.getDomain(str));
        new HashSet();
        Set links = HtmlHelper.getLinks(webDocument, true, false, "");
        LOGGER.info(links.size() + " intern verlinkte URLs gefunden!");
        LOGGER.info(links.toString());
        Iterator it = links.iterator();
        String labelOfURL = PageSegmenterHelper.getLabelOfURL(str);
        for (int i2 = 0; it.hasNext() && i2 < i; i2++) {
            String str3 = (String) it.next();
            LOGGER.info(str3);
            String labelOfURL2 = PageSegmenterHelper.getLabelOfURL(str3);
            String replaceAll = UrlHelper.getCleanUrl(str3).replace("/", "_").replaceAll("[[^\\w\\däüöÄÜÖ\\+\\- ]]", "_");
            LOGGER.info(replaceAll + "\n" + labelOfURL2);
            if (labelOfURL.equals(labelOfURL2)) {
                str2 = "test\\aehnlich\\" + replaceAll;
                LOGGER.info("-->ähnlich");
            } else {
                str2 = "test\\unaehnlich\\" + replaceAll;
                LOGGER.info("-->nicht ähnlich");
            }
            saveURLToDisc(str3, str2);
        }
    }

    public static void saveChosenURLsToDisc() throws TransformerFactoryConfigurationError, TransformerException, IOException {
        String[] strArr = {"http://www.wer-weiss-was.de", "http://www.wikipedia.de", "http://www.google.de", "http://www.youtube.com", "http://www.wetter.com", "http://www.wissen.de", "http://dict.leo.org", "http://www.juraforum.de", "http://www.tomshardware.de", "http://www.treiber.de", "http://www.pixelquelle.de", "http://www.ebay.de", "http://www.expedia.de", "http://www.expedia.de/last-minute/default.aspx", "http://cgi.ebay.de/ws/eBayISAPI.dll?ViewItem&item=220680636640", "http://www.treiber.de/treiber-download/Anchor-Datacomm-updates", "http://www.amazon.com", "http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=mouse", "http://wissen.de/wde/generator/wissen/ressorts/geschichte/was_geschah_am/index.html?day=13&month=10&year=1900&suchen=Suchen", "http://maps.google.de"};
        for (int i = 0; i < strArr.length; i++) {
            String replaceAll = strArr[i].substring(7).replace("/", "_").replaceAll("[[^\\w\\däüöÄÜÖ\\+\\- ]]", "_");
            saveURLToDisc(strArr[i], "test_2\\unaehnlich2\\" + replaceAll);
            LOGGER.info(replaceAll + " erfolgreich!");
        }
    }

    /**
     * Retrieves the given page, walks over the links returned by {@code HtmlHelper.getLinks} and selects a
     * link whenever a running counter equals {@code i}; the counter starts at zero, is reset on every
     * selection and incremented after every link. The walk stops as soon as {@code i2} links have been
     * selected. The selected links are only logged, nothing is downloaded.
     *
     * @param str the URL of the page whose links are sampled
     * @param i counter value at which a link is selected
     * @param i2 number of selected links after which the walk stops
     */
    public static void downladRandomSitesForEvaluation2(String str, int i, int i2) {
        Set<String> selectedLinks = new HashSet<String>();
        LOGGER.info("Ausgangs-URL: " + str);
        Document startPage = new DocumentRetriever().getWebDocument(str);

        int selectedCount = 0;
        int sinceLastSelection = 0;
        for (String link : HtmlHelper.getLinks(startPage, true, false, "")) {
            if (sinceLastSelection == i) {
                selectedLinks.add(link);
                LOGGER.info("1actURL: " + link);
                selectedCount++;
                sinceLastSelection = 0;
            }
            // Incremented after the reset as well, so the counter is 1 right after a selection.
            sinceLastSelection++;
            if (selectedCount == i2) {
                break;
            }
        }

        LOGGER.info("---------------\nEVA-LINKS:");
        for (String selected : selectedLinks) {
            LOGGER.info(selected);
        }
    }

    /**
     * Normalises an XPath so that two notations of the same path compare equal: the path is upper-cased
     * and, from the third step on, every step that does not already end with an index predicate gets
     * {@code [1]} appended. The text before the first slash is dropped.
     *
     * @param str the XPath to normalise, steps separated by {@code /}
     * @return the normalised XPath; empty if the input contains no step after the first slash
     */
    private static String convertXPath(String str) {
        String[] steps = str.toUpperCase().split("/");
        StringBuilder normalized = new StringBuilder();
        // Index 0 holds whatever precedes the first slash (empty for absolute paths) and is skipped.
        for (int position = 1; position < steps.length; position++) {
            String step = steps[position];
            normalized.append("/").append(step);
            boolean lacksIndex = !step.endsWith("]");
            if (position >= 3 && lacksIndex) {
                normalized.append("[1]");
            }
        }
        return normalized.toString();
    }

    /**
     * Downloads the evaluated page and its similar documents into local files and returns the paths of
     * those files. The main page is stored as {@code str2 + str3 + ".html"}, the n-th similar document
     * (counting from 1) as {@code str2 + str3 + "a" + n + ".html"}.
     *
     * @param str URL of the evaluated page
     * @param list similar documents; each one is downloaded from its document URI
     * @param str2 path prefix (folder) of the files to write
     * @param str3 base name of the files to write
     * @return the written file paths, main page first, followed by the similar documents in list order
     * @throws TransformerFactoryConfigurationError propagated from {@link #saveURLToDisc(String, String)}
     * @throws TransformerException propagated from {@link #saveURLToDisc(String, String)}
     * @throws IOException if a download fails or a file cannot be written
     */
    private static List<String> saveEvaluationFiles(String str, List<Document> list, String str2, String str3) throws TransformerFactoryConfigurationError, TransformerException, IOException {
        List<String> savedPaths = new ArrayList<String>();
        String pathPrefix = str2 + str3;

        String mainPagePath = pathPrefix + ".html";
        saveURLToDisc(str, mainPagePath);
        savedPaths.add(mainPagePath);

        int number = 1;
        for (Document similarDocument : list) {
            String similarPath = pathPrefix + "a" + number + ".html";
            saveURLToDisc(similarDocument.getDocumentURI(), similarPath);
            savedPaths.add(similarPath);
            number++;
        }
        return savedPaths;
    }

    /**
     * Builds the list of previously stored evaluation files. The main page path
     * {@code str + str2 + ".html"} is always included; the files {@code str + str2 + "a1.html"} to
     * {@code "a5.html"} are included only when {@code FileHelper.tryReadFileToString} yields non-empty
     * content for them.
     *
     * @param str path prefix (folder) of the stored files
     * @param str2 base name of the stored files
     * @return the main page path followed by the paths of the similar documents that were found
     */
    private static List<String> readEvaluationFiles(String str, String str2) {
        List<String> paths = new ArrayList<String>();
        String pathPrefix = str + str2;
        paths.add(pathPrefix + ".html");
        for (int number = 1; number <= 5; number++) {
            String similarPath = pathPrefix + "a" + number + ".html";
            boolean hasContent = FileHelper.tryReadFileToString(similarPath).length() > 0;
            if (hasContent) {
                paths.add(similarPath);
            }
        }
        return paths;
    }

    /**
     * Evaluates the page segmentation against a manually prepared CSV file.
     * <p>
     * The input file {@code str + str2 + "\" + str3 + "\" + str3 + ".csv"} is expected to contain a header
     * line followed by at least one data row. Rows are split at {@code ;}: column 0 of the first data row
     * is the URL of the evaluated page, column 1 of every row is a guessed XPath and column 3 the expected
     * label. Every data row therefore needs at least four columns.
     * <p>
     * For every data row, each segment whose normalised XPath equals the guessed one appends
     * {@code ;1;<label>} to the row, where the label is derived from the segment's variability. Rows
     * without a match get {@code ;0;0} appended. Finally {@code ;1} or {@code ;0} is appended depending on
     * whether column 3 equals column 5. The annotated rows plus a summary are written to
     * {@code <folder> + str3 + "_ausgewertet.csv"}, and the summary line for this page is inserted into or
     * updated in {@code str + "\evaluation.csv"}.
     *
     * @param str base folder; also contains {@code evaluation.csv}
     * @param str2 sub-folder (appended to {@code str} without a separator)
     * @param str3 name of the evaluation; used as folder name and as file base name
     * @param z {@code true} to run a segmentation on the live URL and download the page and its similar
     *        documents first, {@code false} to reuse the files stored by an earlier run
     * @throws ParserConfigurationException propagated from the page segmentation
     * @throws IOException if a download or file access fails
     * @throws TransformerFactoryConfigurationError propagated from the download step
     * @throws TransformerException propagated from the download step
     */
    public static void cvsTest(String str, String str2, String str3, boolean z) throws ParserConfigurationException, IOException, TransformerFactoryConfigurationError, TransformerException {
        String workingDir = str + str2 + "\\" + str3 + "\\";
        String summaryPath = str + "\\evaluation.csv";
        List<String> rows = FileHelper.readFileToArray(workingDir + str3 + ".csv");
        String pageUrl = rows.get(1).split(";")[0];
        LOGGER.info("Länge: " + rows.size());

        List<String> evaluationFiles;
        if (z) {
            PageSegmenter liveSegmenter = new PageSegmenter();
            liveSegmenter.setDocument(pageUrl);
            liveSegmenter.startPageSegmentation();
            evaluationFiles = saveEvaluationFiles(pageUrl, liveSegmenter.getSimilarFiles(), workingDir, str3);
        } else {
            evaluationFiles = readEvaluationFiles(workingDir, str3);
        }

        // Segment the stored copy: the first file is the page itself, the rest are its similar documents.
        PageSegmenter segmenter = new PageSegmenter();
        segmenter.setDocument(evaluationFiles.get(0));
        DocumentRetriever retriever = new DocumentRetriever();
        List<Document> similarDocuments = new ArrayList<Document>();
        for (int fileIndex = 1; fileIndex < evaluationFiles.size(); fileIndex++) {
            similarDocuments.add(retriever.getWebDocument(evaluationFiles.get(fileIndex)));
        }
        segmenter.setSimilarFiles(similarDocuments);
        segmenter.startPageSegmentation();
        List<Segment> segments = segmenter.getAllSegments();

        int foundPaths = 0;
        int correctLabels = 0;
        int unassignable = 0;
        // Row 0 is the header line.
        for (int rowIndex = 1; rowIndex < rows.size(); rowIndex++) {
            String[] columns = rows.get(rowIndex).split(";");
            String guessedPath = convertXPath(columns[1]);
            String expectedLabel = columns[3];
            LOGGER.info("guessPath: " + guessedPath);

            for (Segment segment : segments) {
                String segmentPath = convertXPath(segment.getXPath());
                String segmentLabel = labelForVariability(segment.getVariability().doubleValue());
                if (guessedPath.equals(segmentPath)) {
                    LOGGER.info("gefunden!");
                    foundPaths++;
                    rows.set(rowIndex, rows.get(rowIndex) + ";1;" + segmentLabel);
                    if (expectedLabel.equals(segmentLabel)) {
                        LOGGER.info("farbe stimmt");
                        correctLabels++;
                    }
                    // Compare by value; identity comparison only works for interned literals.
                    if ("n".equals(segmentLabel)) {
                        unassignable++;
                    }
                }
            }

            // Rows that matched no segment still have their original column count.
            if (rows.get(rowIndex).split(";").length <= 5) {
                rows.set(rowIndex, rows.get(rowIndex) + ";0;0");
            }
            String[] evaluatedColumns = rows.get(rowIndex).split(";");
            String labelMatches = evaluatedColumns[3].equals(evaluatedColumns[5]) ? ";1" : ";0";
            rows.set(rowIndex, rows.get(rowIndex) + labelMatches);
        }

        // The summary must be built before the extra lines below change the row count.
        int guessedPaths = rows.size() - 1;
        String summary = guessedPaths + ";" + foundPaths + ";" + segments.size() + ";" + correctLabels + ";" + (guessedPaths - correctLabels) + ";" + unassignable;
        rows.add("");
        rows.add("guessed XP;found XP;all XP;corr. Label;incorr. Label;not assignabel");
        rows.add(summary);
        FileHelper.writeToFile(workingDir + str3 + "_ausgewertet.csv", rows);

        // Replace the existing summary line of this page in the overall file, or append a new one.
        List<String> summaryRows = FileHelper.readFileToArray(summaryPath);
        boolean updated = false;
        for (int rowIndex = 1; rowIndex < summaryRows.size(); rowIndex++) {
            String rowKey = summaryRows.get(rowIndex).split(";")[0];
            if (rowKey.equals(pageUrl)) {
                summaryRows.set(rowIndex, rowKey + ";" + summary);
                updated = true;
            }
        }
        if (!updated) {
            summaryRows.add(pageUrl + ";" + summary);
        }
        FileHelper.writeToFile(summaryPath, summaryRows);
    }

    /**
     * Maps a segment variability to the one-letter label used in the evaluation files: {@code "u"} below
     * 0.42, {@code "v"} from 0.58 upwards and {@code "n"} for everything in between.
     */
    private static String labelForVariability(double variability) {
        if (variability < 0.42d) {
            return "u";
        }
        return variability >= 0.58d ? "v" : "n";
    }
}
