package cc.gospy.example.webcapturer;

import cc.gospy.core.Gospy;
import cc.gospy.core.entity.Result;
import cc.gospy.core.entity.Task;
import cc.gospy.core.fetcher.Fetchers;
import cc.gospy.core.fetcher.impl.HttpFetcher;
import cc.gospy.core.pipeline.Pipelines;
import cc.gospy.core.pipeline.impl.HierarchicalFilePipeline;
import cc.gospy.core.processor.Processors;
import cc.gospy.core.processor.impl.JsoupProcessor;
import cc.gospy.core.processor.impl.UniversalProcessor;
import cc.gospy.core.scheduler.Schedulers;
import cc.gospy.core.scheduler.impl.VerifiableScheduler;
import cc.gospy.core.util.StringHelper;
import java.net.URLDecoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/* loaded from: input_file:cc/gospy/example/webcapturer/WebCapturer.class */
/**
 * Example crawler that starts from a fixed URL, rewrites the links of every
 * fetched document to relative paths, and passes the document on to a
 * {@link HierarchicalFilePipeline} rooted at {@code D:/temp/<host>/}.
 */
public class WebCapturer {
    /**
     * File name appended to links ending in {@code "/"} before they are resolved,
     * and stripped again before the link is queued as a new task.
     * NOTE(review): presumably this gives the StringHelper routines a file-name
     * segment to work with - confirm against StringHelper.
     */
    private static final String DIRECTORY_PLACEHOLDER = "null";

    /** Suffix that anchor targets and the saved page itself always end with. */
    private static final String HTML_SUFFIX = ".html";

    /** Resolved links must fully match this pattern to be followed and rewritten. */
    private static final Pattern CRAWLABLE_URL =
            Pattern.compile("^https?://((?!javascript:|mailto:| ).)*");

    /**
     * Configures the crawler and starts it on the seed task.
     *
     * @param strArr command-line arguments (not used)
     */
    public static void main(String[] strArr) {
        Task seed = new Task("https://www.zhangjiupeng.com/");
        String basePath = "D:/temp/" + seed.getHost() + "/";

        Gospy.custom()
                .setScheduler(VerifiableScheduler.getDefault())
                .addFetcher(HttpFetcher.getDefault())
                .addProcessor(JsoupProcessor.custom().setDocumentExtractor((page, document) -> {
                    Task pageTask = page.getTask();
                    int status = page.getStatusCode();
                    if (status != 200) {
                        if (status == 301 || status == 302) {
                            // A redirect without a "Location" entry cannot be followed;
                            // it is reported below like any other non-200 response.
                            Object location = page.getExtra().get("Location");
                            if (location != null) {
                                List<Task> redirect = new ArrayList<>();
                                redirect.add(new Task(location.toString()));
                                // Result's declaration is not visible from this file,
                                // so the constructor is used without type arguments.
                                return new Result(redirect);
                            }
                        }
                        System.err.println(pageTask + "\t" + status);
                    }

                    Set<Task> discovered = new HashSet<>();
                    Elements linked = document.select("a[href]");
                    linked.addAll(document.select("link[href]"));
                    linked.addAll(document.select("[src]"));
                    for (Element element : linked) {
                        String attrName = element.hasAttr("href") ? "href" : "src";
                        String link = stripFragment(element.attr(attrName));
                        boolean directory = link.endsWith("/");
                        String absoluteUrl = StringHelper.toAbsoluteUrl(
                                pageTask.getProtocol(), pageTask.getHost(), pageTask.getUrl(),
                                directory ? link.concat(DIRECTORY_PLACEHOLDER) : link);
                        if (!CRAWLABLE_URL.matcher(absoluteUrl).matches()) {
                            continue;
                        }
                        String relativeUrl = StringHelper.toRelativeUrl(
                                pageTask.getProtocol(), pageTask.getHost(), pageTask.getUrl(), absoluteUrl);
                        if (relativeUrl == null) {
                            continue;
                        }
                        // The queued task uses the URL without the placeholder; the
                        // rewritten attribute below is built from the relative URL,
                        // which was derived from the URL that still carries it.
                        discovered.add(new Task(directory
                                ? absoluteUrl.substring(0, absoluteUrl.length() - DIRECTORY_PLACEHOLDER.length())
                                : absoluteUrl));
                        int nameStart = relativeUrl.lastIndexOf('/') + 1;
                        String fileName = StringHelper.toEscapedFileName(relativeUrl.substring(nameStart));
                        if (element.tagName().equals("a")) {
                            fileName = withHtmlSuffix(fileName);
                        }
                        element.attr(attrName, relativeUrl.substring(0, nameStart).concat(fileName));
                    }

                    String pageUrl = pageTask.getUrl();
                    int pageNameStart = pageUrl.lastIndexOf('/') + 1;
                    String pageFile = withHtmlSuffix(
                            StringHelper.toEscapedFileName(pageUrl.substring(pageNameStart)));
                    // Percent-escapes in URLs are UTF-8 by convention, so decode with
                    // UTF-8 instead of whatever the platform default happens to be.
                    String savePath = URLDecoder.decode(
                            StringHelper.cutOffProtocolAndHost(pageUrl.substring(0, pageNameStart)).concat(pageFile),
                            StandardCharsets.UTF_8.name());
                    page.getExtra().put("savePath", savePath);
                    System.out.println("Saving [" + page.getExtra().get("savePath") + "] ...");

                    // Encode with the charset jsoup serialised the document for, rather
                    // than the platform default, so no character is lost on the way out.
                    byte[] html = document.toString().getBytes(document.outputSettings().charset());
                    return new Result(discovered, html);
                }).build())
                .addProcessor(UniversalProcessor.getDefault())
                .addPipeline(HierarchicalFilePipeline.custom().setBasePath(basePath).build())
                .build()
                .addTask(seed)
                .start(1);
    }

    /** Returns {@code link} without its {@code #fragment} part, if it has one. */
    private static String stripFragment(String link) {
        int hash = link.indexOf('#');
        return hash == -1 ? link : link.substring(0, hash);
    }

    /** Returns {@code fileName} unchanged if it ends with ".html", otherwise with ".html" appended. */
    private static String withHtmlSuffix(String fileName) {
        return fileName.endsWith(HTML_SUFFIX) ? fileName : fileName.concat(HTML_SUFFIX);
    }
}
