package cc.gospy.core.processor.impl;

import cc.gospy.core.TaskFilter;
import cc.gospy.core.entity.Page;
import cc.gospy.core.entity.Result;
import cc.gospy.core.entity.Task;
import cc.gospy.core.processor.Extractor;
import cc.gospy.core.processor.ProcessException;
import cc.gospy.core.processor.Processor;
import cc.gospy.core.util.StringHelper;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/* loaded from: input_file:cc/gospy/core/processor/impl/JsoupProcessor.class */
public class JsoupProcessor implements Processor {
    private Extractor<Document, ?> handler;
    private TaskFilter filter;

    /* loaded from: input_file:cc/gospy/core/processor/impl/JsoupProcessor$Builder.class */
    public static class Builder {
        private Extractor<Document, ?> ha;
        private TaskFilter fi = TaskFilter.HTTP_DEFAULT;

        /* JADX WARN: Multi-variable type inference failed */
        public <T> Builder setDocumentExtractor(Extractor<Document, T> extractor) {
            this.ha = extractor;
            return this;
        }

        public Builder setPageLinkDocumentExtractor() {
            return setDocumentExtractor((page, document) -> {
                if (page.getStatusCode() != 200) {
                    return null;
                }
                Task task = page.getTask();
                HashSet hashSet = new HashSet();
                Iterator it = document.select("a[href]").iterator();
                while (it.hasNext()) {
                    String attr = ((Element) it.next()).attr("href");
                    if (attr != null && !attr.equals("")) {
                        hashSet.add(new Task(StringHelper.toAbsoluteUrl(task.getProtocol(), task.getHost(), task.getUrl(), attr)));
                    }
                }
                return new Result(hashSet, task.toString());
            });
        }

        public Builder setFullLinkDocumentExtractor() {
            return setDocumentExtractor((page, document) -> {
                if (page.getStatusCode() != 200) {
                    return null;
                }
                Task task = page.getTask();
                LinkedHashSet linkedHashSet = new LinkedHashSet();
                Iterator it = document.select("[href]").iterator();
                while (it.hasNext()) {
                    String attr = ((Element) it.next()).attr("href");
                    if (attr != null && !attr.equals("")) {
                        linkedHashSet.add(new Task(StringHelper.toAbsoluteUrl(task.getProtocol(), task.getHost(), task.getUrl(), attr)));
                    }
                }
                Iterator it2 = document.select("[src]").iterator();
                while (it2.hasNext()) {
                    String attr2 = ((Element) it2.next()).attr("src");
                    if (attr2 != null && !attr2.equals("")) {
                        linkedHashSet.add(new Task(StringHelper.toAbsoluteUrl(task.getProtocol(), task.getHost(), task.getUrl(), attr2)));
                    }
                }
                return new Result(linkedHashSet, task.toString());
            });
        }

        public Builder setTaskFilter(TaskFilter taskFilter) {
            this.fi = taskFilter;
            return this;
        }

        public JsoupProcessor build() {
            return this.ha == null ? setPageLinkDocumentExtractor().build() : new JsoupProcessor(this.ha, this.fi);
        }
    }

    private JsoupProcessor(Extractor<Document, ?> extractor, TaskFilter taskFilter) {
        this.handler = extractor;
        this.filter = taskFilter;
    }

    public static Builder custom() {
        return new Builder();
    }

    public static JsoupProcessor getDefault() {
        return new Builder().build();
    }

    protected static String getCharacterEncoding(Page page) {
        if (page.getExtra() == null || page.getExtra().get("Content-Type") == null) {
            return null;
        }
        for (String str : page.getExtra().get("Content-Type").toString().split(";")) {
            if (str.trim().startsWith("charset=")) {
                return str.trim().substring(8);
            }
        }
        return null;
    }

    public Extractor<Document, ?> getDocumentExtractor() {
        return this.handler;
    }

    private Document parse(Page page) throws UnsupportedEncodingException {
        String characterEncoding = getCharacterEncoding(page);
        return Jsoup.parse(new String(page.getContent(), characterEncoding != null ? characterEncoding : Charset.defaultCharset().name()));
    }

    @Override // cc.gospy.core.processor.Processor
    public <T> Result<T> process(Task task, Page page) throws ProcessException {
        try {
            Result<T> result = (Result<T>) this.handler.handle(page, parse(page));
            if (result != null) {
                if (result.getNewTasks() != null) {
                    result.getNewTasks().removeIf(this.filter.negate());
                }
                if (result.getPage() == null) {
                    result.setPage(page);
                }
            }
            return result;
        } catch (Throwable th) {
            throw new ProcessException(th.getMessage(), th);
        }
    }

    @Override // cc.gospy.core.processor.Processor
    public String[] getAcceptedContentType() {
        return new String[]{"text/*"};
    }
}
