package cc.gospy.core;

import cc.gospy.core.entity.Page;
import cc.gospy.core.entity.Result;
import cc.gospy.core.entity.Task;
import cc.gospy.core.fetcher.Fetcher;
import cc.gospy.core.fetcher.Fetchers;
import cc.gospy.core.pipeline.Pipeline;
import cc.gospy.core.pipeline.Pipelines;
import cc.gospy.core.processor.ExtractBy;
import cc.gospy.core.processor.PageProcessor;
import cc.gospy.core.processor.PageProcessorNotFoundException;
import cc.gospy.core.processor.PageProcessors;
import cc.gospy.core.processor.Processor;
import cc.gospy.core.processor.Processors;
import cc.gospy.core.scheduler.Observable;
import cc.gospy.core.scheduler.Recoverable;
import cc.gospy.core.scheduler.Scheduler;
import cc.gospy.core.scheduler.Schedulers;
import cc.gospy.core.scheduler.Verifiable;
import cc.gospy.core.scheduler.impl.GeneralScheduler;
import cc.gospy.core.util.Experimental;
import cc.gospy.core.util.LoggerHelper;
import cc.gospy.core.util.StringHelper;
import cc.gospy.core.util.TaskBlockedException;
import ch.qos.logback.classic.Level;
import com.brandwatch.robots.RobotsConfig;
import com.brandwatch.robots.RobotsFactory;
import com.brandwatch.robots.RobotsService;
import java.io.Closeable;
import java.io.IOException;
import java.lang.annotation.Annotation;
import java.lang.reflect.Array;
import java.lang.reflect.Field;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.xsoup.Xsoup;

/* loaded from: input_file:cc/gospy/core/Gospy.class */
public class Gospy implements Observable {
    private static final Logger logger = LoggerFactory.getLogger(Gospy.class);
    private final String identifier;
    private Scheduler scheduler;
    private Fetchers fetcherFactory;
    private PageProcessors pageProcessorFactory;
    private Processors processorFactory;
    private Pipelines pipelineFactory;
    private ExecutorService threadPool;
    private ExceptionHandler handler;
    private int visitGapMillis;
    private volatile boolean running;
    private Thread operationChainThread;
    private RobotsService robotsService;

    /* loaded from: input_file:cc/gospy/core/Gospy$Builder.class */
    public static class Builder {
        private String id;
        private Scheduler sc;
        private Fetchers ff;
        private PageProcessors ppf;
        private Processors pf;
        private Pipelines plf;
        private ExceptionHandler eh;
        private boolean cfr;

        public Builder() {
            GeneralScheduler generalScheduler = Schedulers.GeneralScheduler;
            this.sc = GeneralScheduler.getDefault();
            this.ff = new Fetchers();
            this.ppf = new PageProcessors();
            this.pf = new Processors();
            this.plf = new Pipelines();
            this.eh = ExceptionHandler.DEFAULT;
            this.cfr = false;
        }

        public Builder setIdentifier(String str) {
            this.id = str;
            return this;
        }

        public Builder setScheduler(Scheduler scheduler) {
            this.sc = scheduler;
            return this;
        }

        public Builder setExceptionHandler(ExceptionHandler exceptionHandler) {
            this.eh = exceptionHandler;
            return this;
        }

        public Builder addFetcher(Fetcher fetcher) {
            this.ff.register(fetcher);
            return this;
        }

        public Builder addPageProcessor(Class<? extends PageProcessor> cls) {
            this.ppf.register(cls);
            return this;
        }

        public Builder addProcessor(Processor processor) {
            this.pf.register(processor);
            return this;
        }

        public Builder addPipeline(Pipeline pipeline) {
            this.plf.register(pipeline);
            return this;
        }

        public Builder checkForRobots() {
            this.cfr = true;
            return this;
        }

        public Gospy build() {
            if (this.id == null) {
                this.id = StringHelper.getRandomIdentifier();
            }
            return new Gospy(this.id, this.sc, this.ff, this.ppf, this.pf, this.plf, this.eh, this.cfr);
        }
    }

    private Gospy(String str, Scheduler scheduler, Fetchers fetchers, PageProcessors pageProcessors, Processors processors, Pipelines pipelines, ExceptionHandler exceptionHandler, boolean z) {
        this.identifier = str;
        this.scheduler = scheduler;
        this.fetcherFactory = fetchers;
        this.pageProcessorFactory = pageProcessors;
        this.processorFactory = processors;
        this.pipelineFactory = pipelines;
        this.handler = exceptionHandler;
        this.visitGapMillis = 0;
        this.running = true;
        this.operationChainThread = newOperationChainThread();
        if (z) {
            RobotsConfig robotsConfig = new RobotsConfig();
            robotsConfig.setMaxRedirectHops(3);
            robotsConfig.setRequestTimeoutMillis(2000L);
            robotsConfig.setReadTimeoutMillis(3000);
            this.robotsService = new RobotsFactory(robotsConfig).createService();
        }
    }

    public Thread newOperationChainThread() {
        return new Thread(() -> {
            while (this.running) {
                while (true) {
                    Task task = this.scheduler.getTask(this.identifier);
                    if (task != null) {
                        this.threadPool.execute(() -> {
                            Result<?> process;
                            Page page = null;
                            try {
                                Fetcher fetcher = this.fetcherFactory.get(task.getProtocol());
                                if (this.robotsService != null && !this.robotsService.isAllowed(fetcher.getUserAgent(), URI.create(task.getUrl()))) {
                                    this.handler.exceptionCaught(new TaskBlockedException("task blocked by robots.txt"), task, null);
                                }
                                page = fetcher.fetch(task);
                                try {
                                    process = invokePageProcessor(page, this.pageProcessorFactory.get(page.getTask().getUrl()));
                                } catch (PageProcessorNotFoundException e) {
                                    process = this.processorFactory.get(page.getContentType()).process(task, page);
                                }
                                if (process != null) {
                                    if (process.getNewTasks() != null) {
                                        Iterator<Task> it = process.getNewTasks().iterator();
                                        while (it.hasNext()) {
                                            this.scheduler.addTask(this.identifier, it.next());
                                        }
                                    }
                                    if (this.scheduler instanceof Verifiable) {
                                        ((Verifiable) this.scheduler).feedback(this.identifier, task);
                                    }
                                    if (process.getData() != null) {
                                        Iterator<Pipeline> it2 = this.pipelineFactory.get(process.getType()).iterator();
                                        while (it2.hasNext()) {
                                            it2.next().pipe(process);
                                        }
                                    }
                                }
                                Thread.sleep(this.visitGapMillis);
                            } catch (Throwable th) {
                                Collection<Task> exceptionCaught = this.handler.exceptionCaught(th, task, page);
                                if (exceptionCaught != null) {
                                    exceptionCaught.forEach(task2 -> {
                                        this.scheduler.addTask(this.identifier, task2);
                                    });
                                }
                            }
                        });
                    }
                }
            }
            logger.info("Operation chain stopped.");
        });
    }

    /* JADX WARN: Multi-variable type inference failed */
    @Experimental
    private Result<?> invokePageProcessor(Page page, Class<? extends PageProcessor> cls) throws Exception {
        PageProcessor newInstance = cls.newInstance();
        newInstance.setTask(page.getTask());
        try {
            try {
                byte[] content = page.getContent();
                for (Field field : cls.getFields()) {
                    LinkedHashSet linkedHashSet = new LinkedHashSet();
                    for (Annotation annotation : field.getAnnotations()) {
                        if (annotation.annotationType() == ExtractBy.XPath.class) {
                            Document parse = Jsoup.parse(new String(content));
                            for (String str : ((ExtractBy.XPath) annotation).value()) {
                                linkedHashSet.addAll(Xsoup.compile(str).evaluate(parse).list());
                            }
                        } else if (annotation.annotationType() == ExtractBy.XPaths.class) {
                            Document parse2 = Jsoup.parse(new String(content));
                            for (ExtractBy.XPath xPath : ((ExtractBy.XPaths) annotation).value()) {
                                for (String str2 : xPath.value()) {
                                    linkedHashSet.addAll(Xsoup.compile(str2).evaluate(parse2).list());
                                }
                            }
                        } else if (annotation.annotationType() == ExtractBy.Regex.class) {
                            Matcher matcher = Pattern.compile(((ExtractBy.Regex) annotation).value()).matcher(new String(content));
                            if (matcher.find()) {
                                linkedHashSet.add(matcher.group(((ExtractBy.Regex) annotation).group()));
                            }
                        } else if (annotation.annotationType() == ExtractBy.Regexs.class) {
                            String str3 = new String(content);
                            for (ExtractBy.Regex regex : ((ExtractBy.Regexs) annotation).value()) {
                                Matcher matcher2 = Pattern.compile(regex.value()).matcher(str3);
                                if (matcher2.find()) {
                                    linkedHashSet.add(matcher2.group(regex.group()));
                                }
                            }
                        }
                    }
                    if (linkedHashSet.size() > 0) {
                        if (field.getType().isArray()) {
                            if (field.getType().getComponentType().isPrimitive()) {
                                throw new RuntimeException("We cannot cast a extracted result to a primitive type array, why not trying Object[]?");
                            }
                            Object[] array = linkedHashSet.toArray();
                            Object[] objArr = (Object[]) field.getType().cast(Array.newInstance(field.getType().getComponentType(), linkedHashSet.size()));
                            for (int i = 0; i < objArr.length; i++) {
                                try {
                                    objArr[i] = field.getType().getComponentType().cast(array[i]);
                                } catch (ClassCastException e) {
                                    throw new RuntimeException(e.getMessage() + ", please change your field:" + field.getName() + " to a castable type.");
                                }
                            }
                            field.set(newInstance, objArr);
                        } else if (Collection.class.isAssignableFrom(field.getType())) {
                            field.set(newInstance, field.getType().cast(linkedHashSet));
                        } else {
                            field.set(newInstance, field.getType().cast(linkedHashSet.iterator().next()));
                        }
                    }
                }
                newInstance.process();
                Result<?> result = new Result<>(newInstance.getNewTasks(), newInstance.getResultData());
                result.setPage(page);
                if (newInstance instanceof Closeable) {
                    ((Closeable) newInstance).close();
                }
                return result;
            } catch (Throwable th) {
                newInstance.onError(th);
                if (newInstance instanceof Closeable) {
                    ((Closeable) newInstance).close();
                }
                return null;
            }
        } catch (Throwable th2) {
            if (newInstance instanceof Closeable) {
                ((Closeable) newInstance).close();
            }
            throw th2;
        }
    }

    public void start() {
        start(1);
    }

    public void start(int i) {
        if (this.threadPool != null) {
            throw new RuntimeException("Gospy has already started.");
        }
        this.threadPool = Executors.newFixedThreadPool(i);
        logger.info("Thread pool initialized. [size={}]", Integer.valueOf(i));
        this.operationChainThread.start();
    }

    public void pause(String str) throws IOException {
        if (this.scheduler instanceof Recoverable) {
            try {
                ((Recoverable) this.scheduler).pause(str);
            } catch (Throwable th) {
                th.printStackTrace();
            }
        }
    }

    public void resume(String str) throws Throwable {
        if (this.scheduler instanceof Recoverable) {
            ((Recoverable) this.scheduler).resume(str);
        }
    }

    public void stop() {
        this.running = false;
        synchronized (this) {
            notifyAll();
        }
        this.scheduler.stop();
        this.fetcherFactory.getAll().forEach(fetcher -> {
            if (fetcher instanceof Closeable) {
                closeCloseable((Closeable) fetcher);
            }
        });
        this.processorFactory.getAll().forEach(processor -> {
            if (processor instanceof Closeable) {
                closeCloseable((Closeable) processor);
            }
        });
        this.pipelineFactory.getAll().forEach(pipeline -> {
            if (pipeline instanceof Closeable) {
                closeCloseable((Closeable) pipeline);
            }
        });
        this.threadPool.shutdownNow();
        do {
        } while (!this.threadPool.isTerminated());
        logger.info("Thread pool terminated.");
        this.threadPool = null;
    }

    private void closeCloseable(Closeable closeable) {
        try {
            closeable.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public boolean isRunning() {
        return this.running;
    }

    public Gospy addTask(Task task) {
        this.scheduler.addTask(this.identifier, task);
        return this;
    }

    public Gospy addTask(String str) {
        Task task = new Task(str.contains("://") ? str : "http://".concat(str));
        task.setSkipCheck(true);
        return addTask(task);
    }

    public Gospy addTasks(List<String> list) {
        list.forEach(str -> {
            addTask(str);
        });
        return this;
    }

    public Gospy setVisitGap(int i) {
        this.visitGapMillis = i;
        return this;
    }

    public void setLogLevel(Level level) {
        LoggerHelper.setLevel("cc.gospy.core", level);
    }

    public static Builder custom() {
        return new Builder();
    }

    private Observable getObservableScheduler() {
        if (this.scheduler instanceof Observable) {
            return (Observable) this.scheduler;
        }
        throw new RuntimeException("Scheduler [" + this.scheduler.getClass() + "] is not observable");
    }

    public boolean isObservable() {
        return this.scheduler instanceof Observable;
    }

    @Override // cc.gospy.core.scheduler.Observable
    public long getTotalTaskInputCount() {
        return getObservableScheduler().getTotalTaskInputCount();
    }

    @Override // cc.gospy.core.scheduler.Observable
    public long getTotalTaskOutputCount() {
        return getObservableScheduler().getTotalTaskOutputCount();
    }

    @Override // cc.gospy.core.scheduler.Observable
    public long getRecodedTaskSize() {
        return getObservableScheduler().getRecodedTaskSize();
    }

    @Override // cc.gospy.core.scheduler.Observable
    public long getCurrentTaskQueueSize() {
        return getObservableScheduler().getCurrentTaskQueueSize();
    }

    @Override // cc.gospy.core.scheduler.Observable
    public long getCurrentLazyTaskQueueSize() {
        return getObservableScheduler().getCurrentLazyTaskQueueSize();
    }

    @Override // cc.gospy.core.scheduler.Observable
    public long getRunningTimeMillis() {
        return getObservableScheduler().getRunningTimeMillis();
    }
}
